Changes from all commits (22 commits)
4f863b9  updated install data path (bradley-erickson, Oct 9, 2024)
9da9e4c  added find packages flag to setup (bradley-erickson, Oct 9, 2024)
5bbb434  added function to download so we can call it from code if needed (bradley-erickson, Oct 9, 2024)
1a419f4  Updated numpy dependency (duckduckdoof, Nov 7, 2024)
dd662ff  Merge branch 'main' of github.com:ArgLab/AWE_Components (duckduckdoof, Nov 7, 2024)
b80deba  Updated reference to lexica (duckduckdoof, Nov 7, 2024)
0d86877  Numpy version (#6) (duckduckdoof, Dec 16, 2024)
b483b2a  Merge branch 'main' of github.com:ArgLab/AWE_Components (duckduckdoof, Jan 9, 2025)
356d9a7  textblob features hotfix (duckduckdoof, Jan 28, 2025)
384cca0  Added spacytextblob doc feature name fix (duckduckdoof, Feb 2, 2025)
c8a874d  Added blob (duckduckdoof, Feb 2, 2025)
5487937  Added changes to config for testing (duckduckdoof, Apr 3, 2025)
709accb  Removed unnecessary dependencies, cleaned up existing (duckduckdoof, Apr 3, 2025)
a3b422b  Removed more unnecessary imports, fixed other import issues (duckduckdoof, Apr 3, 2025)
6422573  Removed even moar dependency issues, cleaned up imports (duckduckdoof, Apr 3, 2025)
a7b7189  Added back missing error imports, fixed variable decl (duckduckdoof, Apr 3, 2025)
f8c2260  Removed deprecated nameof (duckduckdoof, Apr 3, 2025)
d87fa76  Removed pesky import (duckduckdoof, Apr 3, 2025)
41758d3  Removed branch for lexica (duckduckdoof, Apr 17, 2025)
b0dd881  Updated readme on installation (duckduckdoof, Apr 24, 2025)
133150a  Merge pull request #13 from ArgLab/import_clean (duckduckdoof, May 1, 2025)
bb46ac4  Merge pull request #12 from ArgLab/textblobfix (duckduckdoof, May 1, 2025)
39 changes: 10 additions & 29 deletions README.md
@@ -6,46 +6,27 @@
which can be installed into a Spacy pipeline. They annotate the Spacy
parse tree with additional attributes that make it easy to summarize
information about features of student writing.

Before You Install
------------

AWE Components is best exercised through [AWE_Workbench](https://github.com/ArgLab/AWE_Workbench), which builds on the features defined in AWE Components. AWE Workbench provides a suite of automatic tests for verifying and validating AWE Components, along with examples, a web server for parsing documents, and an interactive highlighting tool for visualizing the document features derived from AWE Components.

Review AWE Workbench's installation steps first and decide whether you would rather use it than install AWE Components directly.

Installation
------------

Set up Python 3.9. 3.8 will *not* work. If you wish to use `conda`:
Set up Python 3.11. If you wish to use `conda`:

conda create -n test_install python=3.9 pip
conda create -n test_install python=3.11 pip
pip install pip --upgrade
conda activate test_install

If you wish to use plain old `pip` with `virtualenvwrapper`:

mkvirtualenv awe_components --python=/usr/bin/python3.9
mkvirtualenv awe_components --python=/usr/bin/python3.11
pip install pip --upgrade

Install prerequisites:

[Holmes Extractor Expandable](https://github.com/ETS-Next-Gen/holmes-extractor-expandable):

git clone git@github.com:ETS-Next-Gen/holmes-extractor-expandable.git
cd holmes-extractor-expandable/
pip install .

[AWE Language Tool](https://github.com/ETS-Next-Gen/AWE_LanguageTool):

git clone git@github.com:ETS-Next-Gen/AWE_LanguageTool.git
cd AWE_LanguageTool/
pip install .

[AWE Spell Correct](https://github.com/ETS-Next-Gen/AWE_SpellCorrect)

git clone git@github.com:ETS-Next-Gen/AWE_SpellCorrect.git
cd AWE_SpellCorrect/
pip install .

[AWE Lexica](https://github.com/ETS-Next-Gen/AWE_Lexica)

git clone git@github.com:ETS-Next-Gen/AWE_Lexica.git
cd AWE_Lexica/
pip install .

Then from the AWE Workbench Components directory:

pip install .
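
Taken together, a fresh install might look like the following (a sketch assuming the conda route above; the environment name and clone URL are illustrative, not prescriptive):

    conda create -n awe_components python=3.11 pip
    conda activate awe_components
    pip install pip --upgrade
    git clone git@github.com:ArgLab/AWE_Components.git
    cd AWE_Components/
    pip install .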
9 changes: 6 additions & 3 deletions awe_components/components/contentSegmentation.py
@@ -1,10 +1,13 @@
#!/usr/bin/env python3
# Copyright 2022, Educational Testing Service

from .utility_functions import *
from .utility_functions import \
match_related_form, getRoot, \
in_past_tense_scope, newSpanEntry, \
AWE_Info

from operator import itemgetter
import spacy
from spacy.tokens import Token, Doc
from spacy.tokens import Doc
from spacy.language import Language
import wordfreq

12 changes: 3 additions & 9 deletions awe_components/components/lexicalClusters.py
@@ -2,24 +2,18 @@
# Copyright 2022, Educational Testing Service

import re
import spacy
import srsly
import json
import wordfreq
import numpy as np
import os
from collections import OrderedDict

from scipy.spatial.distance import cosine
# Standard cosine distance metric

from sklearn.preprocessing import StandardScaler
from sklearn.cluster import AgglomerativeClustering

from spacy.tokens import Token, Doc
from spacy.language import Language

from .utility_functions import *
from ..errors import *
from .utility_functions import ResolveReference, all_zeros, AWE_Info

lang = "en"

@@ -480,7 +474,7 @@ def devword(token):
# flag assignClusterIDs to run
# by setting it to a non None value
token.doc._.clusterInfo_ = []
self.assignClusterIDs(token.doc)
assignClusterIDs(token.doc)
devlist = [token.text \
for token \
in developmentContentWords(token.doc)]
18 changes: 12 additions & 6 deletions awe_components/components/lexicalFeatures.py
@@ -19,18 +19,14 @@

import importlib.resources
import math
import numpy as np
import os
import re
from varname import nameof

# English dictionary. Contains information on senses associated with words
# (a lot more, but that's what we're currently using it for)
from nltk.corpus import wordnet
from scipy.spatial.distance import cosine # Standard cosine distance metric
from spacy.language import Language
from spacy.tokens import Doc, Span, Token
from spacy.vocab import Vocab
from spacy.tokens import Doc, Token
import srsly
import statistics
# https://github.com/rspeer/wordfreq
@@ -41,7 +37,17 @@

import awe_lexica

from .utility_functions import * # <-- Paul, import only what you need here
from .utility_functions import \
setExtensionFunctions, alphanum_word, \
sylco, content_tags, \
ResolveReference, AWE_Info, \
possessive_or_determiner, personal_or_indefinite_pronoun, \
all_zeros, is_temporal, \
locative_adverbs, existential_there, \
major_locative_prepositions, all_locative_prepositions, \
loc_sverbs, loc_overbs, \
deictics

from ..errors import LexiconMissingError

def lexicon_path(lexicon):
42 changes: 20 additions & 22 deletions awe_components/components/syntaxDiscourseFeats.py
@@ -1,26 +1,25 @@
#!/usr/bin/env python3
# Copyright 2022, Educational Testing Service

import math
import os
import srsly
from varname import nameof

from enum import Enum
from spacy.tokens import Doc, Span, Token
from spacy.tokens import Doc, Token
from spacy.language import Language

from scipy.spatial.distance import cosine
# Standard cosine distance metric

from .utility_functions import *
from ..errors import *
from importlib import resources

from nltk.corpus import wordnet
# English dictionary. Contains information on senses associated with words
# (a lot more, but that's what we're currently using it for)
from .utility_functions import \
setExtensionFunctions, AWE_Info, \
in_past_tense_scope, getRoot, \
temporalPhrase, newSpanEntry, \
adj_noun_or_verb, content_tags, \
possessive_or_determiner, ResolveReference, \
tensed_clause

from importlib import resources
from ..errors import LexiconMissingError

@Language.factory("syntaxdiscoursefeatures")
def SyntaxAndDiscourseFeatures(nlp, name):
@@ -45,21 +44,20 @@ class SyntaxAndDiscourseFeatDef(object):
) as filepath:
TRANSITION_CATEGORIES_PATH = filepath

datapaths = [{'pathname': nameof(TRANSITION_TERMS_PATH),
'value': TRANSITION_TERMS_PATH},
{'pathname': nameof(TRANSITION_CATEGORIES_PATH),
'value': TRANSITION_CATEGORIES_PATH}]

transition_terms = {}
transition_categories = {}

def package_check(self, lang):
for path in self.datapaths:
if not os.path.exists(path['value']):
raise LexiconMissingError(
"Trying to load AWE Workbench Lexicon Module \
without {name} datafile".format(name=path['pathname'])
)
if not os.path.exists(self.TRANSITION_TERMS_PATH):
raise LexiconMissingError(
"Trying to load AWE Workbench Syntax and Discourse Feature \
Module without supporting datafile {}".format(self.TRANSITION_TERMS_PATH)
)
if not os.path.exists(self.TRANSITION_CATEGORIES_PATH):
raise LexiconMissingError(
"Trying to load AWE Workbench Syntax and Discourse Feature \
Module without supporting datafile {}".format(self.TRANSITION_CATEGORIES_PATH)
)

def load_lexicons(self, lang):
self.transition_terms = \
13 changes: 13 additions & 0 deletions awe_components/components/utility_functions.py
@@ -2899,6 +2899,9 @@ def setTokenEntry(name, token, value):
# attribute. #
# TBD: put security check in for this #
#######################################
elif "blob" in name:
name = name.replace("blob.", "")
entry['value'] = getattr(token._.blob, name)
elif token.has_extension(name):
# TODO: Use Token.get_extension
# https://spacy.io/api/token
@@ -3384,6 +3387,16 @@ def AWE_Info(document: Doc,
raise AWE_Workbench_Error(
'Invalid indicator ' + indicator)

# QUICK FIX: spacytextblob no longer exposes polarity, subjectivity,
# or assessments via doc._.X; they are now nested under doc._.blob.X.
# We patch the indicator names here in AWE_Info.
if indicator == "polarity":
indicator = "blob.polarity"
elif indicator == "subjectivity":
indicator = "blob.subjectivity"
elif indicator == "assessments":
indicator = "blob.assessments"

if infoType == 'Doc':
baseInfo = createSpanInfo(indicator,
document)
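
For context, the attribute move this quick fix works around can be seen in a minimal spacytextblob session (a sketch; it assumes spacy, spacytextblob, and the en_core_web_sm model are installed):

    import spacy
    from spacytextblob.spacytextblob import SpacyTextBlob  # registers the factory

    nlp = spacy.load("en_core_web_sm")
    nlp.add_pipe("spacytextblob")
    doc = nlp("I love this essay.")

    # Older spacytextblob versions exposed these directly (doc._.polarity, etc.);
    # current versions nest them under the doc._.blob TextBlob object:
    print(doc._.blob.polarity)
    print(doc._.blob.subjectivity)
    print(doc._.blob.sentiment_assessments.assessments)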
116 changes: 100 additions & 16 deletions awe_components/components/viewpointFeatures.py
@@ -3,10 +3,7 @@

import os
import srsly
import imp

from enum import Enum
from collections import OrderedDict
from spacy.tokens import Doc, Span, Token
from spacy.language import Language

@@ -16,8 +13,95 @@
from nltk.corpus import wordnet
# (a lot more, but that's what we're currently using it for)

from .utility_functions import *
from ..errors import *
from .utility_functions import \
AWE_Info, \
absolute_degree, \
adjectival_complement_dependencies, \
adjectival_mod_dependencies, \
adjectival_predicates, \
animate_ent_type, \
auxiliary_dependencies, \
auxiliary_or_adverb, \
be_verbs, \
clausal_complements, \
clausal_modifier_dependencies, \
clausal_subject_or_complement, \
common_evaluation_adjective, \
common_hedge_word, \
complements, \
containsDistinctReference, \
content_pos, \
contracted_verb, \
contraction, \
core_temporal_preps, \
coreViewpointPredicate, \
dative_preps, \
demonstratives, \
elliptical_verb, \
emphatic_adjective, \
emphatic_adverb, \
first_person_pronouns, \
function_word_tags, \
generalArgumentPredicate, \
general_complements_and_modifiers, \
generalViewpointPredicate, \
getDative, \
getLightVerbs, \
getLinkedNodes, \
getLogicalObject, \
getObject, \
getPrepObject, \
getRoot, \
getRoots, \
getSubject, \
getTensedVerbHead, \
illocutionary_tag, \
inanimate_3sg_pronouns, \
indefinite_comparison, \
indefinite_pronoun, \
in_modal_scope, \
in_past_tense_scope, \
is_definite_nominal, \
isRoot, \
loose_clausal_dependencies, \
newSpanEntry, \
newTokenEntry, \
nominal_pos, \
nonhuman_ent_type, \
object_predicate_dependencies, \
other_conversational_idioms, \
other_conversational_vocabulary, \
personal_or_indefinite_pronoun, \
pos_degree_mod, \
prehead_modifiers2, \
present_semimodals, \
private_mental_state_tag, \
quantifying_determiners, \
quotationMark, \
raising_complement, \
ResolveReference, \
rootTree, \
scanForAnimatePotentialAntecedents, \
second_person_pronouns, \
setExtensionFunctions, \
stance_adverb, \
stancePredicate, \
subject_dependencies, \
subject_or_object_nom, \
takesBareInfinitive, \
tensed_clause, \
third_person_pronouns, \
tough_complement, \
underlying_object_dependencies, \
verbal_mod_dependencies, \
verbal_pos, \
wh_question_word

from ..errors import LexiconMissingError
from importlib import resources


@@ -65,13 +149,13 @@ class ViewpointFeatureDef:
def package_check(self, lang):
if not os.path.exists(self.STANCE_PERSPECTIVE_PATH):
raise LexiconMissingError(
"Trying to load AWE Workbench Syntaxa and Discourse Feature \
Module without supporting datafile {}".format(filepath)
"Trying to load AWE Workbench Syntax and Discourse Feature \
Module without supporting datafile {}".format(self.STANCE_PERSPECTIVE_PATH)
)
if not os.path.exists(self.MORPHOLEX_PATH):
raise LexiconMissingError(
"Trying to load AWE Workbench Syntaxa and Discourse Feature \
Module without supporting datafile {}".format(filepath)
"Trying to load AWE Workbench Syntax and Discourse Feature \
Module without supporting datafile {}".format(self.MORPHOLEX_PATH)
)

def load_lexicon(self, lang):
@@ -4636,15 +4720,15 @@ def propagateNegation(self, doc: Doc):
# neutral.
if tok._.vwp_evaluation \
or tok._.vwp_hedge \
or tok.text in doc._.assessments:
if tok._.polarity < 0 or tok._.sentiword < 0:
tok._.vwp_tone_ = min(tok._.polarity, tok._.sentiword)
elif tok._.polarity > 0 and tok._.sentiword > 0:
tok._.vwp_tone_ = max(tok._.polarity, tok._.sentiword)
or tok.text in doc._.blob.sentiment_assessments.assessments:
if tok._.blob.sentiment_assessments.polarity < 0 or tok._.sentiword < 0:
tok._.vwp_tone_ = min(tok._.blob.sentiment_assessments.polarity, tok._.sentiword)
elif tok._.blob.sentiment_assessments.polarity > 0 and tok._.sentiword > 0:
tok._.vwp_tone_ = max(tok._.blob.sentiment_assessments.polarity, tok._.sentiword)
else:
tok._.vwp_tone_ = (tok._.polarity + tok._.sentiword) / 2
tok._.vwp_tone_ = (tok._.blob.sentiment_assessments.polarity + tok._.sentiword) / 2
else:
tok._.vwp_tone_ = min(tok._.polarity, tok._.sentiword)
tok._.vwp_tone_ = min(tok._.blob.sentiment_assessments.polarity, tok._.sentiword)

# rule order fixes to the tone variable are generally a bad idea,
# but these are so common that fixing them gets rid of a lot of
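
The tone rule in the hunk above can be read as a small standalone function (a sketch for illustration only; `polarity` stands in for `tok._.blob.sentiment_assessments.polarity` and `sentiword` for `tok._.sentiword`):

    def combined_tone(polarity, sentiword, evaluative_or_hedge_or_assessed):
        """Sketch of the vwp_tone_ assignment in propagateNegation."""
        if evaluative_or_hedge_or_assessed:
            if polarity < 0 or sentiword < 0:
                # Either signal negative: take the more negative value
                return min(polarity, sentiword)
            elif polarity > 0 and sentiword > 0:
                # Both positive: take the stronger positive value
                return max(polarity, sentiword)
            else:
                # Mixed or neutral signals: average them
                return (polarity + sentiword) / 2
        # Not evaluative, hedged, or assessed: default to the more negative value
        return min(polarity, sentiword)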