From 4f863b962fa4c2781635fa07c9af4d5dd06d1102 Mon Sep 17 00:00:00 2001 From: Bradley Erickson Date: Wed, 9 Oct 2024 09:35:35 -0400 Subject: [PATCH 01/18] updated install data path --- install.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/install.py b/install.py index 930943c..88242a0 100644 --- a/install.py +++ b/install.py @@ -1,6 +1,4 @@ from setuptools.command.install import install as _install -from setuptools.command.develop import develop as _develop -from distutils import log import os import subprocess import sys @@ -16,4 +14,5 @@ class AWEInstall(_install): ''' def run(self): _install.run(self) - subprocess.run(['python', 'awe_components/setup/data.py'], env=modified_env) + script_path = os.path.join(os.path.dirname(__file__), 'awe_components', 'setup', 'data.py') + subprocess.run([sys.executable, script_path], env=modified_env) From 9da9e4c719253a265e0f386c7a75a491f803d58c Mon Sep 17 00:00:00 2001 From: Bradley Erickson Date: Wed, 9 Oct 2024 10:08:10 -0400 Subject: [PATCH 02/18] added find packages flag to setup --- setup.cfg | 1 + 1 file changed, 1 insertion(+) diff --git a/setup.cfg b/setup.cfg index c5d2fee..e37b5b7 100644 --- a/setup.cfg +++ b/setup.cfg @@ -25,6 +25,7 @@ classifiers = Topic :: Text Processing :: Automated Writing Evaluation [options] +packages = find: include_package_data = True python_requires = >=3.9 cmdclass = From 5bbb434bf8c2f723ba3887c875d4c481217cfd17 Mon Sep 17 00:00:00 2001 From: Bradley Erickson Date: Wed, 9 Oct 2024 10:53:39 -0400 Subject: [PATCH 03/18] added function to download so we can call it from code if needed --- awe_components/setup/data.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/awe_components/setup/data.py b/awe_components/setup/data.py index cd5018c..3536dca 100644 --- a/awe_components/setup/data.py +++ b/awe_components/setup/data.py @@ -4,9 +4,14 @@ from spacy.cli.download import download import os -if __name__ == '__main__': + +def download_models(): print('Downloading Spacy and Coreferee Lexicons') download('en_core_web_sm') download('en_core_web_lg') download('en_core_web_trf') os.system("python3 -m coreferee install en") + + +if __name__ == '__main__': + download_models() From 1a419f45a45f60219ceb68b1f934527c35772f5d Mon Sep 17 00:00:00 2001 From: duckduckdoof Date: Thu, 7 Nov 2024 15:55:40 -0500 Subject: [PATCH 04/18] Updated numpy dependency --- setup.cfg | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.cfg b/setup.cfg index c5d2fee..5a163cc 100644 --- a/setup.cfg +++ b/setup.cfg @@ -36,7 +36,7 @@ install_requires = coreferee rdflib spacytextblob - numpy + numpy==1.26.4 srsly wordfreq statistics From b80deba770a2f0aa39cc10c8fec408f338886822 Mon Sep 17 00:00:00 2001 From: duckduckdoof Date: Thu, 7 Nov 2024 15:56:59 -0500 Subject: [PATCH 05/18] Updated reference to lexica --- setup.cfg | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.cfg b/setup.cfg index 6c11157..b499a22 100644 --- a/setup.cfg +++ b/setup.cfg @@ -31,7 +31,7 @@ python_requires = >=3.9 cmdclass = install = install.AWEInstall install_requires = - awe_lexica + awe_lexica @ git+https://github.com/ArgLab/AWE_Lexica.git spacy holmes_extractor coreferee From 0d86877ad50d3a4fdf806917d27c218012dedc69 Mon Sep 17 00:00:00 2001 From: Caleb Scott Date: Mon, 16 Dec 2024 15:41:08 -0500 Subject: [PATCH 06/18] Numpy version (#6) * Updated numpy dependency * Updated reference to lexica * Locked torch version * Removed holmes dependency * Added TODO for numpy version locking * Fixed version in comment * Fixed version typo * Added websocket version lock --- awe_components/setup/data.py | 3 +++ setup.cfg | 9 ++++----- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/awe_components/setup/data.py b/awe_components/setup/data.py index 3536dca..626a5ea 100644 --- a/awe_components/setup/data.py +++ b/awe_components/setup/data.py @@ -1,6 +1,9 @@ #!/usr/bin/env python3 # Copyright 2022, Educational Testing Service +# TODO: using numpy>1.26.4 seems to cause errors when installing from awe_workbench. +# This is related to the desired model (subwordbert) that we use for AWE. +# Version-locking seems to keep this from failing. from spacy.cli.download import download import os diff --git a/setup.cfg b/setup.cfg index e37b5b7..66901e6 100644 --- a/setup.cfg +++ b/setup.cfg @@ -31,13 +31,12 @@ python_requires = >=3.9 cmdclass = install = install.AWEInstall install_requires = - awe_lexica + awe_lexica @ git+https://github.com/ArgLab/AWE_Lexica.git spacy - holmes_extractor coreferee rdflib spacytextblob - numpy + numpy==1.26.4 srsly wordfreq statistics @@ -45,13 +44,13 @@ install_requires = scikit-learn nltk aenum - websockets + websockets<14 websocket-client pytest clint pygtrie transformers - torch + torch==2.4.1 Path [options.package_data] * = *.cfg, *.csv, *.json, *.txt From 356d9a743155dd2de66c3122f35bb8b9391ac090 Mon Sep 17 00:00:00 2001 From: duckduckdoof Date: Tue, 28 Jan 2025 11:13:26 -0500 Subject: [PATCH 07/18] textblob features hotfix --- awe_components/components/utility_functions.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/awe_components/components/utility_functions.py b/awe_components/components/utility_functions.py index 67877df..8eb532b 100644 --- a/awe_components/components/utility_functions.py +++ b/awe_components/components/utility_functions.py @@ -3384,6 +3384,16 @@ def AWE_Info(document: Doc, raise AWE_Workbench_Error( 'Invalid indicator ' + indicator) + # QUICK FIX: spacytextblob no longer references polarity, subjectivity, + # nor assessments via doc._.X, but rather doc._.blob.X + # We are quickly fixing this problem in AWE_Info + if indicator == "polarity": + indicator = "blob.polarity" + elif indicator == "subjectivity": + indicator = "blob.subjectivity" + elif indicator == "assessments": + indicator = "blob.assessments" + if infoType == 'Doc': baseInfo = createSpanInfo(indicator, document) From 384cca0970a977ce30c8f323fd3361af41ea2642 Mon Sep 17 00:00:00 2001 From: duckduckdoof Date: Sun, 2 Feb 2025 16:39:42 -0500 Subject: [PATCH 08/18] Added spacytextblob doc feature name fix --- awe_components/components/utility_functions.py | 3 +++ awe_components/components/viewpointFeatures.py | 14 +++++++------- setup.cfg | 2 +- 3 files changed, 11 insertions(+), 8 deletions(-) diff --git a/awe_components/components/utility_functions.py b/awe_components/components/utility_functions.py index 8eb532b..4230716 100644 --- a/awe_components/components/utility_functions.py +++ b/awe_components/components/utility_functions.py @@ -2899,6 +2899,9 @@ def setTokenEntry(name, token, value): # attribute. # # TBD: put security check in for this # ####################################### + elif "blob" in name: + name = name.replace("blob.", "") + entry['value'] = getattr(token._.blob, name) elif token.has_extension(name): # TODO: Use Token.get_extension # https://spacy.io/api/token diff --git a/awe_components/components/viewpointFeatures.py b/awe_components/components/viewpointFeatures.py index 92d10ef..d257127 100644 --- a/awe_components/components/viewpointFeatures.py +++ b/awe_components/components/viewpointFeatures.py @@ -4636,15 +4636,15 @@ def propagateNegation(self, doc: Doc): # neutral. if tok._.vwp_evaluation \ or tok._.vwp_hedge \ - or tok.text in doc._.assessments: - if tok._.polarity < 0 or tok._.sentiword < 0: - tok._.vwp_tone_ = min(tok._.polarity, tok._.sentiword) - elif tok._.polarity > 0 and tok._.sentiword > 0: - tok._.vwp_tone_ = max(tok._.polarity, tok._.sentiword) + or tok.text in doc._.blob.sentiment_assessments.assessments: + if tok._.blob.sentiment_assessments.polarity < 0 or tok._.sentiword < 0: + tok._.vwp_tone_ = min(tok._.sentiment_assessments.polarity, tok._.sentiword) + elif tok._.sentiment_assessments.polarity > 0 and tok._.sentiword > 0: + tok._.vwp_tone_ = max(tok._.sentiment_assessments.polarity, tok._.sentiword) else: - tok._.vwp_tone_ = (tok._.polarity + tok._.sentiword) / 2 + tok._.vwp_tone_ = (tok._.sentiment_assessments.polarity + tok._.sentiword) / 2 else: - tok._.vwp_tone_ = min(tok._.polarity, tok._.sentiword) + tok._.vwp_tone_ = min(tok._.sentiment_assessments.polarity, tok._.sentiword) # rule order fixes to the tone variable are generally a bad idea, # but these are so common that fixing them gets rid of a lot of diff --git a/setup.cfg b/setup.cfg index 66901e6..66536f8 100644 --- a/setup.cfg +++ b/setup.cfg @@ -31,7 +31,7 @@ python_requires = >=3.9 cmdclass = install = install.AWEInstall install_requires = - awe_lexica @ git+https://github.com/ArgLab/AWE_Lexica.git + awe_lexica @ git+https://github.com/ArgLab/AWE_Lexica.git@varname-patch spacy coreferee rdflib From c8a874d4bfc7346182ab977b3bd07396d83c32cd Mon Sep 17 00:00:00 2001 From: duckduckdoof Date: Sun, 2 Feb 2025 16:46:03 -0500 Subject: [PATCH 09/18] Added blob --- awe_components/components/viewpointFeatures.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/awe_components/components/viewpointFeatures.py b/awe_components/components/viewpointFeatures.py index d257127..6f32fac 100644 --- a/awe_components/components/viewpointFeatures.py +++ b/awe_components/components/viewpointFeatures.py @@ -4638,13 +4638,13 @@ def propagateNegation(self, doc: Doc): or tok._.vwp_hedge \ or tok.text in doc._.blob.sentiment_assessments.assessments: if tok._.blob.sentiment_assessments.polarity < 0 or tok._.sentiword < 0: - tok._.vwp_tone_ = min(tok._.sentiment_assessments.polarity, tok._.sentiword) - elif tok._.sentiment_assessments.polarity > 0 and tok._.sentiword > 0: - tok._.vwp_tone_ = max(tok._.sentiment_assessments.polarity, tok._.sentiword) + tok._.vwp_tone_ = min(tok._.blob.sentiment_assessments.polarity, tok._.sentiword) + elif tok._.blob.sentiment_assessments.polarity > 0 and tok._.sentiword > 0: + tok._.vwp_tone_ = max(tok._.blob.sentiment_assessments.polarity, tok._.sentiword) else: - tok._.vwp_tone_ = (tok._.sentiment_assessments.polarity + tok._.sentiword) / 2 + tok._.vwp_tone_ = (tok._.blob.sentiment_assessments.polarity + tok._.sentiword) / 2 else: - tok._.vwp_tone_ = min(tok._.sentiment_assessments.polarity, tok._.sentiword) + tok._.vwp_tone_ = min(tok._.blob.sentiment_assessments.polarity, tok._.sentiword) # rule order fixes to the tone variable are generally a bad idea, # but these are so common that fixing them gets rid of a lot of From 548793787d152ba02554086be64e05b0f283d554 Mon Sep 17 00:00:00 2001 From: duckduckdoof Date: Thu, 3 Apr 2025 12:14:50 -0400 Subject: [PATCH 10/18] Added changes to config for testing --- extensions.txt | 61 ++++++++++++++++++++++++++++++++++++++++++++++++++ setup.cfg | 2 +- 2 files changed, 62 insertions(+), 1 deletion(-) create mode 100644 extensions.txt diff --git a/extensions.txt b/extensions.txt new file mode 100644 index 0000000..8dc3ff2 --- /dev/null +++ b/extensions.txt @@ -0,0 +1,61 @@ +AWE_Info +all_cluster_info +assessments +blob +clusterInfo +clusterInfo_ +concrete_details +corefChainInfo +coref_chains +direct_speech_spans +has_governing_subject +intersentence_cohesions +main_cluster_spans +main_cluster_spans_ +main_ideas +main_ideas_ +negation_tokens +nominalReferences +polarity +prompt +prompt_ +prompt_language +prompt_language_ +prompt_related +prompt_related_ +propositional_attitudes_ +sentenceThemes +sentence_types +sliding_window_cohesions +subjectivity +supporting_details +supporting_details_ +supporting_ideas +supporting_ideas_ +syntacticDepthsOfRhemes +syntacticDepthsOfThemes +syntacticProfile +syntacticProfileNormed +syntacticVariety +tense_changes +token_vectors +transition_distances +transition_word_profile +transition_word_profile_ +transitions +vwp_allocentric +vwp_argumentation +vwp_character_traits +vwp_direct_speech +vwp_egocentric +vwp_emotion_states +vwp_interactive +vwp_perspective_spans +vwp_perspective_spans_ +vwp_propositional_attitudes +vwp_quoted +vwp_social_awareness +vwp_stance_markers +vwp_stance_markers_ +vwp_statements_of_fact +vwp_statements_of_opinion diff --git a/setup.cfg b/setup.cfg index 66901e6..3bd4d44 100644 --- a/setup.cfg +++ b/setup.cfg @@ -33,7 +33,7 @@ cmdclass = install_requires = awe_lexica @ git+https://github.com/ArgLab/AWE_Lexica.git spacy - coreferee + coreferee @ git+https://github.com/Arglab/coreferee.git@latest_spacy rdflib spacytextblob numpy==1.26.4 From 709accb7a369e2e9268760cdd2e198a30d25683e Mon Sep 17 00:00:00 2001 From: duckduckdoof Date: Thu, 3 Apr 2025 12:29:19 -0400 Subject: [PATCH 11/18] Removed unnecessary dependencies, cleaned up existing --- awe_components/components/lexicalFeatures.py | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/awe_components/components/lexicalFeatures.py b/awe_components/components/lexicalFeatures.py index 0de6b01..cc4e282 100644 --- a/awe_components/components/lexicalFeatures.py +++ b/awe_components/components/lexicalFeatures.py @@ -19,18 +19,14 @@ import importlib.resources import math -import numpy as np import os -import re -from varname import nameof # English dictionary. Contains information on senses associated with words # (a lot more, but that's what we're currently using it for) from nltk.corpus import wordnet from scipy.spatial.distance import cosine # Standard cosine distance metric from spacy.language import Language -from spacy.tokens import Doc, Span, Token -from spacy.vocab import Vocab +from spacy.tokens import Doc, Token import srsly import statistics # https://github.com/rspeer/wordfreq @@ -41,7 +37,17 @@ import awe_lexica -from .utility_functions import * # <-- Paul, import only what you need here +from .utility_functions import \ + setExtensionFunctions, alphanum_word, \ + sylco, content_tags, \ + ResolveReference, AWE_Info, \ + possessive_or_determiner, personal_or_indefinite_pronoun, \ + all_zeros, is_temporal, \ + locative_adverbs, existential_there, \ + major_locative_prepositions, all_locative_prepositions, \ + loc_sverbs, loc_overbs, \ + deictics + from ..errors import LexiconMissingError def lexicon_path(lexicon): From a3b422ba5f44dc8237cc89d10dc2bda775ff1279 Mon Sep 17 00:00:00 2001 From: duckduckdoof Date: Thu, 3 Apr 2025 12:37:32 -0400 Subject: [PATCH 12/18] Removed more unnecessary imports, fixed other import issues --- awe_components/components/contentSegmentation.py | 3 +-- awe_components/components/lexicalClusters.py | 11 +++-------- 2 files changed, 4 insertions(+), 10 deletions(-) diff --git a/awe_components/components/contentSegmentation.py b/awe_components/components/contentSegmentation.py index 6895545..cdac907 100644 --- a/awe_components/components/contentSegmentation.py +++ b/awe_components/components/contentSegmentation.py @@ -3,8 +3,7 @@ from .utility_functions import * from operator import itemgetter -import spacy -from spacy.tokens import Token, Doc +from spacy.tokens import Doc from spacy.language import Language import wordfreq diff --git a/awe_components/components/lexicalClusters.py b/awe_components/components/lexicalClusters.py index 42e898c..bf73c21 100644 --- a/awe_components/components/lexicalClusters.py +++ b/awe_components/components/lexicalClusters.py @@ -2,23 +2,18 @@ # Copyright 2022, Educational Testing Service import re -import spacy -import srsly +import json import wordfreq import numpy as np -import os from collections import OrderedDict -from scipy.spatial.distance import cosine -# Standard cosine distance metric - from sklearn.preprocessing import StandardScaler from sklearn.cluster import AgglomerativeClustering from spacy.tokens import Token, Doc from spacy.language import Language -from .utility_functions import * +from .utility_functions import ResolveReference, all_zeros, AWE_Info from ..errors import * lang = "en" @@ -480,7 +475,7 @@ def devword(token): # flag assignClusterIDs to run # by setting it to a non None value token.doc._.clusterInfo_ = [] - self.assignClusterIDs(token.doc) + assignClusterIDs(token.doc) devlist = [token.text \ for token \ in developmentContentWords(token.doc)] From 6422573573e687bd687acdc2454adee530ffd37f Mon Sep 17 00:00:00 2001 From: duckduckdoof Date: Thu, 3 Apr 2025 13:00:12 -0400 Subject: [PATCH 13/18] Removed even moar dependency issues, cleaned up imports --- .../components/contentSegmentation.py | 6 +- awe_components/components/lexicalClusters.py | 1 - .../components/syntaxDiscourseFeats.py | 18 ++-- .../components/viewpointFeatures.py | 93 ++++++++++++++++++- 4 files changed, 101 insertions(+), 17 deletions(-) diff --git a/awe_components/components/contentSegmentation.py b/awe_components/components/contentSegmentation.py index cdac907..58903d9 100644 --- a/awe_components/components/contentSegmentation.py +++ b/awe_components/components/contentSegmentation.py @@ -1,7 +1,11 @@ #!/usr/bin/env python3 # Copyright 2022, Educational Testing Service -from .utility_functions import * +from .utility_functions import \ + match_related_form, getRoot, \ + in_past_tense_scope, newSpanEntry, \ + AWE_Info + from operator import itemgetter from spacy.tokens import Doc from spacy.language import Language diff --git a/awe_components/components/lexicalClusters.py b/awe_components/components/lexicalClusters.py index bf73c21..796ac19 100644 --- a/awe_components/components/lexicalClusters.py +++ b/awe_components/components/lexicalClusters.py @@ -14,7 +14,6 @@ from spacy.language import Language from .utility_functions import ResolveReference, all_zeros, AWE_Info -from ..errors import * lang = "en" diff --git a/awe_components/components/syntaxDiscourseFeats.py b/awe_components/components/syntaxDiscourseFeats.py index 7a0a63e..203277f 100644 --- a/awe_components/components/syntaxDiscourseFeats.py +++ b/awe_components/components/syntaxDiscourseFeats.py @@ -1,27 +1,25 @@ #!/usr/bin/env python3 # Copyright 2022, Educational Testing Service -import math import os import srsly from varname import nameof -from enum import Enum -from spacy.tokens import Doc, Span, Token +from spacy.tokens import Doc, Token from spacy.language import Language from scipy.spatial.distance import cosine # Standard cosine distance metric -from .utility_functions import * -from ..errors import * +from .utility_functions import \ + setExtensionFunctions, AWE_Info, \ + in_past_tense_scope, getRoot, \ + temporalPhrase, newSpanEntry, \ + adj_noun_or_verb, content_tags, \ + possessive_or_determiner, ResolveReference, \ + tensed_clause from importlib import resources -from nltk.corpus import wordnet -# English dictionary. Contains information on senses associated with words -# (a lot more, but that's what we're currently using it for) - - @Language.factory("syntaxdiscoursefeatures") def SyntaxAndDiscourseFeatures(nlp, name): return SyntaxAndDiscourseFeatDef() diff --git a/awe_components/components/viewpointFeatures.py b/awe_components/components/viewpointFeatures.py index 92d10ef..a1bad24 100644 --- a/awe_components/components/viewpointFeatures.py +++ b/awe_components/components/viewpointFeatures.py @@ -3,10 +3,7 @@ import os import srsly -import imp -from enum import Enum -from collections import OrderedDict from spacy.tokens import Doc, Span, Token from spacy.language import Language @@ -16,8 +13,94 @@ from nltk.corpus import wordnet # (a lot more, but that's what we're currently using it for) -from .utility_functions import * -from ..errors import * +from .utility_functions import \ + absolute_degree, \ + adjectival_complement_dependencies, \ + adjectival_mod_dependencies , \ + adjectival_predicates, \ + animate_ent_type , \ + auxiliary_dependencies, \ + auxiliary_or_adverb, \ + be_verbs , \ + clausal_complements , \ + clausal_modifier_dependencies , \ + clausal_subject_or_complement, \ + common_evaluation_adjective, \ + common_hedge_word, \ + complements , \ + containsDistinctReference, \ + content_pos , \ + contracted_verb, \ + contraction, \ + core_temporal_preps , \ + coreViewpointPredicate, \ + dative_preps , \ + demonstratives , \ + elliptical_verb, \ + emphatic_adjective, \ + emphatic_adjective, \ + emphatic_adverb, \ + first_person_pronouns , \ + function_word_tags , \ + generalArgumentPredicate, \ + general_complements_and_modifiers , \ + generalViewpointPredicate, \ + getDative, \ + getLightVerbs, \ + getLinkedNodes, \ + getLogicalObject, \ + getObject, \ + getPrepObject, \ + getRoot, \ + getRoots, \ + getSubject, \ + getSubject, \ + getTensedVerbHead, \ + illocutionary_tag, \ + inanimate_3sg_pronouns, \ + indefinite_comparison, \ + indefinite_pronoun , \ + in_modal_scope, \ + in_past_tense_scope, \ + is_definite_nominal, \ + isRoot, \ + loose_clausal_dependencies , \ + newSpanEntry, \ + newTokenEntry, \ + nominal_pos , \ + nonhuman_ent_type , \ + object_predicate_dependencies , \ + object_predicate_dependencies, \ + other_conversational_idioms, \ + other_conversational_vocabulary, \ + personal_or_indefinite_pronoun , \ + personal_or_indefinite_pronoun , \ + pos_degree_mod , \ + prehead_modifiers2 , \ + present_semimodals , \ + private_mental_state_tag, \ + quantifying_determiners, \ + quotationMark, \ + raising_complement, \ + ResolveReference, \ + rootTree, \ + scanForAnimatePotentialAntecedents, \ + second_person_pronouns , \ + setExtensionFunctions, \ + stance_adverb, \ + stancePredicate, \ + subject_dependencies , \ + subject_or_object_nom , \ + takesBareInfinitive, \ + tensed_clause, \ + third_person_pronouns , \ + tough_complement, \ + underlying_object_dependencies , \ + verbal_mod_dependencies , \ + verbal_pos , \ + wh_question_word + +from ..errors import LexiconMissingError from importlib import resources From a7b7189b0f7c157b699979718eabf3ceaa459ab9 Mon Sep 17 00:00:00 2001 From: duckduckdoof Date: Thu, 3 Apr 2025 13:04:03 -0400 Subject: [PATCH 14/18] Added back missing error imports, fixed variable decl --- awe_components/components/syntaxDiscourseFeats.py | 2 ++ awe_components/components/viewpointFeatures.py | 9 +++++---- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/awe_components/components/syntaxDiscourseFeats.py b/awe_components/components/syntaxDiscourseFeats.py index 203277f..0f51970 100644 --- a/awe_components/components/syntaxDiscourseFeats.py +++ b/awe_components/components/syntaxDiscourseFeats.py @@ -18,7 +18,9 @@ adj_noun_or_verb, content_tags, \ possessive_or_determiner, ResolveReference, \ tensed_clause + from importlib import resources +from ..errors import LexiconMissingError @Language.factory("syntaxdiscoursefeatures") def SyntaxAndDiscourseFeatures(nlp, name): diff --git a/awe_components/components/viewpointFeatures.py b/awe_components/components/viewpointFeatures.py index a1bad24..bb64155 100644 --- a/awe_components/components/viewpointFeatures.py +++ b/awe_components/components/viewpointFeatures.py @@ -14,6 +14,7 @@ # (a lot more, but that's what we're currently using it for) from .utility_functions import \ + AWE_Info, \ absolute_degree, \ adjectival_complement_dependencies, \ adjectival_mod_dependencies , \ @@ -148,13 +149,13 @@ class ViewpointFeatureDef: def package_check(self, lang): if not os.path.exists(self.STANCE_PERSPECTIVE_PATH): raise LexiconMissingError( - "Trying to load AWE Workbench Syntaxa and Discourse Feature \ - Module without supporting datafile {}".format(filepath) + "Trying to load AWE Workbench Syntax and Discourse Feature \ + Module without supporting datafile {}".format(self.STANCE_PERSPECTIVE_PATH) ) if not os.path.exists(self.MORPHOLEX_PATH): raise LexiconMissingError( - "Trying to load AWE Workbench Syntaxa and Discourse Feature \ - Module without supporting datafile {}".format(filepath) + "Trying to load AWE Workbench Syntax and Discourse Feature \ + Module without supporting datafile {}".format(self.MORPHOLEX_PATH) ) def load_lexicon(self, lang): From f8c22601c38f6b18a37d72f6546fa39ec2af043a Mon Sep 17 00:00:00 2001 From: duckduckdoof Date: Thu, 3 Apr 2025 13:42:53 -0400 Subject: [PATCH 15/18] Removed deprecated nameof --- .../components/syntaxDiscourseFeats.py | 21 +++++++++---------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/awe_components/components/syntaxDiscourseFeats.py b/awe_components/components/syntaxDiscourseFeats.py index 0f51970..dba3503 100644 --- a/awe_components/components/syntaxDiscourseFeats.py +++ b/awe_components/components/syntaxDiscourseFeats.py @@ -45,21 +45,20 @@ class SyntaxAndDiscourseFeatDef(object): ) as filepath: TRANSITION_CATEGORIES_PATH = filepath - datapaths = [{'pathname': nameof(TRANSITION_TERMS_PATH), - 'value': TRANSITION_TERMS_PATH}, - {'pathname': nameof(TRANSITION_CATEGORIES_PATH), - 'value': TRANSITION_CATEGORIES_PATH}] - transition_terms = {} transition_categories = {} def package_check(self, lang): - for path in self.datapaths: - if not os.path.exists(path['value']): - raise LexiconMissingError( - "Trying to load AWE Workbench Lexicon Module \ - without {name} datafile".format(name=path['pathname']) - ) + if not os.path.exists(self.TRANSITION_TERMS_PATH): + raise LexiconMissingError( + "Trying to load AWE Workbench Syntax and Discourse Feature \ + Module without supporting datafile {}".format(self.TRANSITION_TERMS_PATH) + ) + if not os.path.exists(self.TRANSITION_CATEGORIES_PATH): + raise LexiconMissingError( + "Trying to load AWE Workbench Syntax and Discourse Feature \ + Module without supporting datafile {}".format(self.TRANSITION_CATEGORIES_PATH) + ) def load_lexicons(self, lang): self.transition_terms = \ From d87fa768ca656bb4ae2549616a8a0c8bf9c8a3f3 Mon Sep 17 00:00:00 2001 From: duckduckdoof Date: Thu, 3 Apr 2025 13:58:41 -0400 Subject: [PATCH 16/18] Removed pesky import --- awe_components/components/syntaxDiscourseFeats.py | 1 - 1 file changed, 1 deletion(-) diff --git a/awe_components/components/syntaxDiscourseFeats.py b/awe_components/components/syntaxDiscourseFeats.py index dba3503..08d8acc 100644 --- a/awe_components/components/syntaxDiscourseFeats.py +++ b/awe_components/components/syntaxDiscourseFeats.py @@ -3,7 +3,6 @@ import os import srsly -from varname import nameof from spacy.tokens import Doc, Token from spacy.language import Language From 41758d3b8bbd866a434dc06361eb90487f690527 Mon Sep 17 00:00:00 2001 From: duckduckdoof Date: Thu, 17 Apr 2025 11:47:31 -0400 Subject: [PATCH 17/18] Removed branch for lexica --- setup.cfg | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.cfg b/setup.cfg index 66536f8..66901e6 100644 --- a/setup.cfg +++ b/setup.cfg @@ -31,7 +31,7 @@ python_requires = >=3.9 cmdclass = install = install.AWEInstall install_requires = - awe_lexica @ git+https://github.com/ArgLab/AWE_Lexica.git@varname-patch + awe_lexica @ git+https://github.com/ArgLab/AWE_Lexica.git spacy coreferee rdflib From b0dd88176ae7aaab9487b039c43172d57b4a18e6 Mon Sep 17 00:00:00 2001 From: duckduckdoof Date: Thu, 24 Apr 2025 15:13:45 -0400 Subject: [PATCH 18/18] Updated readme on installation --- README.md | 39 ++++++++++----------------------------- 1 file changed, 10 insertions(+), 29 deletions(-) diff --git a/README.md b/README.md index 5511a71..f601dcb 100755 --- a/README.md +++ b/README.md @@ -6,46 +6,27 @@ which can be installed into a Spacy pipeline. They annotate the Spacy parse tree with additional attributes that make it easy to summarize information about features of student writing. +Before You Install +------------ + +It is helpful to note that the use of AWE Components is best tested using [AWE_Workbench](https://github.com/ArgLab/AWE_Workbench), which utilizes the features defined in AWE Components. There are a series of automatic tests which can be run to verify or validate AWE Components; in addition, there are examples, a web server for parsing documents, and an interactive document highlighting tool for visualizing the document features which are derived from AWE Components. + +See AWE Workbench's installations steps and verify that you'd want to use it instead of installing AWE Components directly. + Installation ------------ -Set up Python 3.9. 3.8 will *not* work. If you wish to use `conda`: +Set up Python 3.11. If you wish to use `conda`: - conda create -n test_install python=3.9 pip + conda create -n test_install python=3.11 pip pip install pip --upgrade conda activate test_install If you wish to use plain old `pip` with `virtualenvwrapper`: - mkvirtualenv awe_components --python=/usr/bin/python3.9 + mkvirtualenv awe_components --python=/usr/bin/python3.11 pip install pip --upgrade -Install prerequisites: - -[Holmes Extractor Expandable](https://github.com/ETS-Next-Gen/holmes-extractor-expandable): - - git clone git@github.com:ETS-Next-Gen/holmes-extractor-expandable.git - cd holmes-extractor-expandable/~ - pip install . - -[AWE Language Tool](https://github.com/ETS-Next-Gen/AWE_LanguageTool): - - git clone git@github.com:ETS-Next-Gen/AWE_LanguageTool.git - cd AWE_LanguageTool/ - pip install . - -[AWE Spell Correct](https://github.com/ETS-Next-Gen/AWE_SpellCorrect) - - git clone git@github.com:ETS-Next-Gen/AWE_SpellCorrect.git - cd AWE_SpellCorrect/ - pip install . - -[AWE Lexica](https://github.com/ETS-Next-Gen/AWE_Lexica) - - git clone git@github.com:ETS-Next-Gen/AWE_Lexica.git - cd AWE_Lexica/ - pip install . - Then from the AWE Workbench Components directory: pip install .