From 513f9e803e36e88b92b64fe6684a58c30189c718 Mon Sep 17 00:00:00 2001 From: Paul Selzner <78799570+pselzner@users.noreply.github.com> Date: Fri, 5 Dec 2025 15:00:30 +0100 Subject: [PATCH] set lemmatization as default --- main.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/main.py b/main.py index d48e02e..230d15f 100644 --- a/main.py +++ b/main.py @@ -41,7 +41,7 @@ class BIBFileInput(FileSettings, InputSettings): class PreprocessTXT(EnvSettings): LANGUAGE: str = "en" FILTER_STOPWORDS: bool = True - UNIGRAM_NORMALIZER: str = "porter" + UNIGRAM_NORMALIZER: str = "lemma" USE_NGRAMS: bool = True NGRAM_MIN: int = 2 NGRAM_MAX: int = 3 @@ -54,7 +54,7 @@ class PreprocessTXT(EnvSettings): class PreprocessBIB(EnvSettings): LANGUAGE: str = "en" FILTER_STOPWORDS: bool = True - UNIGRAM_NORMALIZER: str = "porter" + UNIGRAM_NORMALIZER: str = "lemma" USE_NGRAMS: bool = True NGRAM_MIN: int = 2 NGRAM_MAX: int = 3