From 98a037cf0202b7244f2ec297aa5caa0a179a2e38 Mon Sep 17 00:00:00 2001
From: iamdoron
Date: Wed, 7 Dec 2022 12:42:35 +0200
Subject: [PATCH] remove duplicate words

---
 makemore.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/makemore.py b/makemore.py
index db0fe4c1..4f60757b 100644
--- a/makemore.py
+++ b/makemore.py
@@ -550,6 +550,7 @@ def create_datasets(input_file):
     words = data.splitlines()
     words = [w.strip() for w in words] # get rid of any leading or trailing white space
     words = [w for w in words if w] # get rid of any empty strings
+    words = list(set(words)) # get rid of duplicates
     chars = sorted(list(set(''.join(words)))) # all the possible characters
     max_word_length = max(len(w) for w in words)
     print(f"number of examples in the dataset: {len(words)}")