From c0938a4b5d467e01bfea493a98460041d576a420 Mon Sep 17 00:00:00 2001
From: Jonas Reinsch
Date: Thu, 19 Oct 2023 16:28:25 +0200
Subject: [PATCH] Simplify collection/sorting of all possible characters.

---
 makemore.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/makemore.py b/makemore.py
index db0fe4c1..90bc552e 100644
--- a/makemore.py
+++ b/makemore.py
@@ -550,7 +550,7 @@ def create_datasets(input_file):
     words = data.splitlines()
     words = [w.strip() for w in words] # get rid of any leading or trailing white space
     words = [w for w in words if w] # get rid of any empty strings
-    chars = sorted(list(set(''.join(words)))) # all the possible characters
+    chars = sorted(set(''.join(words))) # all the possible characters
     max_word_length = max(len(w) for w in words)
     print(f"number of examples in the dataset: {len(words)}")
     print(f"max word length: {max_word_length}")