From 65229db93861ec03c693def24ee9c99e80d22297 Mon Sep 17 00:00:00 2001 From: Farhad Hossain Date: Mon, 18 May 2020 00:44:11 +0600 Subject: [PATCH] Updated Main.py In the previous one, if the word "happy" appeared twice in read.txt, it was counted only once. Now all occurrences are counted, so we get more accurate results. --- main.py | 34 ++++++++++++---------------------- 1 file changed, 12 insertions(+), 22 deletions(-) diff --git a/main.py b/main.py index 0df22a8..5fc3803 100644 --- a/main.py +++ b/main.py @@ -1,6 +1,5 @@ -import string from collections import Counter - +from string import punctuation import matplotlib.pyplot as plt # reading text file @@ -10,21 +9,13 @@ lower_case = text.lower() # Removing punctuations -cleaned_text = lower_case.translate(str.maketrans('', '', string.punctuation)) +cleaned_text = lower_case.translate(str.maketrans('', '', punctuation)) # splitting text into words tokenized_words = cleaned_text.split() -stop_words = ["i", "me", "my", "myself", "we", "our", "ours", "ourselves", "you", "your", "yours", "yourself", "yourselves", "he", "him", "his", "himself", "she", "her", "hers", "herself", "it", "its", "itself", "they", "them", "their", "theirs", "themselves", "what", "which", "who", "whom", "this", "that", "these", "those", "am", "is", "are", "was", "were", "be", "been", "being", "have", "has", "had", "having", "do", "does", "did", "doing", "a", "an", "the", "and", "but", "if", "or", "because", "as", "until", "while", "of", "at", "by", "for", "with", "about", "against", "between", "into", "through", "during", "before", "after", "above", "below", "to", "from", "up", "down", "in", "out", "on", "off", "over", "under", "again", "further", "then", "once", "here", "there", "when", "where", "why", "how", "all", "any", "both", "each", "few", "more", "most", "other", "some", "such", "no", "nor", "not", "only", "own", "same", "so", "than", "too", "very", "s", "t", "can", "will", "just", "don", "should", "now"] +# get all stop 
words from the file +stop_words = open("stop_words.txt", encoding="utf-8").read().split() # Removing stop words from the tokenized words list final_words = [] @@ -47,17 +38,16 @@ clear_line = line.replace("\n", '').replace(",", '').replace("'", '').strip() word, emotion = clear_line.split(':') - if word in final_words: - emotion_list.append(emotion) + for f_word in final_words: + if f_word == word: + emotion_list.append(emotion.strip()) print(emotion_list) -w = Counter(emotion_list) -print(w) - -# Plotting the emotions on the graph +emo_counter = Counter(emotion_list) +print(emo_counter) -fig, ax1 = plt.subplots() -ax1.bar(w.keys(), w.values()) +fig, ax = plt.subplots() +ax.bar(emo_counter.keys(), emo_counter.values()) fig.autofmt_xdate() -plt.savefig('graph.png') +plt.savefig("emo.png") plt.show()