Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions images/dashboard/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,11 @@ COPY ./dashboard.py /code/dashboard.py
# Preloaded pickles and helper modules are copied next to dashboard.py in /code.
COPY ./preload_archive.pickle /code/preload_archive.pickle
COPY ./preload_list.pickle /code/preload_list.pickle
COPY ./bigbangvendorgraph.py /code/bigbangvendorgraph.py
# Word-trend helper imported by dashboard.py as `wordtrend`.
COPY ./bigbangwordtrend.py /code/bigbangwordtrend.py
# BigBang source tree; it is pip-installed in editable mode further down.
COPY ./bigbang /code/bigbang

# NOTE(review): no later USER instruction is visible in this hunk, so the RUN
# steps below and presumably the CMD run as root - confirm this is intended.
USER root
# Download NLTK's "popular" data collection at build time.
RUN python -m nltk.downloader popular
# Editable install of the BigBang package copied above.
RUN python -m pip install -e /code/bigbang/

# Serve dashboard.py with Panel on port 5006.
CMD ["panel", "serve","--warm", "dashboard.py", "--session-ids", "external-signed", "--port", "5006"]
68 changes: 68 additions & 0 deletions images/dashboard/bigbangwordtrend.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
# Copyright 2023 Priyanka Sinha

# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at

# http://www.apache.org/licenses/LICENSE-2.0

# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


from bigbang.archive import load as load_archive
from bigbang.archive import Archive
import bigbang.ingress.mailman as mailman
import bigbang.analysis.process as process
import networkx as nx
import pandas as pd
from pprint import pprint as pp
import pytz
import numpy as np
import math
import nltk
from itertools import repeat
from nltk.stem.lancaster import LancasterStemmer
st = LancasterStemmer()
from nltk.corpus import stopwords
import re

# Only get_word_trends is exported by `from bigbangwordtrend import *`.
__all__ = ["get_word_trends"]

# When True, count_word() stems every token with the Lancaster stemmer `st`
# before counting single-word matches.
stem = False

def count_word(text, word):
    """Count how often ``word`` occurs in ``text``, ignoring case.

    A single word (no space in it) is matched against whole tokens: the text
    has apostrophes removed, every other non-word character replaced by a
    space, and is then lower-cased and tokenized with NLTK before counting.
    A phrase (``word`` contains a space) is counted as a plain substring of
    the lower-cased text.

    Args:
        text: Message body to search. Anything that is not a non-empty
            string (for example ``None``, ``""`` or a float NaN) counts as 0.
        word: Word or space-separated phrase to look for.

    Returns:
        The number of occurrences as an ``int``.
    """
    # A float NaN is truthy, so a bare truthiness check would let it through
    # and crash on ``.replace``; check the type as well.
    if not isinstance(text, str) or not text:
        return 0

    # The text is lower-cased on both paths below, so the search term has to
    # be lower-cased too or any term with an uppercase letter never matches.
    word = word.lower()

    if " " in word:
        # Phrase: substring count on the lower-cased text.
        return text.lower().count(word)

    # Normalize the text - remove apostrophes and punctuation, lower case.
    normalized_text = re.sub(r'[^\w]', ' ', text.replace("'", "")).lower()
    tokenized_text = nltk.tokenize.word_tokenize(normalized_text)

    if stem:
        # NOTE(review): only the tokens are stemmed, not ``word``; presumably
        # callers pass an already-stemmed term when ``stem`` is on - confirm.
        tokenized_text = [st.stem(t) for t in tokenized_text]

    return tokenized_text.count(word)


def get_word_trends(archive, checkwords=None):
    """Sum per-message word counts for each day of an archive.

    Args:
        archive: DataFrame with a ``Body`` column holding the message text
            and a ``Date`` column whose values provide ``toordinal()``.
            It is not modified.
        checkwords: Words or phrases to count in each body. Defaults to
            ``protocol``, ``middlebox``, ``standard`` and ``chair``.

    Returns:
        A DataFrame indexed by ``Date-ordinal`` (the proleptic Gregorian
        ordinal of each message date) with one summed column per entry in
        ``checkwords``, plus any other numeric column of ``archive``.
    """
    if checkwords is None:
        checkwords = ("protocol", "middlebox", "standard", "chair")

    # Rows without a date cannot be assigned to a day, so drop them first.
    # The explicit copy keeps the column assignments below away from the
    # caller's DataFrame.
    archives_data = archive.dropna(subset=['Date']).copy()

    for word in checkwords:
        # Passing the word through ``args`` avoids a closure over the loop
        # variable.
        archives_data[word] = archives_data['Body'].apply(count_word, args=(word,))

    archives_data['Date-ordinal'] = archives_data['Date'].apply(lambda x: x.toordinal())

    # Restrict the sum to numeric columns: adding up text or datetime columns
    # is meaningless here and, depending on the pandas version, either
    # concatenates strings or raises TypeError.
    return archives_data.groupby('Date-ordinal').sum(numeric_only=True)
23 changes: 23 additions & 0 deletions images/dashboard/dashboard.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
import hvplot.networkx as hvnx
from datetime import date
import bigbangvendorgraph as graph
import bigbangwordtrend as wordtrend
import networkx as nx
import matplotlib.pyplot as plt
import pickle
Expand Down Expand Up @@ -51,6 +52,20 @@ def get_top_senders(archive_select):
return top_senders.rename("Number of Emails")


@pn.depends(archive_select=archive_select_widget)
def plot_wordtrends(archive_select):
    """Plot the smoothed daily counts of the tracked words for one archive.

    Args:
        archive_select: Key into ``preload_archive`` for the selected
            mailing list.

    Returns:
        The per-word hvplot line plots combined into one overlay with ``*``.
        Returning it is what gives Panel something to render.
    """
    archive = preload_archive[archive_select]
    # Hand get_word_trends a copy instead of the preloaded DataFrame itself.
    df = archive.data.copy()
    trends = wordtrend.get_word_trends(df)
    checkwords = ["protocol", "middlebox", "standard", "chair"]
    window = 5

    # The rolling mean does not depend on the word, so compute it once and
    # only over the columns that are plotted.
    smooth_sums = trends[checkwords].rolling(window).mean()

    overlay = None
    for word in checkwords:
        # NOTE(review): get_word_trends groups by 'Date-ordinal', so whether
        # x='Date' resolves against that index needs confirming.
        line = smooth_sums[word].hvplot.line(x='Date', value_label=word)
        overlay = line if overlay is None else overlay * line

    return overlay



@pn.depends(archive_select=archive_select_widget)
def plot_interactions(archive_select):
archive = preload_archive[archive_select]
Expand Down Expand Up @@ -111,6 +126,13 @@ def plot_interactions(archive_select):
plot_daily_activity,
)

# Word-trend plot with a short Markdown caption above it.
plot_wordtrends_boxed = pn.Column(
    pn.pane.Markdown(
        "#### This plot shows the occurrence of selected words in the mailing list over time."
    ),
    plot_wordtrends,
)

get_top_senders_boxed = pn.Column(
pn.pane.Markdown(
"#### This table shows the information of the top senders to the mailing list, such as their name, their email address, and the amount of email they have sent."
Expand Down Expand Up @@ -156,6 +178,7 @@ def plot_interactions(archive_select):
pn.Column(
archive_select_widget_boxed,
pn.Row(plot_daily_activity_boxed, plot_interactions_boxed),
pn.Row(plot_wordtrends_boxed),
get_top_senders_boxed,
)
)
Expand Down
1 change: 1 addition & 0 deletions images/dashboard/env.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -9,3 +9,4 @@ dependencies:
- networkx=3.0
- matplotlib=3.7.0
- scipy=1.10.1
- nltk=3.8.1