"""Scrape CodeChef rating-graph history for a list of users and save it as CSV.

Provenance: this is ``temp.py`` as produced by the two-patch series authored
by karansdoshi -- commit 0307ff74975766649c64a626a812b3fade1ea439
("Scrapping of Rating Graphs 22-6-19", 2019-06-22) followed by commit
d45e5c519a0f3e1383f1c403514077bf09c23e1c ("updated script for rating graphs",
2019-07-14).

Run as a script, it reads the ``username`` column of ``username.csv`` and
writes one ``<username>.csv`` per user for the first 100 users.
"""

import json
import logging
import os
import re

import pandas as pd

logger = logging.getLogger(__name__)

PROFILE_URL = "https://www.codechef.com/users/"
REQUEST_HEADERS = {"User-Agent": "Mozilla/5.0"}
REQUEST_TIMEOUT = 30  # seconds; without a timeout a stalled connection hangs forever

# (CSV column, key inside each ``all_rating`` entry), in output column order.
RATING_FIELDS = (
    ("Code", "code"),
    ("Year", "getyear"),
    ("Month", "getmonth"),
    ("Day", "getday"),
    ("Reason", "reason"),
    ("Penalised_In", "penalised_in"),
    ("Rating", "rating"),
    ("Rank", "rank"),
    ("Name", "name"),
    ("End_Date", "end_date"),
)

# Start of the JavaScript assignment that embeds the rating history in the page.
_ALL_RATING_ASSIGNMENT = re.compile(r"var\s+all_rating\s*=\s*")


def _fetch_profile_page(username):
    """Download the profile page of *username* and return its markup as text.

    Raises:
        requests.RequestException: on connection problems, timeouts or a
            non-2xx HTTP status.
    """
    # Imported here rather than at module level so that parsing an already
    # downloaded page (``page_source=...``) needs neither package.
    import requests
    from bs4 import BeautifulSoup

    response = requests.get(
        PROFILE_URL + username,
        headers=REQUEST_HEADERS,
        timeout=REQUEST_TIMEOUT,
    )
    response.raise_for_status()
    return str(BeautifulSoup(response.content, "html.parser"))


def _parse_rating_history(page_source):
    """Return the list of rating records embedded in *page_source*.

    The page is expected to contain ``var all_rating = [ {...}, ... ];``.
    NOTE(review): this assumes the array literal is valid JSON (double-quoted
    keys, ``null`` for missing values) -- confirm against a live page.

    The literal is decoded with ``json`` instead of ``eval``: the text comes
    from a remote server, so evaluating it as Python would execute arbitrary
    code, and slicing/splitting on ``'},'`` breaks as soon as a string value
    contains that sequence.

    Raises:
        ValueError: if the assignment is missing or its value is not a
            JSON array.
    """
    match = _ALL_RATING_ASSIGNMENT.search(page_source)
    if match is None:
        raise ValueError("no 'all_rating' data found in page")
    try:
        # raw_decode parses exactly one JSON value starting at the given
        # offset and ignores whatever JavaScript follows it.
        history, _ = json.JSONDecoder().raw_decode(page_source, match.end())
    except ValueError as err:
        raise ValueError("could not parse 'all_rating' data") from err
    if not isinstance(history, list):
        raise ValueError("'all_rating' is not an array")
    return history


def get_rating_graphs(username, *, page_source=None, output_dir="."):
    """Save the rating history of *username* to ``<username>.csv``.

    Args:
        username: CodeChef handle; also used as the CSV file stem.
        page_source: Optional profile-page markup. When omitted the page is
            downloaded from CodeChef.
        output_dir: Directory the CSV is written to (default: current
            working directory, as before).

    Returns:
        The ``pandas.DataFrame`` that was written, one row per rating record
        with the columns listed in ``RATING_FIELDS``.

    Raises:
        ValueError: if the page does not contain parsable rating data.
        KeyError: if a rating record lacks one of the expected keys.
        requests.RequestException: if the download fails.
    """
    if page_source is None:
        page_source = _fetch_profile_page(username)
    history = _parse_rating_history(page_source)

    # JavaScript ``null`` is stored as the string 'None', which is what the
    # earlier eval-based implementation wrote to the CSV.
    columns = {
        column: ["None" if entry[key] is None else entry[key] for entry in history]
        for column, key in RATING_FIELDS
    }
    frame = pd.DataFrame(columns, columns=[column for column, _ in RATING_FIELDS])
    frame.to_csv(os.path.join(output_dir, username + ".csv"))
    return frame


def main():
    """Export rating histories for the first 100 users in ``username.csv``."""
    usernames = pd.read_csv("username.csv")["username"][0:100]
    for username in usernames:
        try:
            get_rating_graphs(username)
        except Exception:
            # Best effort: one bad profile must not abort the whole batch,
            # but the failure is logged instead of being silently dropped.
            logger.exception("Skipping %s: could not export rating history", username)


if __name__ == "__main__":
    logging.basicConfig(level=logging.INFO)
    main()