From edaacdf6b3dea5d47b0035f000f35598e62f5a71 Mon Sep 17 00:00:00 2001 From: j105rob Date: Tue, 18 Nov 2014 12:32:52 -0500 Subject: [PATCH 01/42] a little refactoring on the way to making a pluggable architecture. --- dumpmon.py | 18 +++++++++++++----- lib/Paste.py | 21 +++++++++++++++++++-- lib/Pastebin.py | 31 ++++++++++++++----------------- lib/Pastie.py | 27 +++++++++++---------------- lib/Site.py | 37 +++++++++++++++++++++++++++++++++---- lib/Slexy.py | 30 +++++++++++++----------------- lib/helper.py | 7 ++++--- 7 files changed, 107 insertions(+), 64 deletions(-) diff --git a/dumpmon.py b/dumpmon.py index a315673..81213a4 100644 --- a/dumpmon.py +++ b/dumpmon.py @@ -14,7 +14,7 @@ from lib.Pastie import Pastie, PastiePaste from lib.helper import log from time import sleep -from twitter import Twitter, OAuth +from twitter import * from settings import CONSUMER_KEY, CONSUMER_SECRET, ACCESS_TOKEN, ACCESS_TOKEN_SECRET, log_file import threading import logging @@ -30,30 +30,38 @@ def monitor(): parser.add_argument( "-v", "--verbose", help="more verbose", action="store_true") args = parser.parse_args() + level = logging.INFO if args.verbose: level = logging.DEBUG + logging.basicConfig( format='%(asctime)s [%(levelname)s] %(message)s', filename=log_file, level=level) logging.info('Monitoring...') + bot = Twitter( auth=OAuth(ACCESS_TOKEN, ACCESS_TOKEN_SECRET, CONSUMER_KEY, CONSUMER_SECRET) ) + # Create lock for both output log and tweet action log_lock = threading.Lock() tweet_lock = threading.Lock() pastebin_thread = threading.Thread( target=Pastebin().monitor, args=[bot, tweet_lock]) + pastebin_thread.daemon = True + pastebin_thread.start() + slexy_thread = threading.Thread( target=Slexy().monitor, args=[bot, tweet_lock]) + slexy_thread.daemon = True + slexy_thread.start() + pastie_thead = threading.Thread( target=Pastie().monitor, args=[bot, tweet_lock]) - - for thread in (pastebin_thread, slexy_thread, pastie_thead): - thread.daemon = True - thread.start() + pastie_thead.daemon = True + pastie_thead.start() # Let threads run try: diff --git a/lib/Paste.py b/lib/Paste.py index bc379c6..e1f5c23 100644 --- a/lib/Paste.py +++ b/lib/Paste.py @@ -4,11 +4,12 @@ import re class Paste(object): - def __init__(self): + def __init__(self,id): ''' class Paste: Generic "Paste" object to contain attributes of a standard paste ''' + self.id = id self.emails = 0 self.hashes = 0 self.num_emails = 0 @@ -17,7 +18,11 @@ class Paste: Generic "Paste" object to contain attributes of a standard paste self.type = None self.sites = None self.db_keywords = 0.0 - + + def __eq__(self,comparePaste): + #logging.info('id %s compares to %s'%(self.id, comparePaste.id)) + return self.id == comparePaste.id + def match(self): ''' Matches the paste against a series of regular expressions to determine if the paste is 'interesting' @@ -33,32 +38,44 @@ def match(self): ''' # Get the amount of emails self.emails = list(set(regexes['email'].findall(self.text))) + self.hashes = regexes['hash32'].findall(self.text) + self.num_emails = len(self.emails) + self.num_hashes = len(self.hashes) + if self.num_emails > 0: self.sites = list(set([re.search('@(.*)$', email).group(1).lower() for email in self.emails])) + for regex in regexes['db_keywords']: if regex.search(self.text): logging.debug('\t[+] ' + regex.search(self.text).group(1)) self.db_keywords += round(1/float( len(regexes['db_keywords'])), 2) + for regex in regexes['blacklist']: if regex.search(self.text): logging.debug('\t[-] ' + regex.search(self.text).group(1)) self.db_keywords -= round(1.25 * ( 1/float(len(regexes['db_keywords']))), 2) + if (self.num_emails >= settings.EMAIL_THRESHOLD) or (self.num_hashes >= settings.HASH_THRESHOLD) or (self.db_keywords >= settings.DB_KEYWORDS_THRESHOLD): self.type = 'db_dump' + if regexes['cisco_hash'].search(self.text) or regexes['cisco_pass'].search(self.text): self.type = 'cisco' + if regexes['honeypot'].search(self.text): self.type = 'honeypot' + if regexes['google_api'].search(self.text): self.type = 'google_api' + # if regexes['juniper'].search(self.text): self.type = 'Juniper' for regex in regexes['banlist']: if regex.search(self.text): self.type = None break + return self.type diff --git a/lib/Pastebin.py b/lib/Pastebin.py index e9656cd..a66728c 100644 --- a/lib/Pastebin.py +++ b/lib/Pastebin.py @@ -10,11 +10,9 @@ class PastebinPaste(Paste): def __init__(self, id): - self.id = id + super(PastebinPaste, self).__init__(id) self.headers = None self.url = 'http://pastebin.com/raw.php?i=' + self.id - super(PastebinPaste, self).__init__() - class Pastebin(Site): def __init__(self, last_id=None): @@ -24,23 +22,22 @@ def __init__(self, last_id=None): self.BASE_URL = 'http://pastebin.com' self.sleep = SLEEP_PASTEBIN super(Pastebin, self).__init__() - + + def parse(self): + return BeautifulSoup(helper.download(self.BASE_URL + '/archive')).find_all( + lambda tag: tag.name == 'td' and tag.a and '/archive/' not in tag.a['href'] and tag.a['href'][1:]) + def update(self): '''update(self) - Fill Queue with new Pastebin IDs''' logging.info('Retrieving Pastebin ID\'s') - results = BeautifulSoup(helper.download(self.BASE_URL + '/archive')).find_all( - lambda tag: tag.name == 'td' and tag.a and '/archive/' not in tag.a['href'] and tag.a['href'][1:]) - new_pastes = [] - if not self.ref_id: - results = results[:60] - for entry in results: + i=0 + for entry in self.parse(): paste = PastebinPaste(entry.a['href'][1:]) - # Check to see if we found our last checked URL - if paste.id == self.ref_id: - break - new_pastes.append(paste) - for entry in new_pastes[::-1]: - logging.info('Adding URL: ' + entry.url) - self.put(entry) + if not self.hasSeen(paste): + #logging.info('Adding URL: ' + paste.url) + i+=1 + self.put(paste) + logging.info('Pastebin Added URLs: ' + str(i)) + def get_paste_text(self, paste): return helper.download(paste.url) diff --git a/lib/Pastie.py b/lib/Pastie.py index da84e55..7b090d6 100644 --- a/lib/Pastie.py +++ b/lib/Pastie.py @@ -10,11 +10,9 @@ class PastiePaste(Paste): def __init__(self, id): - self.id = id + super(PastiePaste, self).__init__(id) self.headers = None self.url = 'http://pastie.org/pastes/' + self.id + '/text' - super(PastiePaste, self).__init__() - class Pastie(Site): def __init__(self, last_id=None): @@ -24,25 +22,22 @@ def __init__(self, last_id=None): self.BASE_URL = 'http://pastie.org' self.sleep = SLEEP_PASTIE super(Pastie, self).__init__() + + def parse(self): + return [tag for tag in BeautifulSoup(helper.download( + self.BASE_URL + '/pastes')).find_all('p', 'link') if tag.a] def update(self): '''update(self) - Fill Queue with new Pastie IDs''' logging.info('Retrieving Pastie ID\'s') - results = [tag for tag in BeautifulSoup(helper.download( - self.BASE_URL + '/pastes')).find_all('p', 'link') if tag.a] - new_pastes = [] - if not self.ref_id: - results = results[:60] - for entry in results: + i=0 + for entry in self.parse(): paste = PastiePaste(entry.a['href'].replace( self.BASE_URL + '/pastes/', '')) - # Check to see if we found our last checked URL - if paste.id == self.ref_id: - break - new_pastes.append(paste) - for entry in new_pastes[::-1]: - logging.debug('Adding URL: ' + entry.url) - self.put(entry) + if not self.hasSeen(paste): + i+=1 + self.put(paste) + logging.info('Pastie Added URLs: ' + str(i)) def get_paste_text(self, paste): return BeautifulSoup(helper.download(paste.url)).pre.text \ No newline at end of file diff --git a/lib/Site.py b/lib/Site.py index 6ab60ef..192a560 100644 --- a/lib/Site.py +++ b/lib/Site.py @@ -5,10 +5,11 @@ from pymongo import MongoClient from requests import ConnectionError from twitter import TwitterError -from settings import USE_DB, DB_HOST, DB_PORT +from settings import USE_DB, DB_HOST, DB_PORT, SEEN_DEQUE_LEN import logging import helper +from collections import deque class Site(object): ''' @@ -31,19 +32,32 @@ class Site(object): # that I could find... So, I decided to implement my own queue with a few # changes def __init__(self, queue=None): + + # the double ended queue is used to check the last n URLs to see if they have been processed, since the URLs are random strings. + self.seen = deque(maxlen=SEEN_DEQUE_LEN) + if queue is None: self.queue = [] if USE_DB: # Lazily create the db and collection if not present self.db_client = MongoClient(DB_HOST, DB_PORT).paste_db.pastes - + def addSeen(self,item): + self.seen.append(item) + #logging.info('[@] Site deque len %i'%(len(self.seen))) + + def hasSeen(self,item): + res = item in self.seen + #logging.info('[@] URL Seen %s %s'%(item.url,res)) + return res + def empty(self): return len(self.queue) == 0 def get(self): if not self.empty(): result = self.queue[0] + self.addSeen(result) del self.queue[0] else: result = None @@ -67,10 +81,24 @@ def clear(self): def list(self): print('\n'.join(url for url in self.queue)) + def parse(self): + #override this + pass + + def update(self): + #override this + pass + + def get_paste_text(self): + #override this + pass + def monitor(self, bot, t_lock): self.update() while(1): while not self.empty(): + #need to sleep to avoid the ban.... + time.sleep(self.sleep/4) paste = self.get() self.ref_id = paste.id logging.info('[*] Checking ' + paste.url) @@ -92,9 +120,10 @@ def monitor(self, bot, t_lock): 'url' : paste.url }) try: + logging.debug('[++++++++++++] Tweet %s'%(tweet)) bot.statuses.update(status=tweet) - except TwitterError: - pass + except TwitterError as e: + logging.debug('[!] TwitterError %s'%(str(e))) self.update() while self.empty(): logging.debug('[*] No results... sleeping') diff --git a/lib/Slexy.py b/lib/Slexy.py index 3876c81..c7fc757 100644 --- a/lib/Slexy.py +++ b/lib/Slexy.py @@ -10,11 +10,9 @@ class SlexyPaste(Paste): def __init__(self, id): - self.id = id + super(SlexyPaste, self).__init__(id) self.headers = {'Referer': 'http://slexy.org/view/' + self.id} self.url = 'http://slexy.org/raw/' + self.id - super(SlexyPaste, self).__init__() - class Slexy(Site): def __init__(self, last_id=None): @@ -24,24 +22,22 @@ def __init__(self, last_id=None): self.BASE_URL = 'http://slexy.org' self.sleep = SLEEP_SLEXY super(Slexy, self).__init__() - + + def parse(self): + return BeautifulSoup(helper.download(self.BASE_URL + '/recent')).find_all( + lambda tag: tag.name == 'td' and tag.a and '/view/' in tag.a['href']) + def update(self): '''update(self) - Fill Queue with new Slexy IDs''' logging.info('[*] Retrieving Slexy ID\'s') - results = BeautifulSoup(helper.download(self.BASE_URL + '/recent')).find_all( - lambda tag: tag.name == 'td' and tag.a and '/view/' in tag.a['href']) - new_pastes = [] - if not self.ref_id: - results = results[:60] - for entry in results: + + i=0 + for entry in self.parse(): paste = SlexyPaste(entry.a['href'].replace('/view/', '')) - # Check to see if we found our last checked URL - if paste.id == self.ref_id: - break - new_pastes.append(paste) - for entry in new_pastes[::-1]: - logging.info('[+] Adding URL: ' + entry.url) - self.put(entry) + if not self.hasSeen(paste): + i+=1 + self.put(paste) + logging.info('Slexy Added URLs: ' + str(i)) def get_paste_text(self, paste): return helper.download(paste.url, paste.headers) diff --git a/lib/helper.py b/lib/helper.py index bccfb02..585d705 100644 --- a/lib/helper.py +++ b/lib/helper.py @@ -11,14 +11,14 @@ r = requests.Session() - - + def download(url, headers=None): if not headers: headers = None if headers: r.headers.update(headers) try: + logging.info(url) response = r.get(url).text except requests.ConnectionError: logging.warn('[!] Critical Error - Cannot connect to site') @@ -45,6 +45,7 @@ def build_tweet(paste): ''' tweet = None if paste.match(): + logging.info('Paste Matched') tweet = paste.url if paste.type == 'db_dump': if paste.num_emails > 0: @@ -63,7 +64,7 @@ def build_tweet(paste): tweet += ' Possible SSH private key' elif paste.type == 'honeypot': tweet += ' Dionaea Honeypot Log' - tweet += ' #infoleak' + tweet += ' #infosec #dataleak' if paste.num_emails > 0: print(paste.emails) return tweet From 3b6a081cc6f12332d16cc2372c756e3936e72ca6 Mon Sep 17 00:00:00 2001 From: j105rob Date: Wed, 19 Nov 2014 08:30:02 -0500 Subject: [PATCH 02/42] moved bot out into its own class; starting to add dynamic capabilities to make the bot interactive --- dumpmon.py | 38 +++++++++++++++----------------------- lib/Site.py | 6 +++--- lib/TwitterBot.py | 31 +++++++++++++++++++++++++++++++ 3 files changed, 49 insertions(+), 26 deletions(-) create mode 100644 lib/TwitterBot.py diff --git a/dumpmon.py b/dumpmon.py index 81213a4..15a7b69 100644 --- a/dumpmon.py +++ b/dumpmon.py @@ -13,9 +13,9 @@ from lib.Slexy import Slexy, SlexyPaste from lib.Pastie import Pastie, PastiePaste from lib.helper import log +from lib.TwitterBot import TwitterBot from time import sleep -from twitter import * -from settings import CONSUMER_KEY, CONSUMER_SECRET, ACCESS_TOKEN, ACCESS_TOKEN_SECRET, log_file +from settings import log_file import threading import logging @@ -36,32 +36,24 @@ def monitor(): level = logging.DEBUG logging.basicConfig( - format='%(asctime)s [%(levelname)s] %(message)s', filename=log_file, level=level) + format='%(asctime)s [%(levelname)s] %(funcName)s %(module)s %(message)s', filename=log_file, level=level) logging.info('Monitoring...') - bot = Twitter( - auth=OAuth(ACCESS_TOKEN, ACCESS_TOKEN_SECRET, - CONSUMER_KEY, CONSUMER_SECRET) - ) + bot = TwitterBot() - # Create lock for both output log and tweet action + # Create lock for output log log_lock = threading.Lock() - tweet_lock = threading.Lock() - - pastebin_thread = threading.Thread( - target=Pastebin().monitor, args=[bot, tweet_lock]) - pastebin_thread.daemon = True - pastebin_thread.start() - slexy_thread = threading.Thread( - target=Slexy().monitor, args=[bot, tweet_lock]) - slexy_thread.daemon = True - slexy_thread.start() - - pastie_thead = threading.Thread( - target=Pastie().monitor, args=[bot, tweet_lock]) - pastie_thead.daemon = True - pastie_thead.start() + def createThread(target,*args,**kwargs): + t = threading.Thread(target=target, args=args, kwargs=kwargs) + t.daemon = True + t.start() + + createThread(bot.monitor) + createThread(bot.test) + #createThread(Pastebin().monitor,bot) + #createThread(Slexy().monitor,bot) + #createThread(Pastie().monitor,bot) # Let threads run try: diff --git a/lib/Site.py b/lib/Site.py index 192a560..f9f0399 100644 --- a/lib/Site.py +++ b/lib/Site.py @@ -93,12 +93,12 @@ def get_paste_text(self): #override this pass - def monitor(self, bot, t_lock): + def monitor(self, bot): self.update() while(1): while not self.empty(): #need to sleep to avoid the ban.... - time.sleep(self.sleep/4) + #time.sleep(self.sleep/4) paste = self.get() self.ref_id = paste.id logging.info('[*] Checking ' + paste.url) @@ -106,7 +106,7 @@ def monitor(self, bot, t_lock): tweet = helper.build_tweet(paste) if tweet: logging.info(tweet) - with t_lock: + with bot.tweetLock: if USE_DB: self.db_client.save({ 'pid' : paste.id, diff --git a/lib/TwitterBot.py b/lib/TwitterBot.py new file mode 100644 index 0000000..2b2d91a --- /dev/null +++ b/lib/TwitterBot.py @@ -0,0 +1,31 @@ +from twitter import * +from settings import CONSUMER_KEY, CONSUMER_SECRET, ACCESS_TOKEN, ACCESS_TOKEN_SECRET, log_file +import logging +import time +import threading + +class TwitterBot(Twitter): + """ + Subclassing the Twitter API and botifying it + """ + def __init__(self): + super(TwitterBot, self).__init__(auth=OAuth(ACCESS_TOKEN, ACCESS_TOKEN_SECRET, + CONSUMER_KEY, CONSUMER_SECRET)) + + self.tweetLock= threading.Lock() + + def test(self): + while(True): + print("Doing stuff") + time.sleep(2) + + def monitor(self): + twitter_userstream = TwitterStream(auth=self.auth, domain='userstream.twitter.com') + try: + for msg in twitter_userstream.user(): + if 'text' in msg: + print("[$] Recieved Tweet %s from %s"%(msg['text'],msg['user']['screen_name'])) + + except StopIteration: + print("stopping iteration") + \ No newline at end of file From c0a0f065e7fe72fd7c87aa442975c2ea26b751f5 Mon Sep 17 00:00:00 2001 From: j105rob Date: Wed, 19 Nov 2014 10:30:31 -0500 Subject: [PATCH 03/42] first cut at stats for the dumpmon database --- dumpmon.py | 12 +++++---- lib/Pastebin.py | 5 +--- lib/Pastie.py | 5 +--- lib/Site.py | 2 +- lib/Slexy.py | 5 +--- lib/Stats.py | 68 +++++++++++++++++++++++++++++++++++++++++++++++ lib/TwitterBot.py | 3 +++ 7 files changed, 82 insertions(+), 18 deletions(-) create mode 100644 lib/Stats.py diff --git a/dumpmon.py b/dumpmon.py index 15a7b69..b9986df 100644 --- a/dumpmon.py +++ b/dumpmon.py @@ -14,6 +14,7 @@ from lib.Pastie import Pastie, PastiePaste from lib.helper import log from lib.TwitterBot import TwitterBot +from lib.Stats import Stats from time import sleep from settings import log_file import threading @@ -36,7 +37,7 @@ def monitor(): level = logging.DEBUG logging.basicConfig( - format='%(asctime)s [%(levelname)s] %(funcName)s %(module)s %(message)s', filename=log_file, level=level) + format='%(asctime)s [%(levelname)s] [%(module)s] [%(funcName)s] %(message)s', filename=log_file, level=level) logging.info('Monitoring...') bot = TwitterBot() @@ -50,10 +51,11 @@ def createThread(target,*args,**kwargs): t.start() createThread(bot.monitor) - createThread(bot.test) - #createThread(Pastebin().monitor,bot) - #createThread(Slexy().monitor,bot) - #createThread(Pastie().monitor,bot) + createThread(Stats().monitor,bot) + + createThread(Pastebin().monitor,bot) + createThread(Slexy().monitor,bot) + createThread(Pastie().monitor,bot) # Let threads run try: diff --git a/lib/Pastebin.py b/lib/Pastebin.py index a66728c..fc5dde0 100644 --- a/lib/Pastebin.py +++ b/lib/Pastebin.py @@ -15,10 +15,7 @@ def __init__(self, id): self.url = 'http://pastebin.com/raw.php?i=' + self.id class Pastebin(Site): - def __init__(self, last_id=None): - if not last_id: - last_id = None - self.ref_id = last_id + def __init__(self): self.BASE_URL = 'http://pastebin.com' self.sleep = SLEEP_PASTEBIN super(Pastebin, self).__init__() diff --git a/lib/Pastie.py b/lib/Pastie.py index 7b090d6..2c84de5 100644 --- a/lib/Pastie.py +++ b/lib/Pastie.py @@ -15,10 +15,7 @@ def __init__(self, id): self.url = 'http://pastie.org/pastes/' + self.id + '/text' class Pastie(Site): - def __init__(self, last_id=None): - if not last_id: - last_id = None - self.ref_id = last_id + def __init__(self): self.BASE_URL = 'http://pastie.org' self.sleep = SLEEP_PASTIE super(Pastie, self).__init__() diff --git a/lib/Site.py b/lib/Site.py index f9f0399..af8309f 100644 --- a/lib/Site.py +++ b/lib/Site.py @@ -38,6 +38,7 @@ def __init__(self, queue=None): if queue is None: self.queue = [] + if USE_DB: # Lazily create the db and collection if not present self.db_client = MongoClient(DB_HOST, DB_PORT).paste_db.pastes @@ -100,7 +101,6 @@ def monitor(self, bot): #need to sleep to avoid the ban.... #time.sleep(self.sleep/4) paste = self.get() - self.ref_id = paste.id logging.info('[*] Checking ' + paste.url) paste.text = self.get_paste_text(paste) tweet = helper.build_tweet(paste) diff --git a/lib/Slexy.py b/lib/Slexy.py index c7fc757..c6dc0d8 100644 --- a/lib/Slexy.py +++ b/lib/Slexy.py @@ -15,10 +15,7 @@ def __init__(self, id): self.url = 'http://slexy.org/raw/' + self.id class Slexy(Site): - def __init__(self, last_id=None): - if not last_id: - last_id = None - self.ref_id = last_id + def __init__(self): self.BASE_URL = 'http://slexy.org' self.sleep = SLEEP_SLEXY super(Slexy, self).__init__() diff --git a/lib/Stats.py b/lib/Stats.py new file mode 100644 index 0000000..35373a1 --- /dev/null +++ b/lib/Stats.py @@ -0,0 +1,68 @@ +from pymongo import MongoClient +from bson import Code + +from settings import USE_DB, DB_HOST, DB_PORT, STATS_FREQ + +import logging +import time + +class Stats(object): + def __init__(self): + if USE_DB: + # Lazily create the db and collection if not present + try: + self.client = MongoClient(DB_HOST, DB_PORT).paste_db.pastes + except pymongo.errors.ConnectionFailure, e: + logging.error('[!] Database failed to start %s'%(e)) + + def uniqueEmailSet(self): + map = Code("function () {" + " this.emails.forEach(function(z) {" + " emit(z,1);" + " });" + "}") + reduce = Code("function (key,values) {" + "var total = 0;" + "for (var i = 0; i Date: Wed, 19 Nov 2014 10:40:02 -0500 Subject: [PATCH 04/42] added twitterror import in stats class --- lib/Stats.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/Stats.py b/lib/Stats.py index 35373a1..8542eab 100644 --- a/lib/Stats.py +++ b/lib/Stats.py @@ -1,6 +1,6 @@ from pymongo import MongoClient from bson import Code - +from twitter import TwitterError from settings import USE_DB, DB_HOST, DB_PORT, STATS_FREQ import logging From a802c8c4f8ea55274f21af46a25518781ddf532b Mon Sep 17 00:00:00 2001 From: j105rob Date: Thu, 20 Nov 2014 07:29:01 -0500 Subject: [PATCH 05/42] added TwitterBot protocol --- dumpmon.py | 10 ++++---- lib/RegexMgr.py | 23 ++++++++++++++++++ lib/Stats.py | 36 ++++++++++++++++----------- lib/TwitterBot.py | 59 +++++++++++++++++++++++++++++++++++++++------ settings.py-example | 6 +++++ 5 files changed, 108 insertions(+), 26 deletions(-) create mode 100644 lib/RegexMgr.py diff --git a/dumpmon.py b/dumpmon.py index b9986df..8ec20a9 100644 --- a/dumpmon.py +++ b/dumpmon.py @@ -37,7 +37,7 @@ def monitor(): level = logging.DEBUG logging.basicConfig( - format='%(asctime)s [%(levelname)s] [%(module)s] [%(funcName)s] %(message)s', filename=log_file, level=level) + format='%(asctime)s [%(levelname)s] [%(module)s] [%(funcName)s] %(message)s', filename=log_file, level=level) logging.info('Monitoring...') bot = TwitterBot() @@ -51,11 +51,11 @@ def createThread(target,*args,**kwargs): t.start() createThread(bot.monitor) - createThread(Stats().monitor,bot) + #createThread(Stats().monitor,bot) - createThread(Pastebin().monitor,bot) - createThread(Slexy().monitor,bot) - createThread(Pastie().monitor,bot) + #createThread(Pastebin().monitor,bot) + #createThread(Slexy().monitor,bot) + #createThread(Pastie().monitor,bot) # Let threads run try: diff --git a/lib/RegexMgr.py b/lib/RegexMgr.py new file mode 100644 index 0000000..8508750 --- /dev/null +++ b/lib/RegexMgr.py @@ -0,0 +1,23 @@ +import re +from pymongo import MongoClient +from settings import USE_DB, DB_HOST, DB_PORT + +class RegexMgr(object): + def __init__(self): + if USE_DB: + try: + self.client = MongoClient(DB_HOST, DB_PORT).paste_db.regexes + except pymongo.errors.ConnectionFailure, e: + logging.error('[!] Database failed to start %s'%(e)) + + def add(self,regex, user): + if self.valid(regex): + return True + + def valid(self,regex): + try: + re.compile(regex) + is_valid = True + except re.error: + is_valid = False + return is_valid \ No newline at end of file diff --git a/lib/Stats.py b/lib/Stats.py index 8542eab..3548e10 100644 --- a/lib/Stats.py +++ b/lib/Stats.py @@ -9,7 +9,6 @@ class Stats(object): def __init__(self): if USE_DB: - # Lazily create the db and collection if not present try: self.client = MongoClient(DB_HOST, DB_PORT).paste_db.pastes except pymongo.errors.ConnectionFailure, e: @@ -46,22 +45,31 @@ def uniqueHashSet(self): "}") result = self.client.map_reduce(map,reduce,"res") return result - + + def status(self): + if not USE_DB: + logging.warning("[!] Not going to produce Stats because DB is off.") + return None + try: + e = self.uniqueEmailSet().count() + h = self.uniqueHashSet().count() + msg = "Status as of %s: \n Unique emails: %i, Unique hashes: %i\n #infosec #dataleak"%(time.strftime("%c"),e,h) + return msg + except Exception,e: + logging.error('[!] Database Error %s'%(e)) + return None + def monitor(self,twitterBot): while(True): - if not USE_DB: - logging.warning("[!] Not going to produce Stats because DB is off.") - return try: - e = self.uniqueEmailSet().count() - h = self.uniqueHashSet().count() - msg = "Dump Monitor status: \n Unique emails: %i, Unique hashes: %i\n #infosec #dataleak"%(e,h) - with twitterBot.tweetLock: - try: - logging.debug('[++++++++++++] Status Tweet %s'%(msg)) - twitterBot.statuses.update(status=msg) - except TwitterError as e: - logging.debug('[!] TwitterError %s'%(str(e))) + msg = self.status() + if msg: + with twitterBot.tweetLock: + try: + logging.debug('[++++++++++++] Status Tweet %s'%(msg)) + twitterBot.statuses.update(status=msg) + except TwitterError as e: + logging.debug('[!] TwitterError %s'%(str(e))) except Exception,e: logging.error('[!] Database Error %s'%(e)) diff --git a/lib/TwitterBot.py b/lib/TwitterBot.py index 30c585e..8427594 100644 --- a/lib/TwitterBot.py +++ b/lib/TwitterBot.py @@ -1,8 +1,10 @@ from twitter import * -from settings import CONSUMER_KEY, CONSUMER_SECRET, ACCESS_TOKEN, ACCESS_TOKEN_SECRET, log_file +from settings import CONSUMER_KEY, CONSUMER_SECRET, ACCESS_TOKEN, ACCESS_TOKEN_SECRET, log_file, TWITTER_SCREEN_NAME import logging import time import threading +from lib.RegexMgr import RegexMgr +from lib.Stats import Stats class TwitterBot(Twitter): """ @@ -13,12 +15,44 @@ def __init__(self): CONSUMER_KEY, CONSUMER_SECRET)) self.tweetLock= threading.Lock() - - def test(self): - while(True): - print("Doing stuff") - time.sleep(2) - + + #might not really be the right place for this, but for now. + self.regexMgr = RegexMgr() + self.statusMgr = Stats() + + def status(self,aryDM,user): + return self.statusMgr.status() + + def addregex(self,aryDM,user): + response = None + if self.regexMgr.add(text,user): + response = "Your regex has been added! Thanks!" + else: + response = "I could not add your regex, it didn't validate. :(" + return response + + def _parseTweet(self,dm,t): + """ + Probably should make this a protocol, but we'll see + """ + logging.info('[+] Processing DM request: %s Screen Name: %s'%(dm['text'],dm['sender']['screen_name'])) + response = None + #assume that we are going to use a space delim protocol and the ary[0] is the function name to call. + aryDM = dm['text'].split() + f = getattr(self,aryDM[0]) + if f: + response = f(aryDM,dm['sender']['screen_name']) + logging.info('[+] Sending DM response: %s Screen Name: %s'%(response,dm['sender']['screen_name'])) + else: + logging.error('[!] Could not find function in protocol: %s Screen Name: %s'%(aryDM[0],dm['sender']['screen_name'])) + + if response: + with self.tweetLock: + try: + self.direct_messages.new(user=dm['sender']['screen_name'],text=response) + except TwitterError as e: + logging.debug('[!] TwitterError %s'%(str(e))) + def monitor(self): """ This stream function is blocking and will not yield, thus does not need to be in a loop; refer to the docs @@ -26,9 +60,20 @@ def monitor(self): twitter_userstream = TwitterStream(auth=self.auth, domain='userstream.twitter.com') try: for msg in twitter_userstream.user(): + #logging.debug("{^} %s"%(msg)) if 'text' in msg: print("[$] Recieved Tweet %s from %s"%(msg['text'],msg['user']['screen_name'])) + + #process DMs, but only from other people + if 'direct_message' in msg and msg['direct_message']['sender']['screen_name'] != TWITTER_SCREEN_NAME: + self._parseTweet(msg['direct_message'],msg) except StopIteration: print("stopping iteration") + + + + + + \ No newline at end of file diff --git a/settings.py-example b/settings.py-example index 1b8ca26..3e2d4df 100644 --- a/settings.py-example +++ b/settings.py-example @@ -23,3 +23,9 @@ SLEEP_PASTIE = 30 # Other configuration tweet_history = "tweet.history" log_file = "output.log" + +#this needs to be set to the max you'd expect to pull from a single page!! +# otherwise you will get a rolling queue and possible process parsed URLs more than once +SEEN_DEQUE_LEN = 500 + +STATS_FREQ = 60*60 From 74d4c480c445a6862debca30beb6d13601a3062c Mon Sep 17 00:00:00 2001 From: j105rob Date: Thu, 20 Nov 2014 08:30:19 -0500 Subject: [PATCH 06/42] added custom regex & persistance into DB --- dumpmon.py | 6 ++++-- lib/RegexMgr.py | 42 ++++++++++++++++++++++++++++++++++++++++-- lib/Site.py | 2 +- lib/Stats.py | 2 +- lib/TwitterBot.py | 12 +++++++----- settings.py-example | 8 ++++++-- 6 files changed, 59 insertions(+), 13 deletions(-) diff --git a/dumpmon.py b/dumpmon.py index 8ec20a9..cec2f5a 100644 --- a/dumpmon.py +++ b/dumpmon.py @@ -14,6 +14,7 @@ from lib.Pastie import Pastie, PastiePaste from lib.helper import log from lib.TwitterBot import TwitterBot +from lib.RegexMgr import RegexMgr from lib.Stats import Stats from time import sleep from settings import log_file @@ -40,8 +41,9 @@ def monitor(): format='%(asctime)s [%(levelname)s] [%(module)s] [%(funcName)s] %(message)s', filename=log_file, level=level) logging.info('Monitoring...') - bot = TwitterBot() - + regexMgr = RegexMgr() + bot = TwitterBot(regexMgr) + # Create lock for output log log_lock = threading.Lock() diff --git a/lib/RegexMgr.py b/lib/RegexMgr.py index 8508750..f9c8d46 100644 --- a/lib/RegexMgr.py +++ b/lib/RegexMgr.py @@ -1,17 +1,50 @@ import re from pymongo import MongoClient from settings import USE_DB, DB_HOST, DB_PORT +import time +import logging + +import threading class RegexMgr(object): - def __init__(self): + """ + This class is intended to handle all the regex stuff and persistance to the DB for observers + """ + def __init__(self): + self.regexLock = threading.Lock() + if USE_DB: try: self.client = MongoClient(DB_HOST, DB_PORT).paste_db.regexes except pymongo.errors.ConnectionFailure, e: logging.error('[!] Database failed to start %s'%(e)) + self.customRegexes = [] + self._loadRegexes() + + + def _loadRegexes(self): + with self.regexLock: + cursor = self.client.find() + for row in cursor: + customRegex = {} + rc = re.compile(row['regex']) + customRegex['regex'] = rc + customRegex['user'] = row['user'] + customRegex['added'] = time.strftime("%c") + self.customRegexes.append(customRegex) + + logging.info("[+] Loaded custom regexes: %s"%(self.customRegexes)) + + def reloadCustomRegexes(self): + self.customRegexes = [] + self._loadRegexes() + def add(self,regex, user): if self.valid(regex): + o = {"user":user,"regex":regex,"added":time.strftime("%c")} + self.client.insert(o) + self.customRegexes.append(o) return True def valid(self,regex): @@ -20,4 +53,9 @@ def valid(self,regex): is_valid = True except re.error: is_valid = False - return is_valid \ No newline at end of file + return is_valid + + + + + \ No newline at end of file diff --git a/lib/Site.py b/lib/Site.py index af8309f..5b09b4c 100644 --- a/lib/Site.py +++ b/lib/Site.py @@ -120,7 +120,7 @@ def monitor(self, bot): 'url' : paste.url }) try: - logging.debug('[++++++++++++] Tweet %s'%(tweet)) + logging.debug('[+] Tweet %s'%(tweet)) bot.statuses.update(status=tweet) except TwitterError as e: logging.debug('[!] TwitterError %s'%(str(e))) diff --git a/lib/Stats.py b/lib/Stats.py index 3548e10..9d64441 100644 --- a/lib/Stats.py +++ b/lib/Stats.py @@ -66,7 +66,7 @@ def monitor(self,twitterBot): if msg: with twitterBot.tweetLock: try: - logging.debug('[++++++++++++] Status Tweet %s'%(msg)) + logging.debug('[+] Status Tweet %s'%(msg)) twitterBot.statuses.update(status=msg) except TwitterError as e: logging.debug('[!] TwitterError %s'%(str(e))) diff --git a/lib/TwitterBot.py b/lib/TwitterBot.py index 8427594..9b8270a 100644 --- a/lib/TwitterBot.py +++ b/lib/TwitterBot.py @@ -3,29 +3,30 @@ import logging import time import threading -from lib.RegexMgr import RegexMgr from lib.Stats import Stats class TwitterBot(Twitter): """ Subclassing the Twitter API and botifying it """ - def __init__(self): + def __init__(self,regexMgr): super(TwitterBot, self).__init__(auth=OAuth(ACCESS_TOKEN, ACCESS_TOKEN_SECRET, CONSUMER_KEY, CONSUMER_SECRET)) self.tweetLock= threading.Lock() - #might not really be the right place for this, but for now. - self.regexMgr = RegexMgr() + self.regexMgr = regexMgr self.statusMgr = Stats() def status(self,aryDM,user): return self.statusMgr.status() def addregex(self,aryDM,user): + """ + The add regex assumes that ary[0] = 'addregex' and ary[1] = 'theregex' + """ response = None - if self.regexMgr.add(text,user): + if self.regexMgr.add(aryDM[1],user): response = "Your regex has been added! Thanks!" else: response = "I could not add your regex, it didn't validate. :(" @@ -55,6 +56,7 @@ def _parseTweet(self,dm,t): def monitor(self): """ + This function is expected to be on a separate thread. This stream function is blocking and will not yield, thus does not need to be in a loop; refer to the docs """ twitter_userstream = TwitterStream(auth=self.auth, domain='userstream.twitter.com') diff --git a/settings.py-example b/settings.py-example index 3e2d4df..0bc53fe 100644 --- a/settings.py-example +++ b/settings.py-example @@ -24,8 +24,12 @@ SLEEP_PASTIE = 30 tweet_history = "tweet.history" log_file = "output.log" -#this needs to be set to the max you'd expect to pull from a single page!! +# This needs to be set to the max you'd expect to pull from a single page!! # otherwise you will get a rolling queue and possible process parsed URLs more than once SEEN_DEQUE_LEN = 500 -STATS_FREQ = 60*60 +# Status freqquency the bot will spew out status; making math do the work as this is the number of seconds to delay between status updates +STATS_FREQ = 60*10 + +# This is the screen name of your bot +TWITTER_SCREEN_NAME = 'DataLeakBot' From 02719881a936ac44532586bff9648d8d88a8bcf8 Mon Sep 17 00:00:00 2001 From: j105rob Date: Fri, 21 Nov 2014 08:47:43 -0500 Subject: [PATCH 07/42] added check user submitted url via DM to twitterbot --- dumpmon.py | 15 ++++------- lib/Paste.py | 8 +++--- lib/Pastebin.py | 1 + lib/Pastie.py | 1 + lib/Site.py | 3 +++ lib/Slexy.py | 1 + lib/TwitterBot.py | 17 +++++++++++- lib/UserSubmitted.py | 62 ++++++++++++++++++++++++++++++++++++++++++++ lib/helper.py | 7 ++++- settings.py-example | 1 + 10 files changed, 101 insertions(+), 15 deletions(-) create mode 100644 lib/UserSubmitted.py diff --git a/dumpmon.py b/dumpmon.py index cec2f5a..4c7d5cc 100644 --- a/dumpmon.py +++ b/dumpmon.py @@ -12,7 +12,7 @@ from lib.Pastebin import Pastebin, PastebinPaste from lib.Slexy import Slexy, SlexyPaste from lib.Pastie import Pastie, PastiePaste -from lib.helper import log +from lib.helper import log, createThread from lib.TwitterBot import TwitterBot from lib.RegexMgr import RegexMgr from lib.Stats import Stats @@ -46,18 +46,13 @@ def monitor(): # Create lock for output log log_lock = threading.Lock() - - def createThread(target,*args,**kwargs): - t = threading.Thread(target=target, args=args, kwargs=kwargs) - t.daemon = True - t.start() createThread(bot.monitor) - #createThread(Stats().monitor,bot) + createThread(Stats().monitor,bot) - #createThread(Pastebin().monitor,bot) - #createThread(Slexy().monitor,bot) - #createThread(Pastie().monitor,bot) + createThread(Pastebin().monitor,bot) + createThread(Slexy().monitor,bot) + createThread(Pastie().monitor,bot) # Let threads run try: diff --git a/lib/Paste.py b/lib/Paste.py index e1f5c23..c51a1a8 100644 --- a/lib/Paste.py +++ b/lib/Paste.py @@ -7,7 +7,6 @@ class Paste(object): def __init__(self,id): ''' class Paste: Generic "Paste" object to contain attributes of a standard paste - ''' self.id = id self.emails = 0 @@ -42,11 +41,13 @@ def match(self): self.hashes = regexes['hash32'].findall(self.text) self.num_emails = len(self.emails) - + logging.debug("[*] Num Emails: %i"%(self.num_emails)) self.num_hashes = len(self.hashes) + logging.debug("[*] Num Hashes: %i"%(self.num_hashes)) if self.num_emails > 0: self.sites = list(set([re.search('@(.*)$', email).group(1).lower() for email in self.emails])) + logging.debug("[*] Num Sites: %i"%(len(self.sites))) for regex in regexes['db_keywords']: if regex.search(self.text): @@ -77,5 +78,6 @@ def match(self): if regex.search(self.text): self.type = None break - + + logging.debug("[*] Type: %s"%(self.type)) return self.type diff --git a/lib/Pastebin.py b/lib/Pastebin.py index fc5dde0..0dce7dd 100644 --- a/lib/Pastebin.py +++ b/lib/Pastebin.py @@ -19,6 +19,7 @@ def __init__(self): self.BASE_URL = 'http://pastebin.com' self.sleep = SLEEP_PASTEBIN super(Pastebin, self).__init__() + logging.info('[+] Started PasteBin') def parse(self): return BeautifulSoup(helper.download(self.BASE_URL + '/archive')).find_all( diff --git a/lib/Pastie.py b/lib/Pastie.py index 2c84de5..42792d2 100644 --- a/lib/Pastie.py +++ b/lib/Pastie.py @@ -19,6 +19,7 @@ def __init__(self): self.BASE_URL = 'http://pastie.org' self.sleep = SLEEP_PASTIE super(Pastie, self).__init__() + logging.info('[+] Started Pastie') def parse(self): return [tag for tag in BeautifulSoup(helper.download( diff --git a/lib/Site.py b/lib/Site.py index 5b09b4c..65d9035 100644 --- a/lib/Site.py +++ b/lib/Site.py @@ -84,14 +84,17 @@ def list(self): def parse(self): #override this + logging.error('[@] Function Not Implemented in Subclass') pass def update(self): #override this + logging.error('[@] Function Not Implemented in Subclass') pass def get_paste_text(self): #override this + logging.error('[@] Function Not Implemented in Subclass') pass def monitor(self, bot): diff --git a/lib/Slexy.py b/lib/Slexy.py index c6dc0d8..9e0ba44 100644 --- a/lib/Slexy.py +++ b/lib/Slexy.py @@ -19,6 +19,7 @@ def __init__(self): self.BASE_URL = 'http://slexy.org' self.sleep = SLEEP_SLEXY super(Slexy, self).__init__() + logging.info('[+] Started Slexy') def parse(self): return BeautifulSoup(helper.download(self.BASE_URL + '/recent')).find_all( diff --git a/lib/TwitterBot.py b/lib/TwitterBot.py index 9b8270a..5c73902 100644 --- a/lib/TwitterBot.py +++ b/lib/TwitterBot.py @@ -4,6 +4,9 @@ import time import threading from lib.Stats import Stats +from lib.helper import createThread + +from lib.UserSubmitted import UserSubmitted, UserSubmittedPaste class TwitterBot(Twitter): """ @@ -17,6 +20,16 @@ def __init__(self,regexMgr): self.regexMgr = regexMgr self.statusMgr = Stats() + self.userSubmittedSite = UserSubmitted() + logging.info('[+] Started TwitterBot') + + def check(self,aryDM,user): + self.userSubmittedSite.update(aryDM[1]) + response = self.userSubmittedSite.monitor(self) + if response: + return response + else: + return "I did not find anything interesting for "+aryDM[1] def status(self,aryDM,user): return self.statusMgr.status() @@ -40,9 +53,11 @@ def _parseTweet(self,dm,t): response = None #assume that we are going to use a space delim protocol and the ary[0] is the function name to call. aryDM = dm['text'].split() + logging.info('[+] Processing DM action: %s'%(aryDM[0])) f = getattr(self,aryDM[0]) if f: - response = f(aryDM,dm['sender']['screen_name']) + user = dm['sender']['screen_name'] + response = f(aryDM, user) logging.info('[+] Sending DM response: %s Screen Name: %s'%(response,dm['sender']['screen_name'])) else: logging.error('[!] Could not find function in protocol: %s Screen Name: %s'%(aryDM[0],dm['sender']['screen_name'])) diff --git a/lib/UserSubmitted.py b/lib/UserSubmitted.py new file mode 100644 index 0000000..f782ead --- /dev/null +++ b/lib/UserSubmitted.py @@ -0,0 +1,62 @@ +from .Site import Site +from .Paste import Paste +from bs4 import BeautifulSoup +from . import helper +import time +from settings import USE_DB +from twitter import TwitterError +import logging +from urlunshort import resolve + +class UserSubmittedPaste(Paste): + def __init__(self, url): + super(UserSubmittedPaste, self).__init__(url) + self.headers = None + self.url = resolve(url) + logging.info('[+] URL expanded to %s'%(self.url)) + +class UserSubmitted(Site): + def __init__(self): + super(UserSubmitted, self).__init__() + logging.info('[+] Started UserSubmitted') + + def parse(self): + pass + + def update(self,url): + paste = UserSubmittedPaste(url) + if not self.hasSeen(paste): + logging.info('Adding User Sumbmitted URL: ' + paste.url) + self.put(paste) + + def monitor(self, bot): + if not self.empty(): + paste = self.get() + logging.info('[*] Checking ' + paste.url) + paste.text = self.get_paste_text(paste) + tweet = helper.build_tweet(paste) + if tweet: + logging.info(tweet) + with bot.tweetLock: + if USE_DB: + self.db_client.save({ + 'pid' : paste.id, + 'text' : paste.text, + 'emails' : paste.emails, + 'hashes' : paste.hashes, + 'num_emails' : paste.num_emails, + 'num_hashes' : paste.num_hashes, + 'type' : paste.type, + 'db_keywords' : paste.db_keywords, + 'url' : paste.url + }) + try: + logging.debug('[+] Tweet %s'%(tweet)) + bot.statuses.update(status=tweet) + return tweet + except TwitterError as e: + logging.debug('[!] TwitterError %s'%(str(e))) + + + def get_paste_text(self, paste): + return helper.download(paste.url) \ No newline at end of file diff --git a/lib/helper.py b/lib/helper.py index 585d705..d24c0e6 100644 --- a/lib/helper.py +++ b/lib/helper.py @@ -8,10 +8,15 @@ import settings from time import sleep, strftime import logging +import threading r = requests.Session() - +def createThread(target,*args,**kwargs): + t = threading.Thread(target=target, args=args, kwargs=kwargs) + t.daemon = True + t.start() + def download(url, headers=None): if not headers: headers = None diff --git a/settings.py-example b/settings.py-example index 0bc53fe..2978ed1 100644 --- a/settings.py-example +++ b/settings.py-example @@ -20,6 +20,7 @@ SLEEP_SLEXY = 60 SLEEP_PASTEBIN = 15 SLEEP_PASTIE = 30 + # Other configuration tweet_history = "tweet.history" log_file = "output.log" From 16ce12453353b409d9101128cd6dcbc808728bd4 Mon Sep 17 00:00:00 2001 From: j105rob Date: Fri, 21 Nov 2014 12:25:09 -0500 Subject: [PATCH 08/42] changed out to curl for pulling raw data, resets were preventing data from being downloaded." --- dumpmon.py | 2 +- lib/Paste.py | 18 +++++++++++++++--- lib/Pastebin.py | 2 +- lib/Pastie.py | 2 +- lib/Site.py | 4 ++-- lib/TwitterBot.py | 5 +++-- lib/UserSubmitted.py | 2 +- lib/helper.py | 13 +++++++++++++ lib/regexes.py | 1 + 9 files changed, 38 insertions(+), 11 deletions(-) diff --git a/dumpmon.py b/dumpmon.py index 4c7d5cc..ed7b0c3 100644 --- a/dumpmon.py +++ b/dumpmon.py @@ -48,7 +48,7 @@ def monitor(): log_lock = threading.Lock() createThread(bot.monitor) - createThread(Stats().monitor,bot) + #createThread(Stats().monitor,bot) createThread(Pastebin().monitor,bot) createThread(Slexy().monitor,bot) diff --git a/lib/Paste.py b/lib/Paste.py index c51a1a8..dcac8ca 100644 --- a/lib/Paste.py +++ b/lib/Paste.py @@ -9,8 +9,9 @@ def __init__(self,id): class Paste: Generic "Paste" object to contain attributes of a standard paste ''' self.id = id - self.emails = 0 - self.hashes = 0 + self.emails = [] + self.emails2 = [] + self.hashes = [] self.num_emails = 0 self.num_hashes = 0 self.text = None @@ -36,12 +37,23 @@ def match(self): ''' # Get the amount of emails - self.emails = list(set(regexes['email'].findall(self.text))) + r = self.text.splitlines() + logging.debug("[*] Text: %s"%(self.text)) + logging.debug("[*] Num Lines in text: %i"%(len(r))) + if regexes['email'].search(self.text): + self.emails = regexes['email'].findall(self.text) + + if regexes['email2'].search(self.text): + self.emails2 = regexes['email2'].findall(self.text) self.hashes = regexes['hash32'].findall(self.text) self.num_emails = len(self.emails) logging.debug("[*] Num Emails: %i"%(self.num_emails)) + + self.num_emails = len(self.emails2) + logging.debug("[*] Num Emails2: %i"%(self.num_emails)) + self.num_hashes = len(self.hashes) logging.debug("[*] Num Hashes: %i"%(self.num_hashes)) diff --git a/lib/Pastebin.py b/lib/Pastebin.py index 0dce7dd..fd4eb92 100644 --- a/lib/Pastebin.py +++ b/lib/Pastebin.py @@ -38,4 +38,4 @@ def update(self): logging.info('Pastebin Added URLs: ' + str(i)) def get_paste_text(self, paste): - return helper.download(paste.url) + return helper.curl(paste.url) diff --git a/lib/Pastie.py b/lib/Pastie.py index 42792d2..8e770b8 100644 --- a/lib/Pastie.py +++ b/lib/Pastie.py @@ -38,4 +38,4 @@ def update(self): logging.info('Pastie Added URLs: ' + str(i)) def get_paste_text(self, paste): - return BeautifulSoup(helper.download(paste.url)).pre.text \ No newline at end of file + return BeautifulSoup(helper.curl(paste.url)).pre.text \ No newline at end of file diff --git a/lib/Site.py b/lib/Site.py index 65d9035..f432e8a 100644 --- a/lib/Site.py +++ b/lib/Site.py @@ -102,7 +102,7 @@ def monitor(self, bot): while(1): while not self.empty(): #need to sleep to avoid the ban.... - #time.sleep(self.sleep/4) + time.sleep(2) paste = self.get() logging.info('[*] Checking ' + paste.url) paste.text = self.get_paste_text(paste) @@ -126,7 +126,7 @@ def monitor(self, bot): logging.debug('[+] Tweet %s'%(tweet)) bot.statuses.update(status=tweet) except TwitterError as e: - logging.debug('[!] TwitterError %s'%(str(e))) + logging.error('[!] TwitterError %s'%(str(e))) self.update() while self.empty(): logging.debug('[*] No results... sleeping') diff --git a/lib/TwitterBot.py b/lib/TwitterBot.py index 5c73902..615bc26 100644 --- a/lib/TwitterBot.py +++ b/lib/TwitterBot.py @@ -84,10 +84,11 @@ def monitor(self): #process DMs, but only from other people if 'direct_message' in msg and msg['direct_message']['sender']['screen_name'] != TWITTER_SCREEN_NAME: self._parseTweet(msg['direct_message'],msg) - + except StopIteration: print("stopping iteration") - + except TwitterError as e: + logging.error('[!] TwitterError %s'%(str(e))) diff --git a/lib/UserSubmitted.py b/lib/UserSubmitted.py index f782ead..d710d75 100644 --- a/lib/UserSubmitted.py +++ b/lib/UserSubmitted.py @@ -59,4 +59,4 @@ def monitor(self, bot): def get_paste_text(self, paste): - return helper.download(paste.url) \ No newline at end of file + return helper.curl(paste.url) \ No newline at end of file diff --git a/lib/helper.py b/lib/helper.py index d24c0e6..c07db7a 100644 --- a/lib/helper.py +++ b/lib/helper.py @@ -10,6 +10,8 @@ import logging import threading +import pycurl +from StringIO import StringIO r = requests.Session() def createThread(target,*args,**kwargs): @@ -17,6 +19,17 @@ def createThread(target,*args,**kwargs): t.daemon = True t.start() +def curl (url): + buffer = StringIO() + c = pycurl.Curl() + c.setopt(c.URL, url) + c.setopt(c.WRITEDATA, buffer) + c.perform() + rc = c.getinfo(c.RESPONSE_CODE) + c.close() + logging.debug('[*] Response code: %d'%(rc)) + return buffer.getvalue() + def download(url, headers=None): if not headers: headers = None diff --git a/lib/regexes.py b/lib/regexes.py index 4d1e535..2486d25 100644 --- a/lib/regexes.py +++ b/lib/regexes.py @@ -2,6 +2,7 @@ regexes = { 'email': re.compile(r'[A-Z0-9._%+-]+@[A-Z0-9.-]+\.[A-Z]{2,4}', re.I), + 'email2':re.compile(r'[\w\.-]+@[\w\.-]+'), #'ssn' : re.compile(r'\d{3}-?\d{2}-?\d{4}'), 'hash32': re.compile(r'[^ Date: Mon, 24 Nov 2014 10:54:25 -0500 Subject: [PATCH 09/42] added HaveIBeenPwned site --- dumpmon.py | 5 ++++- lib/HaveIBeen.py | 54 +++++++++++++++++++++++++++++++++++++++++++++++ lib/Paste.py | 2 +- lib/Pastebin.py | 2 +- lib/Pastie.py | 2 +- lib/Slexy.py | 4 ++-- lib/TwitterBot.py | 33 +++++++++++++++++------------ lib/helper.py | 9 +++++--- 8 files changed, 88 insertions(+), 23 deletions(-) create mode 100644 lib/HaveIBeen.py diff --git a/dumpmon.py b/dumpmon.py index ed7b0c3..7470be7 100644 --- a/dumpmon.py +++ b/dumpmon.py @@ -12,6 +12,8 @@ from lib.Pastebin import Pastebin, PastebinPaste from lib.Slexy import Slexy, SlexyPaste from lib.Pastie import Pastie, PastiePaste +from lib.HaveIBeen import HaveIBeen, HaveIBeenPaste + from lib.helper import log, createThread from lib.TwitterBot import TwitterBot from lib.RegexMgr import RegexMgr @@ -48,8 +50,9 @@ def monitor(): log_lock = threading.Lock() createThread(bot.monitor) - #createThread(Stats().monitor,bot) + createThread(Stats().monitor,bot) + createThread(HaveIBeen().monitor,bot) createThread(Pastebin().monitor,bot) createThread(Slexy().monitor,bot) createThread(Pastie().monitor,bot) diff --git a/lib/HaveIBeen.py b/lib/HaveIBeen.py new file mode 100644 index 0000000..fea50aa --- /dev/null +++ b/lib/HaveIBeen.py @@ -0,0 +1,54 @@ +""" +Troy Hunt's RSS Feed for the last 50 pastes + +http://feeds.feedburner.com/HaveIBeenPwnedLatestPastes + +""" +import feedparser + +from .Site import Site +from .Paste import Paste +from bs4 import BeautifulSoup +from . import helper +from time import sleep +from settings import SLEEP_HAVEIBEEN +from twitter import TwitterError +import logging + +class HaveIBeenPaste(Paste): + def __init__(self, id): + super(HaveIBeenPaste, self).__init__(id) + self.headers = None + self.url = 'http://pastebin.com/raw.php?i=' + self.id + +class HaveIBeen(Site): + def __init__(self): + super(HaveIBeen, self).__init__() + self.sleep = SLEEP_HAVEIBEEN + logging.info('[+] Started HaveIBeen') + self.feedURL = 'http://feeds.feedburner.com/HaveIBeenPwnedLatestPastes' + + def _parse(self): + d = feedparser.parse(self.feedURL) + return d['entries'] + + def update(self): + logging.info('Retrieving HaveIBeenPwned ID\'s') + i=0 + + for entry in self._parse(): + l = entry['links'][0]['href'] + link = l.split(r'/') + paste = HaveIBeenPaste(link[3]) + if not self.hasSeen(paste): + i+=1 + self.put(paste) + logging.info('HaveIBeenPwned Added URLs: ' + str(i)) + + + def get_paste_text(self, paste): + return helper.curl(paste.url) + +if __name__ == '__main__': + c = HaveIBeen() + c.update() \ No newline at end of file diff --git a/lib/Paste.py b/lib/Paste.py index dcac8ca..14d2322 100644 --- a/lib/Paste.py +++ b/lib/Paste.py @@ -38,7 +38,7 @@ def match(self): ''' # Get the amount of emails r = self.text.splitlines() - logging.debug("[*] Text: %s"%(self.text)) + #logging.debug("[*] Text: %s"%(self.text)) logging.debug("[*] Num Lines in text: %i"%(len(r))) if regexes['email'].search(self.text): self.emails = regexes['email'].findall(self.text) diff --git a/lib/Pastebin.py b/lib/Pastebin.py index fd4eb92..ae00159 100644 --- a/lib/Pastebin.py +++ b/lib/Pastebin.py @@ -22,7 +22,7 @@ def __init__(self): logging.info('[+] Started PasteBin') def parse(self): - return BeautifulSoup(helper.download(self.BASE_URL + '/archive')).find_all( + return BeautifulSoup(helper.curl(self.BASE_URL + '/archive')).find_all( lambda tag: tag.name == 'td' and tag.a and '/archive/' not in tag.a['href'] and tag.a['href'][1:]) def update(self): diff --git a/lib/Pastie.py b/lib/Pastie.py index 8e770b8..54a191e 100644 --- a/lib/Pastie.py +++ b/lib/Pastie.py @@ -22,7 +22,7 @@ def __init__(self): logging.info('[+] Started Pastie') def parse(self): - return [tag for tag in BeautifulSoup(helper.download( + return [tag for tag in BeautifulSoup(helper.curl( self.BASE_URL + '/pastes')).find_all('p', 'link') if tag.a] def update(self): diff --git a/lib/Slexy.py b/lib/Slexy.py index 9e0ba44..adcbc7e 100644 --- a/lib/Slexy.py +++ b/lib/Slexy.py @@ -22,7 +22,7 @@ def __init__(self): logging.info('[+] Started Slexy') def parse(self): - return BeautifulSoup(helper.download(self.BASE_URL + '/recent')).find_all( + return BeautifulSoup(helper.curl(self.BASE_URL + '/recent')).find_all( lambda tag: tag.name == 'td' and tag.a and '/view/' in tag.a['href']) def update(self): @@ -38,4 +38,4 @@ def update(self): logging.info('Slexy Added URLs: ' + str(i)) def get_paste_text(self, paste): - return helper.download(paste.url, paste.headers) + return helper.curl(paste.url, paste.headers['Referer']) diff --git a/lib/TwitterBot.py b/lib/TwitterBot.py index 615bc26..edd9f6d 100644 --- a/lib/TwitterBot.py +++ b/lib/TwitterBot.py @@ -54,20 +54,25 @@ def _parseTweet(self,dm,t): #assume that we are going to use a space delim protocol and the ary[0] is the function name to call. aryDM = dm['text'].split() logging.info('[+] Processing DM action: %s'%(aryDM[0])) - f = getattr(self,aryDM[0]) - if f: - user = dm['sender']['screen_name'] - response = f(aryDM, user) - logging.info('[+] Sending DM response: %s Screen Name: %s'%(response,dm['sender']['screen_name'])) - else: - logging.error('[!] Could not find function in protocol: %s Screen Name: %s'%(aryDM[0],dm['sender']['screen_name'])) - - if response: - with self.tweetLock: - try: - self.direct_messages.new(user=dm['sender']['screen_name'],text=response) - except TwitterError as e: - logging.debug('[!] TwitterError %s'%(str(e))) + + try: + f = getattr(self,aryDM[0]) + if f: + user = dm['sender']['screen_name'] + response = f(aryDM, user) + logging.info('[+] Sending DM response: %s Screen Name: %s'%(response,dm['sender']['screen_name'])) + else: + logging.error('[!] Could not find function in protocol: %s Screen Name: %s'%(aryDM[0],dm['sender']['screen_name'])) + + if response: + with self.tweetLock: + try: + self.direct_messages.new(user=dm['sender']['screen_name'],text=response) + except TwitterError as e: + logging.debug('[!] TwitterError %s'%(str(e))) + except Exception as e: + logging.error('[!] Error trying to parse DM: %s '%(aryDM)) + def monitor(self): """ diff --git a/lib/helper.py b/lib/helper.py index c07db7a..bf0cebc 100644 --- a/lib/helper.py +++ b/lib/helper.py @@ -19,11 +19,15 @@ def createThread(target,*args,**kwargs): t.daemon = True t.start() -def curl (url): +def curl (url,referer=None): buffer = StringIO() c = pycurl.Curl() c.setopt(c.URL, url) c.setopt(c.WRITEDATA, buffer) + + if referer: + c.setopt(c.REFERER, referer) + c.perform() rc = c.getinfo(c.RESPONSE_CODE) c.close() @@ -83,6 +87,5 @@ def build_tweet(paste): elif paste.type == 'honeypot': tweet += ' Dionaea Honeypot Log' tweet += ' #infosec #dataleak' - if paste.num_emails > 0: - print(paste.emails) + return tweet From b26fdb8a2e707c517e3111df24c0b869dd92f443 Mon Sep 17 00:00:00 2001 From: j105rob Date: Sun, 30 Nov 2014 06:21:00 -0500 Subject: [PATCH 10/42] error handling update --- lib/HaveIBeen.py | 8 ++++++-- lib/Pastie.py | 6 +++++- lib/Site.py | 25 ++++++++++++++----------- lib/helper.py | 29 ++++++++++++++++------------- 4 files changed, 41 insertions(+), 27 deletions(-) diff --git a/lib/HaveIBeen.py b/lib/HaveIBeen.py index fea50aa..1b374e8 100644 --- a/lib/HaveIBeen.py +++ b/lib/HaveIBeen.py @@ -29,8 +29,12 @@ def __init__(self): self.feedURL = 'http://feeds.feedburner.com/HaveIBeenPwnedLatestPastes' def _parse(self): - d = feedparser.parse(self.feedURL) - return d['entries'] + try: + d = feedparser.parse(self.feedURL) + return d['entries'] + except Exception as e: + logging.error('[!] Feed Parser Error: %s'%(str(e))) + return None def update(self): logging.info('Retrieving HaveIBeenPwned ID\'s') diff --git a/lib/Pastie.py b/lib/Pastie.py index 54a191e..0645b9c 100644 --- a/lib/Pastie.py +++ b/lib/Pastie.py @@ -38,4 +38,8 @@ def update(self): logging.info('Pastie Added URLs: ' + str(i)) def get_paste_text(self, paste): - return BeautifulSoup(helper.curl(paste.url)).pre.text \ No newline at end of file + try: + return BeautifulSoup(helper.curl(paste.url)).pre.text + except Exception as e: + logging.error('[!] Beautiful Soup Error: %s'%(str(e))) + return None \ No newline at end of file diff --git a/lib/Site.py b/lib/Site.py index f432e8a..c906d72 100644 --- a/lib/Site.py +++ b/lib/Site.py @@ -111,17 +111,20 @@ def monitor(self, bot): logging.info(tweet) with bot.tweetLock: if USE_DB: - self.db_client.save({ - 'pid' : paste.id, - 'text' : paste.text, - 'emails' : paste.emails, - 'hashes' : paste.hashes, - 'num_emails' : paste.num_emails, - 'num_hashes' : paste.num_hashes, - 'type' : paste.type, - 'db_keywords' : paste.db_keywords, - 'url' : paste.url - }) + try: + self.db_client.save({ + 'pid' : paste.id, + 'text' : paste.text, + 'emails' : paste.emails, + 'hashes' : paste.hashes, + 'num_emails' : paste.num_emails, + 'num_hashes' : paste.num_hashes, + 'type' : paste.type, + 'db_keywords' : paste.db_keywords, + 'url' : paste.url + }) + except Exception as e: + logging.error('[!] MongoDB Error %s'%(str(e))) try: logging.debug('[+] Tweet %s'%(tweet)) bot.statuses.update(status=tweet) diff --git a/lib/helper.py b/lib/helper.py index bf0cebc..2808eec 100644 --- a/lib/helper.py +++ b/lib/helper.py @@ -20,19 +20,22 @@ def createThread(target,*args,**kwargs): t.start() def curl (url,referer=None): - buffer = StringIO() - c = pycurl.Curl() - c.setopt(c.URL, url) - c.setopt(c.WRITEDATA, buffer) - - if referer: - c.setopt(c.REFERER, referer) - - c.perform() - rc = c.getinfo(c.RESPONSE_CODE) - c.close() - logging.debug('[*] Response code: %d'%(rc)) - return buffer.getvalue() + try: + buffer = StringIO() + c = pycurl.Curl() + c.setopt(c.URL, url) + c.setopt(c.WRITEDATA, buffer) + + if referer: + c.setopt(c.REFERER, referer) + + c.perform() + rc = c.getinfo(c.RESPONSE_CODE) + c.close() + logging.debug('[*] Response code: %d'%(rc)) + return buffer.getvalue() + except Exception as e: + logging.error('[!] Curl Error: %s'%(str(e))) def download(url, headers=None): if not headers: From 93265dff4426670c9cfe27185c73defd3a5bc0e4 Mon Sep 17 00:00:00 2001 From: j105rob Date: Sun, 30 Nov 2014 07:43:36 -0500 Subject: [PATCH 11/42] error handling update --- lib/TwitterBot.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/lib/TwitterBot.py b/lib/TwitterBot.py index edd9f6d..226147e 100644 --- a/lib/TwitterBot.py +++ b/lib/TwitterBot.py @@ -89,6 +89,9 @@ def monitor(self): #process DMs, but only from other people if 'direct_message' in msg and msg['direct_message']['sender']['screen_name'] != TWITTER_SCREEN_NAME: self._parseTweet(msg['direct_message'],msg) + + if 'event' in msg: + logging.debug("{^} %s"%(msg)) except StopIteration: print("stopping iteration") From 3b073ec517806422091fe57e155e0c455d9ed9c4 Mon Sep 17 00:00:00 2001 From: j105rob Date: Tue, 2 Dec 2014 06:13:21 -0500 Subject: [PATCH 12/42] adding tools --- lib/Paste.py | 7 +++++-- tools/dumpemail.py | 22 ++++++++++++++++++++++ 2 files changed, 27 insertions(+), 2 deletions(-) create mode 100644 tools/dumpemail.py diff --git a/lib/Paste.py b/lib/Paste.py index 14d2322..2f47537 100644 --- a/lib/Paste.py +++ b/lib/Paste.py @@ -37,8 +37,11 @@ def match(self): ''' # Get the amount of emails - r = self.text.splitlines() - #logging.debug("[*] Text: %s"%(self.text)) + try: + r = self.text.splitlines() + except Exception as e: + logging.debug("[!] Error: %s"%(str(e))) + logging.debug("[*] Num Lines in text: %i"%(len(r))) if regexes['email'].search(self.text): self.emails = regexes['email'].findall(self.text) diff --git a/tools/dumpemail.py b/tools/dumpemail.py new file mode 100644 index 0000000..2bf7d21 --- /dev/null +++ b/tools/dumpemail.py @@ -0,0 +1,22 @@ +from pymongo import MongoClient +from settings import USE_DB, DB_HOST, DB_PORT + +client = MongoClient(DB_HOST, DB_PORT).paste_db.pastes + +def uniqueEmailSet(): + map = Code("function () {" + " this.emails.forEach(function(z) {" + " emit(z,1);" + " });" + "}") + reduce = Code("function (key,values) {" + "var total = 0;" + "for (var i = 0; i Date: Tue, 2 Dec 2014 06:36:07 -0500 Subject: [PATCH 13/42] adding tools --- __init__.py | 0 tools/__init__.py | 0 tools/dumpemail.py | 5 +++-- 3 files changed, 3 insertions(+), 2 deletions(-) create mode 100644 __init__.py create mode 100644 tools/__init__.py diff --git a/__init__.py b/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tools/__init__.py b/tools/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tools/dumpemail.py b/tools/dumpemail.py index 2bf7d21..e28c670 100644 --- a/tools/dumpemail.py +++ b/tools/dumpemail.py @@ -1,5 +1,6 @@ from pymongo import MongoClient -from settings import USE_DB, DB_HOST, DB_PORT +DB_HOST = 'localhost' +DB_PORT = 27017 client = MongoClient(DB_HOST, DB_PORT).paste_db.pastes @@ -19,4 +20,4 @@ def uniqueEmailSet(): result = client.map_reduce(map,reduce,"res") return result -uniqueEmailSet() \ No newline at end of file +print uniqueEmailSet() \ No newline at end of file From 93f71d9220fbf4598955dd9e3c7b16f61424adf2 Mon Sep 17 00:00:00 2001 From: j105rob Date: Tue, 2 Dec 2014 06:37:19 -0500 Subject: [PATCH 14/42] adding tools --- tools/dumpemail.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/dumpemail.py b/tools/dumpemail.py index e28c670..dc5a3e9 100644 --- a/tools/dumpemail.py +++ b/tools/dumpemail.py @@ -1,4 +1,5 @@ from pymongo import MongoClient +from bson import Code DB_HOST = 'localhost' DB_PORT = 27017 From 128498f18a2896b576db7bd961684f5a4fb8fcb2 Mon Sep 17 00:00:00 2001 From: j105rob Date: Tue, 2 Dec 2014 06:42:39 -0500 Subject: [PATCH 15/42] adding tools --- tools/dumpemail.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tools/dumpemail.py b/tools/dumpemail.py index dc5a3e9..c189805 100644 --- a/tools/dumpemail.py +++ b/tools/dumpemail.py @@ -21,4 +21,5 @@ def uniqueEmailSet(): result = client.map_reduce(map,reduce,"res") return result -print uniqueEmailSet() \ No newline at end of file +for r in uniqueEmailSet().find(): + print r \ No newline at end of file From 75f56fe53f0eb1d292d66742367336c27cda9f4f Mon Sep 17 00:00:00 2001 From: j105rob Date: Tue, 2 Dec 2014 07:17:56 -0500 Subject: [PATCH 16/42] rotating logs --- dumpmon.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/dumpmon.py b/dumpmon.py index 7470be7..301b769 100644 --- a/dumpmon.py +++ b/dumpmon.py @@ -22,6 +22,7 @@ from settings import log_file import threading import logging +from logging.handlers import RotatingFileHandler def monitor(): @@ -41,6 +42,11 @@ def monitor(): logging.basicConfig( format='%(asctime)s [%(levelname)s] [%(module)s] [%(funcName)s] %(message)s', filename=log_file, level=level) + + handler = RotatingFileHandler(path, maxBytes=20*1000, + backupCount=5) + logging.addHandler(handler) + logging.info('Monitoring...') regexMgr = RegexMgr() From 96c4179297f07ae7b002e564027041d00cc1c7c7 Mon Sep 17 00:00:00 2001 From: j105rob Date: Wed, 3 Dec 2014 06:35:54 -0500 Subject: [PATCH 17/42] add log rotation --- dumpmon.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dumpmon.py b/dumpmon.py index 301b769..28c141e 100644 --- a/dumpmon.py +++ b/dumpmon.py @@ -43,7 +43,7 @@ def monitor(): logging.basicConfig( format='%(asctime)s [%(levelname)s] [%(module)s] [%(funcName)s] %(message)s', filename=log_file, level=level) - handler = RotatingFileHandler(path, maxBytes=20*1000, + handler = RotatingFileHandler(".", maxBytes=20*1000, backupCount=5) logging.addHandler(handler) From edee974ec89ba055ed9d21bc2db93527d8836984 Mon Sep 17 00:00:00 2001 From: j105rob Date: Wed, 3 Dec 2014 06:38:18 -0500 Subject: [PATCH 18/42] add log rotation --- dumpmon.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dumpmon.py b/dumpmon.py index 28c141e..d176a59 100644 --- a/dumpmon.py +++ b/dumpmon.py @@ -43,7 +43,7 @@ def monitor(): logging.basicConfig( format='%(asctime)s [%(levelname)s] [%(module)s] [%(funcName)s] %(message)s', filename=log_file, level=level) - handler = RotatingFileHandler(".", maxBytes=20*1000, + handler = RotatingFileHandler(log_file, maxBytes=20*1000, backupCount=5) logging.addHandler(handler) From d5b79233e55110ed3c7077547e1b05d1629e413c Mon Sep 17 00:00:00 2001 From: j105rob Date: Wed, 3 Dec 2014 06:45:29 -0500 Subject: [PATCH 19/42] add log rotation --- dumpmon.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dumpmon.py b/dumpmon.py index d176a59..07c8c50 100644 --- a/dumpmon.py +++ b/dumpmon.py @@ -45,7 +45,7 @@ def monitor(): handler = RotatingFileHandler(log_file, maxBytes=20*1000, backupCount=5) - logging.addHandler(handler) + #logging.addHandler(handler) logging.info('Monitoring...') From be39c3931d7bfa9b566133cf98a65e9a1f3eea9f Mon Sep 17 00:00:00 2001 From: j105rob Date: Wed, 3 Dec 2014 07:32:15 -0500 Subject: [PATCH 20/42] add log rotation --- lib/TwitterBot.py | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/lib/TwitterBot.py b/lib/TwitterBot.py index 226147e..7943642 100644 --- a/lib/TwitterBot.py +++ b/lib/TwitterBot.py @@ -72,7 +72,23 @@ def _parseTweet(self,dm,t): logging.debug('[!] TwitterError %s'%(str(e))) except Exception as e: logging.error('[!] Error trying to parse DM: %s '%(aryDM)) - + + def auto_follow_followers(self): + """ + Follows back everyone who's followed you + """ + + following = set(self.friends.ids(screen_name=TWITTER_HANDLE)["ids"]) + followers = set(self.followers.ids(screen_name=TWITTER_HANDLE)["ids"]) + + not_following_back = followers - following + + for user_id in not_following_back: + try: + self.friendships.create(user_id=user_id, follow=True) + logging.info('[+] Now Following: %s'%(user_id)) + except Exception as e: + logging.error('[!] Error trying to add followers: %s '%(str(e))) def monitor(self): """ @@ -83,6 +99,7 @@ def monitor(self): try: for msg in twitter_userstream.user(): #logging.debug("{^} %s"%(msg)) + self.auto_follow_followers() if 'text' in msg: print("[$] Recieved Tweet %s from %s"%(msg['text'],msg['user']['screen_name'])) From e20af2fbb7d1602b53d3581c93747be7ff7e5618 Mon Sep 17 00:00:00 2001 From: j105rob Date: Wed, 3 Dec 2014 07:36:21 -0500 Subject: [PATCH 21/42] add log rotation --- lib/helper.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/lib/helper.py b/lib/helper.py index 2808eec..61e3ba7 100644 --- a/lib/helper.py +++ b/lib/helper.py @@ -32,7 +32,8 @@ def curl (url,referer=None): c.perform() rc = c.getinfo(c.RESPONSE_CODE) c.close() - logging.debug('[*] Response code: %d'%(rc)) + if rc != 200: + logging.error('[!] %s Response code: %d'%(url,rc)) return buffer.getvalue() except Exception as e: logging.error('[!] Curl Error: %s'%(str(e))) From 7a803507246c7f857fa94a9ce7b83386ecd64d77 Mon Sep 17 00:00:00 2001 From: j105rob Date: Wed, 3 Dec 2014 07:37:27 -0500 Subject: [PATCH 22/42] add log rotation --- lib/TwitterBot.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/TwitterBot.py b/lib/TwitterBot.py index 7943642..35d5d75 100644 --- a/lib/TwitterBot.py +++ b/lib/TwitterBot.py @@ -78,8 +78,8 @@ def auto_follow_followers(self): Follows back everyone who's followed you """ - following = set(self.friends.ids(screen_name=TWITTER_HANDLE)["ids"]) - followers = set(self.followers.ids(screen_name=TWITTER_HANDLE)["ids"]) + following = set(self.friends.ids(screen_name=TWITTER_SCREEN_NAME)["ids"]) + followers = set(self.followers.ids(screen_name=TWITTER_SCREEN_NAME)["ids"]) not_following_back = followers - following From 748b8d00ad5c48ff7d5f5b419b14b752b5011677 Mon Sep 17 00:00:00 2001 From: j105rob Date: Thu, 4 Dec 2014 07:12:32 -0500 Subject: [PATCH 23/42] wokring on delay for 403 issues --- lib/Site.py | 3 ++- lib/helper.py | 7 +++++++ 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/lib/Site.py b/lib/Site.py index c906d72..f988d61 100644 --- a/lib/Site.py +++ b/lib/Site.py @@ -8,6 +8,7 @@ from settings import USE_DB, DB_HOST, DB_PORT, SEEN_DEQUE_LEN import logging import helper +from random import randint from collections import deque @@ -102,7 +103,7 @@ def monitor(self, bot): while(1): while not self.empty(): #need to sleep to avoid the ban.... - time.sleep(2) + time.sleep(randint(5,17)) paste = self.get() logging.info('[*] Checking ' + paste.url) paste.text = self.get_paste_text(paste) diff --git a/lib/helper.py b/lib/helper.py index 61e3ba7..c049656 100644 --- a/lib/helper.py +++ b/lib/helper.py @@ -32,8 +32,15 @@ def curl (url,referer=None): c.perform() rc = c.getinfo(c.RESPONSE_CODE) c.close() + + #TODO: need to figure out a back off scenario + if rc == 403: + logging.error('[!] %s Response code: %d'%(url,rc)) + return "backoff" + if rc != 200: logging.error('[!] %s Response code: %d'%(url,rc)) + return buffer.getvalue() except Exception as e: logging.error('[!] Curl Error: %s'%(str(e))) From a81e9652b9f03b0c24f4366013f05a26d0486875 Mon Sep 17 00:00:00 2001 From: j105rob Date: Thu, 4 Dec 2014 07:14:26 -0500 Subject: [PATCH 24/42] wokring on delay for 403 issues --- lib/Paste.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/lib/Paste.py b/lib/Paste.py index 2f47537..ced94ae 100644 --- a/lib/Paste.py +++ b/lib/Paste.py @@ -39,10 +39,11 @@ def match(self): # Get the amount of emails try: r = self.text.splitlines() + logging.debug("[*] Num Lines in text: %i"%(len(r))) except Exception as e: logging.debug("[!] Error: %s"%(str(e))) - logging.debug("[*] Num Lines in text: %i"%(len(r))) + if regexes['email'].search(self.text): self.emails = regexes['email'].findall(self.text) From d47d09e32c1195be179b750392543fa51b174d8a Mon Sep 17 00:00:00 2001 From: j105rob Date: Thu, 4 Dec 2014 09:48:26 -0500 Subject: [PATCH 25/42] wokring on delay for 403 issues --- dumpmon.py | 27 +++++++++++++++++++-------- lib/HaveIBeen.py | 6 ++++-- lib/Paste.py | 18 ++++++++++++++++++ lib/Pastebin.py | 8 ++++++-- lib/Pastie.py | 13 +++++++------ lib/Site.py | 27 ++++++++------------------- lib/Slexy.py | 6 ++++-- lib/UserSubmitted.py | 19 +++++-------------- lib/helper.py | 3 ++- 9 files changed, 73 insertions(+), 54 deletions(-) diff --git a/dumpmon.py b/dumpmon.py index 07c8c50..70c37b7 100644 --- a/dumpmon.py +++ b/dumpmon.py @@ -54,20 +54,31 @@ def monitor(): # Create lock for output log log_lock = threading.Lock() - - createThread(bot.monitor) - createThread(Stats().monitor,bot) - - createThread(HaveIBeen().monitor,bot) - createThread(Pastebin().monitor,bot) - createThread(Slexy().monitor,bot) - createThread(Pastie().monitor,bot) + + #create an event to tell threads to keep running + isRunning = threading.Event() + isRunning.set() + #array to keep a handle on threads + workers = [] + workers.append(createThread(bot.monitor,isRunning)) + workers.append(createThread(Stats().monitor,bot,isRunning)) + workers.append(createThread(HaveIBeen().monitor,bot,isRunning)) + #workers.append(createThread(Pastebin().monitor,bot,isRunning)) + #workers.append(createThread(Slexy().monitor,bot,isRunning)) + #workers.append(createThread(Pastie().monitor,bot,isRunning)) # Let threads run try: while(1): sleep(5) except KeyboardInterrupt: + #signal threads to shutdown + isRunning.clear() + + #wait for threads to join + for t in workers: + t.join() + logging.warn('Stopped.') diff --git a/lib/HaveIBeen.py b/lib/HaveIBeen.py index 1b374e8..9a92279 100644 --- a/lib/HaveIBeen.py +++ b/lib/HaveIBeen.py @@ -21,6 +21,9 @@ def __init__(self, id): self.headers = None self.url = 'http://pastebin.com/raw.php?i=' + self.id + def get(self): + return helper.curl(self.url) + class HaveIBeen(Site): def __init__(self): super(HaveIBeen, self).__init__() @@ -50,8 +53,7 @@ def update(self): logging.info('HaveIBeenPwned Added URLs: ' + str(i)) - def get_paste_text(self, paste): - return helper.curl(paste.url) + if __name__ == '__main__': c = HaveIBeen() diff --git a/lib/Paste.py b/lib/Paste.py index ced94ae..8d05b4c 100644 --- a/lib/Paste.py +++ b/lib/Paste.py @@ -22,6 +22,24 @@ class Paste: Generic "Paste" object to contain attributes of a standard paste def __eq__(self,comparePaste): #logging.info('id %s compares to %s'%(self.id, comparePaste.id)) return self.id == comparePaste.id + + def __repr__(self): + return { + 'pid' : self.id, + 'text' : self.text, + 'emails' : self.emails, + 'hashes' : self.hashes, + 'num_emails' : self.num_emails, + 'num_hashes' : self.num_hashes, + 'type' : self.type, + 'db_keywords' : self.db_keywords, + 'url' : self.url + } + + def get(self): + #override this + logging.error('[@] Function Not Implemented in Subclass') + pass def match(self): ''' diff --git a/lib/Pastebin.py b/lib/Pastebin.py index ae00159..f3b7960 100644 --- a/lib/Pastebin.py +++ b/lib/Pastebin.py @@ -13,8 +13,14 @@ def __init__(self, id): super(PastebinPaste, self).__init__(id) self.headers = None self.url = 'http://pastebin.com/raw.php?i=' + self.id + + def get(self): + self.text = helper.curl(self.url) class Pastebin(Site): + """ + Pastebin will block your IP if you request more than 600 requests in 10 mins. This is per admin@pastebin.com + """ def __init__(self): self.BASE_URL = 'http://pastebin.com' self.sleep = SLEEP_PASTEBIN @@ -37,5 +43,3 @@ def update(self): self.put(paste) logging.info('Pastebin Added URLs: ' + str(i)) - def get_paste_text(self, paste): - return helper.curl(paste.url) diff --git a/lib/Pastie.py b/lib/Pastie.py index 0645b9c..3cf4a43 100644 --- a/lib/Pastie.py +++ b/lib/Pastie.py @@ -14,6 +14,13 @@ def __init__(self, id): self.headers = None self.url = 'http://pastie.org/pastes/' + self.id + '/text' + def get(self): + try: + self.text = BeautifulSoup(helper.curl(self.url)).pre.text + except Exception as e: + logging.error('[!] Beautiful Soup Error: %s'%(str(e))) + self.text = None + class Pastie(Site): def __init__(self): self.BASE_URL = 'http://pastie.org' @@ -37,9 +44,3 @@ def update(self): self.put(paste) logging.info('Pastie Added URLs: ' + str(i)) - def get_paste_text(self, paste): - try: - return BeautifulSoup(helper.curl(paste.url)).pre.text - except Exception as e: - logging.error('[!] Beautiful Soup Error: %s'%(str(e))) - return None \ No newline at end of file diff --git a/lib/Site.py b/lib/Site.py index f988d61..5654c97 100644 --- a/lib/Site.py +++ b/lib/Site.py @@ -93,37 +93,25 @@ def update(self): logging.error('[@] Function Not Implemented in Subclass') pass - def get_paste_text(self): - #override this - logging.error('[@] Function Not Implemented in Subclass') - pass + def terminating(self): + #this can be overridden in subclass + logging.debug('[!] Terminating.....') - def monitor(self, bot): + def monitor(self, bot, isRunning): self.update() - while(1): + while isRunning.is_set(): while not self.empty(): #need to sleep to avoid the ban.... time.sleep(randint(5,17)) paste = self.get() - logging.info('[*] Checking ' + paste.url) - paste.text = self.get_paste_text(paste) + paste.get() tweet = helper.build_tweet(paste) if tweet: logging.info(tweet) with bot.tweetLock: if USE_DB: try: - self.db_client.save({ - 'pid' : paste.id, - 'text' : paste.text, - 'emails' : paste.emails, - 'hashes' : paste.hashes, - 'num_emails' : paste.num_emails, - 'num_hashes' : paste.num_hashes, - 'type' : paste.type, - 'db_keywords' : paste.db_keywords, - 'url' : paste.url - }) + self.db_client.save(repr(paste)) except Exception as e: logging.error('[!] MongoDB Error %s'%(str(e))) try: @@ -136,3 +124,4 @@ def monitor(self, bot): logging.debug('[*] No results... sleeping') time.sleep(self.sleep) self.update() + self.terminating() diff --git a/lib/Slexy.py b/lib/Slexy.py index adcbc7e..7460225 100644 --- a/lib/Slexy.py +++ b/lib/Slexy.py @@ -13,6 +13,9 @@ def __init__(self, id): super(SlexyPaste, self).__init__(id) self.headers = {'Referer': 'http://slexy.org/view/' + self.id} self.url = 'http://slexy.org/raw/' + self.id + + def get(self): + self.text = helper.curl(self.url, self.headers['Referer']) class Slexy(Site): def __init__(self): @@ -37,5 +40,4 @@ def update(self): self.put(paste) logging.info('Slexy Added URLs: ' + str(i)) - def get_paste_text(self, paste): - return helper.curl(paste.url, paste.headers['Referer']) + diff --git a/lib/UserSubmitted.py b/lib/UserSubmitted.py index d710d75..1dd08f3 100644 --- a/lib/UserSubmitted.py +++ b/lib/UserSubmitted.py @@ -15,6 +15,9 @@ def __init__(self, url): self.url = resolve(url) logging.info('[+] URL expanded to %s'%(self.url)) + def get(self): + self.text = helper.curl(self.url) + class UserSubmitted(Site): def __init__(self): super(UserSubmitted, self).__init__() @@ -33,23 +36,13 @@ def monitor(self, bot): if not self.empty(): paste = self.get() logging.info('[*] Checking ' + paste.url) - paste.text = self.get_paste_text(paste) + paste.get() tweet = helper.build_tweet(paste) if tweet: logging.info(tweet) with bot.tweetLock: if USE_DB: - self.db_client.save({ - 'pid' : paste.id, - 'text' : paste.text, - 'emails' : paste.emails, - 'hashes' : paste.hashes, - 'num_emails' : paste.num_emails, - 'num_hashes' : paste.num_hashes, - 'type' : paste.type, - 'db_keywords' : paste.db_keywords, - 'url' : paste.url - }) + self.db_client.save(repr(paste)) try: logging.debug('[+] Tweet %s'%(tweet)) bot.statuses.update(status=tweet) @@ -58,5 +51,3 @@ def monitor(self, bot): logging.debug('[!] TwitterError %s'%(str(e))) - def get_paste_text(self, paste): - return helper.curl(paste.url) \ No newline at end of file diff --git a/lib/helper.py b/lib/helper.py index c049656..a5a28b5 100644 --- a/lib/helper.py +++ b/lib/helper.py @@ -18,6 +18,7 @@ def createThread(target,*args,**kwargs): t = threading.Thread(target=target, args=args, kwargs=kwargs) t.daemon = True t.start() + return t def curl (url,referer=None): try: @@ -78,7 +79,7 @@ def build_tweet(paste): ''' tweet = None if paste.match(): - logging.info('Paste Matched') + logging.debug('Paste Matched') tweet = paste.url if paste.type == 'db_dump': if paste.num_emails > 0: From 96b01021fa071ddfa50d2a51b092a2cbeb7f03da Mon Sep 17 00:00:00 2001 From: j105rob Date: Thu, 4 Dec 2014 09:51:51 -0500 Subject: [PATCH 26/42] wokring on delay for 403 issues --- lib/TwitterBot.py | 38 ++++++++++++++++++++------------------ 1 file changed, 20 insertions(+), 18 deletions(-) diff --git a/lib/TwitterBot.py b/lib/TwitterBot.py index 35d5d75..5190283 100644 --- a/lib/TwitterBot.py +++ b/lib/TwitterBot.py @@ -90,30 +90,32 @@ def auto_follow_followers(self): except Exception as e: logging.error('[!] Error trying to add followers: %s '%(str(e))) - def monitor(self): + def monitor(self, isRunning): """ This function is expected to be on a separate thread. This stream function is blocking and will not yield, thus does not need to be in a loop; refer to the docs """ twitter_userstream = TwitterStream(auth=self.auth, domain='userstream.twitter.com') - try: - for msg in twitter_userstream.user(): - #logging.debug("{^} %s"%(msg)) - self.auto_follow_followers() - if 'text' in msg: - print("[$] Recieved Tweet %s from %s"%(msg['text'],msg['user']['screen_name'])) - - #process DMs, but only from other people - if 'direct_message' in msg and msg['direct_message']['sender']['screen_name'] != TWITTER_SCREEN_NAME: - self._parseTweet(msg['direct_message'],msg) - - if 'event' in msg: - logging.debug("{^} %s"%(msg)) + + while isRunning.is_set(): + try: + for msg in twitter_userstream.user(): + #logging.debug("{^} %s"%(msg)) + self.auto_follow_followers() + if 'text' in msg: + print("[$] Recieved Tweet %s from %s"%(msg['text'],msg['user']['screen_name'])) + + #process DMs, but only from other people + if 'direct_message' in msg and msg['direct_message']['sender']['screen_name'] != TWITTER_SCREEN_NAME: + self._parseTweet(msg['direct_message'],msg) - except StopIteration: - print("stopping iteration") - except TwitterError as e: - logging.error('[!] TwitterError %s'%(str(e))) + if 'event' in msg: + logging.debug("{^} %s"%(msg)) + + except StopIteration: + print("stopping iteration") + except TwitterError as e: + logging.error('[!] TwitterError %s'%(str(e))) From 961bc8ec8acf97a9b29c69a200b44de42ee5bae7 Mon Sep 17 00:00:00 2001 From: j105rob Date: Thu, 4 Dec 2014 09:57:02 -0500 Subject: [PATCH 27/42] wokring on delay for 403 issues --- lib/Stats.py | 4 ++-- lib/UserSubmitted.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/lib/Stats.py b/lib/Stats.py index 9d64441..f6cc352 100644 --- a/lib/Stats.py +++ b/lib/Stats.py @@ -59,8 +59,8 @@ def status(self): logging.error('[!] Database Error %s'%(e)) return None - def monitor(self,twitterBot): - while(True): + def monitor(self,twitterBot,isRunning): + while(isRunning.is_set()): try: msg = self.status() if msg: diff --git a/lib/UserSubmitted.py b/lib/UserSubmitted.py index 1dd08f3..e69839a 100644 --- a/lib/UserSubmitted.py +++ b/lib/UserSubmitted.py @@ -32,7 +32,7 @@ def update(self,url): logging.info('Adding User Sumbmitted URL: ' + paste.url) self.put(paste) - def monitor(self, bot): + def monitor(self, bot,isRunning): if not self.empty(): paste = self.get() logging.info('[*] Checking ' + paste.url) From ca67fdf5d2cac74cbd8420b9dd46cc326b53d841 Mon Sep 17 00:00:00 2001 From: j105rob Date: Thu, 4 Dec 2014 10:05:32 -0500 Subject: [PATCH 28/42] wokring on delay for 403 issues --- dumpmon.py | 4 ++-- lib/TwitterBot.py | 37 ++++++++++++++++++------------------- 2 files changed, 20 insertions(+), 21 deletions(-) diff --git a/dumpmon.py b/dumpmon.py index 70c37b7..9f5bd36 100644 --- a/dumpmon.py +++ b/dumpmon.py @@ -41,7 +41,7 @@ def monitor(): level = logging.DEBUG logging.basicConfig( - format='%(asctime)s [%(levelname)s] [%(module)s] [%(funcName)s] %(message)s', filename=log_file, level=level) + format='%(asctime)s [%(levelname)s][%(module)s][%(funcName)s] %(message)s', filename=log_file, level=level) handler = RotatingFileHandler(log_file, maxBytes=20*1000, backupCount=5) @@ -60,7 +60,7 @@ def monitor(): isRunning.set() #array to keep a handle on threads workers = [] - workers.append(createThread(bot.monitor,isRunning)) + createThread(bot.monitor) workers.append(createThread(Stats().monitor,bot,isRunning)) workers.append(createThread(HaveIBeen().monitor,bot,isRunning)) #workers.append(createThread(Pastebin().monitor,bot,isRunning)) diff --git a/lib/TwitterBot.py b/lib/TwitterBot.py index 5190283..84819d2 100644 --- a/lib/TwitterBot.py +++ b/lib/TwitterBot.py @@ -96,26 +96,25 @@ def monitor(self, isRunning): This stream function is blocking and will not yield, thus does not need to be in a loop; refer to the docs """ twitter_userstream = TwitterStream(auth=self.auth, domain='userstream.twitter.com') - - while isRunning.is_set(): - try: - for msg in twitter_userstream.user(): - #logging.debug("{^} %s"%(msg)) - self.auto_follow_followers() - if 'text' in msg: - print("[$] Recieved Tweet %s from %s"%(msg['text'],msg['user']['screen_name'])) - - #process DMs, but only from other people - if 'direct_message' in msg and msg['direct_message']['sender']['screen_name'] != TWITTER_SCREEN_NAME: - self._parseTweet(msg['direct_message'],msg) + + try: + for msg in twitter_userstream.user(): + #logging.debug("{^} %s"%(msg)) + self.auto_follow_followers() + if 'text' in msg: + print("[$] Recieved Tweet %s from %s"%(msg['text'],msg['user']['screen_name'])) + + #process DMs, but only from other people + if 'direct_message' in msg and msg['direct_message']['sender']['screen_name'] != TWITTER_SCREEN_NAME: + self._parseTweet(msg['direct_message'],msg) + + if 'event' in msg: + logging.debug("{^} %s"%(msg)) - if 'event' in msg: - logging.debug("{^} %s"%(msg)) - - except StopIteration: - print("stopping iteration") - except TwitterError as e: - logging.error('[!] TwitterError %s'%(str(e))) + except StopIteration: + print("stopping iteration") + except TwitterError as e: + logging.error('[!] TwitterError %s'%(str(e))) From 8b0e3a5e24631fd4cad883c3e3daa733e3ddee38 Mon Sep 17 00:00:00 2001 From: j105rob Date: Thu, 4 Dec 2014 10:09:13 -0500 Subject: [PATCH 29/42] wokring on delay for 403 issues --- dumpmon.py | 6 +++--- lib/Stats.py | 32 +++++++++++++++++--------------- lib/TwitterBot.py | 2 +- 3 files changed, 21 insertions(+), 19 deletions(-) diff --git a/dumpmon.py b/dumpmon.py index 9f5bd36..028f198 100644 --- a/dumpmon.py +++ b/dumpmon.py @@ -61,7 +61,7 @@ def monitor(): #array to keep a handle on threads workers = [] createThread(bot.monitor) - workers.append(createThread(Stats().monitor,bot,isRunning)) + createThread(Stats().monitor,bot) workers.append(createThread(HaveIBeen().monitor,bot,isRunning)) #workers.append(createThread(Pastebin().monitor,bot,isRunning)) #workers.append(createThread(Slexy().monitor,bot,isRunning)) @@ -74,11 +74,11 @@ def monitor(): except KeyboardInterrupt: #signal threads to shutdown isRunning.clear() - + print 'stopping' #wait for threads to join for t in workers: t.join() - + print 'stopped' logging.warn('Stopped.') diff --git a/lib/Stats.py b/lib/Stats.py index f6cc352..397763b 100644 --- a/lib/Stats.py +++ b/lib/Stats.py @@ -59,18 +59,20 @@ def status(self): logging.error('[!] Database Error %s'%(e)) return None - def monitor(self,twitterBot,isRunning): - while(isRunning.is_set()): - try: - msg = self.status() - if msg: - with twitterBot.tweetLock: - try: - logging.debug('[+] Status Tweet %s'%(msg)) - twitterBot.statuses.update(status=msg) - except TwitterError as e: - logging.debug('[!] TwitterError %s'%(str(e))) - except Exception,e: - logging.error('[!] Database Error %s'%(e)) - - time.sleep(STATS_FREQ) \ No newline at end of file + def monitor(self,twitterBot): + try: + msg = self.status() + if msg: + with twitterBot.tweetLock: + try: + logging.debug('[+] Status Tweet %s'%(msg)) + twitterBot.statuses.update(status=msg) + except TwitterError as e: + logging.debug('[!] TwitterError %s'%(str(e))) + except Exception,e: + logging.error('[!] Database Error %s'%(e)) + + time.sleep(STATS_FREQ) + + + \ No newline at end of file diff --git a/lib/TwitterBot.py b/lib/TwitterBot.py index 84819d2..c7d3bf2 100644 --- a/lib/TwitterBot.py +++ b/lib/TwitterBot.py @@ -90,7 +90,7 @@ def auto_follow_followers(self): except Exception as e: logging.error('[!] Error trying to add followers: %s '%(str(e))) - def monitor(self, isRunning): + def monitor(self): """ This function is expected to be on a separate thread. This stream function is blocking and will not yield, thus does not need to be in a loop; refer to the docs From 4f1f6068745a7006f7f39945e37077cff19ebd55 Mon Sep 17 00:00:00 2001 From: j105rob Date: Thu, 4 Dec 2014 10:12:22 -0500 Subject: [PATCH 30/42] wokring on delay for 403 issues --- lib/HaveIBeen.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/HaveIBeen.py b/lib/HaveIBeen.py index 9a92279..1d7027a 100644 --- a/lib/HaveIBeen.py +++ b/lib/HaveIBeen.py @@ -22,7 +22,7 @@ def __init__(self, id): self.url = 'http://pastebin.com/raw.php?i=' + self.id def get(self): - return helper.curl(self.url) + self.text = helper.curl(self.url) class HaveIBeen(Site): def __init__(self): From edc65c0df1d8ce896a58a68c0669ecc18bf47bf7 Mon Sep 17 00:00:00 2001 From: j105rob Date: Thu, 4 Dec 2014 10:23:35 -0500 Subject: [PATCH 31/42] wokring on delay for 403 issues --- lib/Paste.py | 2 +- lib/Site.py | 4 +++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/lib/Paste.py b/lib/Paste.py index 8d05b4c..f5f89d6 100644 --- a/lib/Paste.py +++ b/lib/Paste.py @@ -23,7 +23,7 @@ def __eq__(self,comparePaste): #logging.info('id %s compares to %s'%(self.id, comparePaste.id)) return self.id == comparePaste.id - def __repr__(self): + def row(self): return { 'pid' : self.id, 'text' : self.text, diff --git a/lib/Site.py b/lib/Site.py index 5654c97..cca0f81 100644 --- a/lib/Site.py +++ b/lib/Site.py @@ -101,6 +101,7 @@ def monitor(self, bot, isRunning): self.update() while isRunning.is_set(): while not self.empty(): + if not isRunning.is_set(): return #need to sleep to avoid the ban.... time.sleep(randint(5,17)) paste = self.get() @@ -111,7 +112,7 @@ def monitor(self, bot, isRunning): with bot.tweetLock: if USE_DB: try: - self.db_client.save(repr(paste)) + self.db_client.save(paste.row) except Exception as e: logging.error('[!] MongoDB Error %s'%(str(e))) try: @@ -121,6 +122,7 @@ def monitor(self, bot, isRunning): logging.error('[!] TwitterError %s'%(str(e))) self.update() while self.empty(): + if not isRunning.is_set(): return logging.debug('[*] No results... sleeping') time.sleep(self.sleep) self.update() From 46d7f4892e9f312bdb2b2f3e1d6563b80af5316e Mon Sep 17 00:00:00 2001 From: j105rob Date: Thu, 4 Dec 2014 10:34:18 -0500 Subject: [PATCH 32/42] wokring on delay for 403 issues --- lib/Site.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/lib/Site.py b/lib/Site.py index cca0f81..0e51d72 100644 --- a/lib/Site.py +++ b/lib/Site.py @@ -101,9 +101,8 @@ def monitor(self, bot, isRunning): self.update() while isRunning.is_set(): while not self.empty(): - if not isRunning.is_set(): return #need to sleep to avoid the ban.... - time.sleep(randint(5,17)) + time.sleep(randint(2,5)) paste = self.get() paste.get() tweet = helper.build_tweet(paste) @@ -122,7 +121,6 @@ def monitor(self, bot, isRunning): logging.error('[!] TwitterError %s'%(str(e))) self.update() while self.empty(): - if not isRunning.is_set(): return logging.debug('[*] No results... sleeping') time.sleep(self.sleep) self.update() From fc01084439650b95382abc606c459ce079508e94 Mon Sep 17 00:00:00 2001 From: j105rob Date: Thu, 4 Dec 2014 10:35:47 -0500 Subject: [PATCH 33/42] wokring on delay for 403 issues --- lib/Site.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/Site.py b/lib/Site.py index 0e51d72..22d147c 100644 --- a/lib/Site.py +++ b/lib/Site.py @@ -111,7 +111,7 @@ def monitor(self, bot, isRunning): with bot.tweetLock: if USE_DB: try: - self.db_client.save(paste.row) + self.db_client.save(paste.row()) except Exception as e: logging.error('[!] MongoDB Error %s'%(str(e))) try: From 24aaed40af563e0f267d34792cac9d93e84f0145 Mon Sep 17 00:00:00 2001 From: j105rob Date: Thu, 4 Dec 2014 11:24:36 -0500 Subject: [PATCH 34/42] wokring on delay for 403 issues --- lib/Site.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/lib/Site.py b/lib/Site.py index 22d147c..e94e92c 100644 --- a/lib/Site.py +++ b/lib/Site.py @@ -101,6 +101,7 @@ def monitor(self, bot, isRunning): self.update() while isRunning.is_set(): while not self.empty(): + if not isRunning.is_set(): break #need to sleep to avoid the ban.... time.sleep(randint(2,5)) paste = self.get() @@ -119,9 +120,13 @@ def monitor(self, bot, isRunning): bot.statuses.update(status=tweet) except TwitterError as e: logging.error('[!] TwitterError %s'%(str(e))) + if not isRunning.is_set(): break self.update() while self.empty(): logging.debug('[*] No results... sleeping') time.sleep(self.sleep) self.update() + self.terminating() + + From 824353e3758d4f36253289e9dc94ff22ba1d46ef Mon Sep 17 00:00:00 2001 From: j105rob Date: Thu, 4 Dec 2014 11:30:50 -0500 Subject: [PATCH 35/42] wokring on delay for 403 issues --- dumpmon.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dumpmon.py b/dumpmon.py index 028f198..3b9de73 100644 --- a/dumpmon.py +++ b/dumpmon.py @@ -61,8 +61,8 @@ def monitor(): #array to keep a handle on threads workers = [] createThread(bot.monitor) - createThread(Stats().monitor,bot) - workers.append(createThread(HaveIBeen().monitor,bot,isRunning)) + #createThread(Stats().monitor,bot) + #workers.append(createThread(HaveIBeen().monitor,bot,isRunning)) #workers.append(createThread(Pastebin().monitor,bot,isRunning)) #workers.append(createThread(Slexy().monitor,bot,isRunning)) #workers.append(createThread(Pastie().monitor,bot,isRunning)) From de901bf1e8d9bca57798da2de697178c6187c0d4 Mon Sep 17 00:00:00 2001 From: j105rob Date: Thu, 4 Dec 2014 11:52:55 -0500 Subject: [PATCH 36/42] wokring on delay for 403 issues --- lib/TwitterBot.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/lib/TwitterBot.py b/lib/TwitterBot.py index c7d3bf2..7cd52df 100644 --- a/lib/TwitterBot.py +++ b/lib/TwitterBot.py @@ -80,7 +80,8 @@ def auto_follow_followers(self): following = set(self.friends.ids(screen_name=TWITTER_SCREEN_NAME)["ids"]) followers = set(self.followers.ids(screen_name=TWITTER_SCREEN_NAME)["ids"]) - + for f in followers: + print f not_following_back = followers - following for user_id in not_following_back: @@ -96,11 +97,10 @@ def monitor(self): This stream function is blocking and will not yield, thus does not need to be in a loop; refer to the docs """ twitter_userstream = TwitterStream(auth=self.auth, domain='userstream.twitter.com') - + self.auto_follow_followers() try: for msg in twitter_userstream.user(): #logging.debug("{^} %s"%(msg)) - self.auto_follow_followers() if 'text' in msg: print("[$] Recieved Tweet %s from %s"%(msg['text'],msg['user']['screen_name'])) From dfce5a8f0ca279ad258202dd2cf990d4debf5f10 Mon Sep 17 00:00:00 2001 From: j105rob Date: Thu, 4 Dec 2014 13:08:08 -0500 Subject: [PATCH 37/42] added pwn3d DM --- dumpmon.py | 10 +++++----- lib/Stats.py | 4 ++++ lib/TwitterBot.py | 22 +++++++++++++--------- 3 files changed, 22 insertions(+), 14 deletions(-) diff --git a/dumpmon.py b/dumpmon.py index 3b9de73..6152358 100644 --- a/dumpmon.py +++ b/dumpmon.py @@ -61,11 +61,11 @@ def monitor(): #array to keep a handle on threads workers = [] createThread(bot.monitor) - #createThread(Stats().monitor,bot) - #workers.append(createThread(HaveIBeen().monitor,bot,isRunning)) - #workers.append(createThread(Pastebin().monitor,bot,isRunning)) - #workers.append(createThread(Slexy().monitor,bot,isRunning)) - #workers.append(createThread(Pastie().monitor,bot,isRunning)) + createThread(Stats().monitor,bot) + workers.append(createThread(HaveIBeen().monitor,bot,isRunning)) + workers.append(createThread(Pastebin().monitor,bot,isRunning)) + workers.append(createThread(Slexy().monitor,bot,isRunning)) + workers.append(createThread(Pastie().monitor,bot,isRunning)) # Let threads run try: diff --git a/lib/Stats.py b/lib/Stats.py index 397763b..e012536 100644 --- a/lib/Stats.py +++ b/lib/Stats.py @@ -14,6 +14,8 @@ def __init__(self): except pymongo.errors.ConnectionFailure, e: logging.error('[!] Database failed to start %s'%(e)) + self.cacheEmail = self.uniqueEmailSet() + def uniqueEmailSet(self): map = Code("function () {" " this.emails.forEach(function(z) {" @@ -53,6 +55,8 @@ def status(self): try: e = self.uniqueEmailSet().count() h = self.uniqueHashSet().count() + self.cacheEmail = self.uniqueEmailSet() + msg = "Status as of %s: \n Unique emails: %i, Unique hashes: %i\n #infosec #dataleak"%(time.strftime("%c"),e,h) return msg except Exception,e: diff --git a/lib/TwitterBot.py b/lib/TwitterBot.py index 7cd52df..8693fbc 100644 --- a/lib/TwitterBot.py +++ b/lib/TwitterBot.py @@ -23,6 +23,13 @@ def __init__(self,regexMgr): self.userSubmittedSite = UserSubmitted() logging.info('[+] Started TwitterBot') + def pwn3d(self,aryDM,user): + i = self.statusMgr.cacheEmail.find_one({'_id':aryDM[1]}) + if i: + return "Yes, I was able to find %s email %d times in data leaks."%(i['_id'],int(i['value'])) + else: + return "No, I haven't found %s email yet in data breaches."%(aryDM[1]) + def check(self,aryDM,user): self.userSubmittedSite.update(aryDM[1]) response = self.userSubmittedSite.monitor(self) @@ -71,19 +78,17 @@ def _parseTweet(self,dm,t): except TwitterError as e: logging.debug('[!] TwitterError %s'%(str(e))) except Exception as e: - logging.error('[!] Error trying to parse DM: %s '%(aryDM)) + logging.error('[!] Error trying to parse DM: %s %s'%(aryDM,str(e))) def auto_follow_followers(self): """ Follows back everyone who's followed you """ - following = set(self.friends.ids(screen_name=TWITTER_SCREEN_NAME)["ids"]) followers = set(self.followers.ids(screen_name=TWITTER_SCREEN_NAME)["ids"]) - for f in followers: - print f + not_following_back = followers - following - + for user_id in not_following_back: try: self.friendships.create(user_id=user_id, follow=True) @@ -99,8 +104,7 @@ def monitor(self): twitter_userstream = TwitterStream(auth=self.auth, domain='userstream.twitter.com') self.auto_follow_followers() try: - for msg in twitter_userstream.user(): - #logging.debug("{^} %s"%(msg)) + for msg in twitter_userstream.user(): if 'text' in msg: print("[$] Recieved Tweet %s from %s"%(msg['text'],msg['user']['screen_name'])) @@ -108,8 +112,8 @@ def monitor(self): if 'direct_message' in msg and msg['direct_message']['sender']['screen_name'] != TWITTER_SCREEN_NAME: self._parseTweet(msg['direct_message'],msg) - if 'event' in msg: - logging.debug("{^} %s"%(msg)) + if 'source' in msg and 'follow_request_sent' in msg['source']: + self.auto_follow_followers() except StopIteration: print("stopping iteration") From 021a875364dd9c5c4563c669436f5569acfdb992 Mon Sep 17 00:00:00 2001 From: j105rob Date: Fri, 5 Dec 2014 06:55:35 -0500 Subject: [PATCH 38/42] random fixes --- dumpmon.py | 5 +++++ lib/Paste.py | 4 +++- lib/Stats.py | 2 +- lib/helper.py | 3 +++ 4 files changed, 12 insertions(+), 2 deletions(-) diff --git a/dumpmon.py b/dumpmon.py index 6152358..b517f2c 100644 --- a/dumpmon.py +++ b/dumpmon.py @@ -58,10 +58,15 @@ def monitor(): #create an event to tell threads to keep running isRunning = threading.Event() isRunning.set() + #array to keep a handle on threads workers = [] + + #these next 2 workers don't need to be joined when termd createThread(bot.monitor) createThread(Stats().monitor,bot) + + #these workers need to be shut down gracefully workers.append(createThread(HaveIBeen().monitor,bot,isRunning)) workers.append(createThread(Pastebin().monitor,bot,isRunning)) workers.append(createThread(Slexy().monitor,bot,isRunning)) diff --git a/lib/Paste.py b/lib/Paste.py index f5f89d6..ab28782 100644 --- a/lib/Paste.py +++ b/lib/Paste.py @@ -2,6 +2,7 @@ import settings import logging import re +import time class Paste(object): def __init__(self,id): @@ -33,7 +34,8 @@ def row(self): 'num_hashes' : self.num_hashes, 'type' : self.type, 'db_keywords' : self.db_keywords, - 'url' : self.url + 'url' : self.url, + "added":time.strftime("%c") } def get(self): diff --git a/lib/Stats.py b/lib/Stats.py index e012536..0e7b6a6 100644 --- a/lib/Stats.py +++ b/lib/Stats.py @@ -14,7 +14,7 @@ def __init__(self): except pymongo.errors.ConnectionFailure, e: logging.error('[!] Database failed to start %s'%(e)) - self.cacheEmail = self.uniqueEmailSet() + self.cacheEmail = self.uniqueEmailSet() def uniqueEmailSet(self): map = Code("function () {" diff --git a/lib/helper.py b/lib/helper.py index a5a28b5..4cb174b 100644 --- a/lib/helper.py +++ b/lib/helper.py @@ -27,6 +27,9 @@ def curl (url,referer=None): c.setopt(c.URL, url) c.setopt(c.WRITEDATA, buffer) + # Follow redirect. + c.setopt(c.FOLLOWLOCATION, True) + if referer: c.setopt(c.REFERER, referer) From a995f7210072a3bfd4124ddbddf9ab68a2c12d68 Mon Sep 17 00:00:00 2001 From: j105rob Date: Wed, 10 Dec 2014 07:07:16 -0500 Subject: [PATCH 39/42] adding handler for empty paste --- .gitignore | 2 + .project | 17 ++++++++ .pydevproject | 5 +++ lib/Paste.py | 105 ++++++++++++++++++++++++------------------------ lib/Pastebin.py | 6 ++- 5 files changed, 82 insertions(+), 53 deletions(-) create mode 100644 .project create mode 100644 .pydevproject diff --git a/.gitignore b/.gitignore index fc2cb1e..eb45148 100644 --- a/.gitignore +++ b/.gitignore @@ -1,6 +1,8 @@ # project settings.py output.log +.project +.pydevproject # python specific *.pyc diff --git a/.project b/.project new file mode 100644 index 0000000..6d730ed --- /dev/null +++ b/.project @@ -0,0 +1,17 @@ + + + dumpmon + + + + + + org.python.pydev.PyDevBuilder + + + + + + org.python.pydev.pythonNature + + diff --git a/.pydevproject b/.pydevproject new file mode 100644 index 0000000..40e9f40 --- /dev/null +++ b/.pydevproject @@ -0,0 +1,5 @@ + + +Default +python 2.7 + diff --git a/lib/Paste.py b/lib/Paste.py index ab28782..9dbfc11 100644 --- a/lib/Paste.py +++ b/lib/Paste.py @@ -59,61 +59,62 @@ def match(self): # Get the amount of emails try: r = self.text.splitlines() - logging.debug("[*] Num Lines in text: %i"%(len(r))) - except Exception as e: - logging.debug("[!] Error: %s"%(str(e))) - + logging.debug("[*] Num Lines in text: %i"%(len(r))) - if regexes['email'].search(self.text): - self.emails = regexes['email'].findall(self.text) - - if regexes['email2'].search(self.text): - self.emails2 = regexes['email2'].findall(self.text) - - self.hashes = regexes['hash32'].findall(self.text) - - self.num_emails = len(self.emails) - logging.debug("[*] Num Emails: %i"%(self.num_emails)) - - self.num_emails = len(self.emails2) - logging.debug("[*] Num Emails2: %i"%(self.num_emails)) - - self.num_hashes = len(self.hashes) - logging.debug("[*] Num Hashes: %i"%(self.num_hashes)) - - if self.num_emails > 0: - self.sites = list(set([re.search('@(.*)$', email).group(1).lower() for email in self.emails])) - logging.debug("[*] Num Sites: %i"%(len(self.sites))) - - for regex in regexes['db_keywords']: - if regex.search(self.text): - logging.debug('\t[+] ' + regex.search(self.text).group(1)) - self.db_keywords += round(1/float( - len(regexes['db_keywords'])), 2) - - for regex in regexes['blacklist']: - if regex.search(self.text): - logging.debug('\t[-] ' + regex.search(self.text).group(1)) - self.db_keywords -= round(1.25 * ( - 1/float(len(regexes['db_keywords']))), 2) + if regexes['email'].search(self.text): + self.emails = regexes['email'].findall(self.text) - if (self.num_emails >= settings.EMAIL_THRESHOLD) or (self.num_hashes >= settings.HASH_THRESHOLD) or (self.db_keywords >= settings.DB_KEYWORDS_THRESHOLD): - self.type = 'db_dump' + if regexes['email2'].search(self.text): + self.emails2 = regexes['email2'].findall(self.text) - if regexes['cisco_hash'].search(self.text) or regexes['cisco_pass'].search(self.text): - self.type = 'cisco' + self.hashes = regexes['hash32'].findall(self.text) - if regexes['honeypot'].search(self.text): - self.type = 'honeypot' + self.num_emails = len(self.emails) + logging.debug("[*] Num Emails: %i"%(self.num_emails)) - if regexes['google_api'].search(self.text): - self.type = 'google_api' + self.num_emails = len(self.emails2) + logging.debug("[*] Num Emails2: %i"%(self.num_emails)) - # if regexes['juniper'].search(self.text): self.type = 'Juniper' - for regex in regexes['banlist']: - if regex.search(self.text): - self.type = None - break - - logging.debug("[*] Type: %s"%(self.type)) - return self.type + self.num_hashes = len(self.hashes) + logging.debug("[*] Num Hashes: %i"%(self.num_hashes)) + + if self.num_emails > 0: + self.sites = list(set([re.search('@(.*)$', email).group(1).lower() for email in self.emails])) + logging.debug("[*] Num Sites: %i"%(len(self.sites))) + + for regex in regexes['db_keywords']: + if regex.search(self.text): + logging.debug('\t[+] ' + regex.search(self.text).group(1)) + self.db_keywords += round(1/float( + len(regexes['db_keywords'])), 2) + + for regex in regexes['blacklist']: + if regex.search(self.text): + logging.debug('\t[-] ' + regex.search(self.text).group(1)) + self.db_keywords -= round(1.25 * ( + 1/float(len(regexes['db_keywords']))), 2) + + if (self.num_emails >= settings.EMAIL_THRESHOLD) or (self.num_hashes >= settings.HASH_THRESHOLD) or (self.db_keywords >= settings.DB_KEYWORDS_THRESHOLD): + self.type = 'db_dump' + + if regexes['cisco_hash'].search(self.text) or regexes['cisco_pass'].search(self.text): + self.type = 'cisco' + + if regexes['honeypot'].search(self.text): + self.type = 'honeypot' + + if regexes['google_api'].search(self.text): + self.type = 'google_api' + + # if regexes['juniper'].search(self.text): self.type = 'Juniper' + for regex in regexes['banlist']: + if regex.search(self.text): + self.type = None + break + + logging.debug("[*] Type: %s"%(self.type)) + return self.type + + except Exception as e: + logging.debug("[!] Error: %s"%(str(e))) + return None diff --git a/lib/Pastebin.py b/lib/Pastebin.py index f3b7960..bc63eb4 100644 --- a/lib/Pastebin.py +++ b/lib/Pastebin.py @@ -26,7 +26,11 @@ def __init__(self): self.sleep = SLEEP_PASTEBIN super(Pastebin, self).__init__() logging.info('[+] Started PasteBin') - + + def terminating(self): + #TODO: persist the seen queue + pass + def parse(self): return BeautifulSoup(helper.curl(self.BASE_URL + '/archive')).find_all( lambda tag: tag.name == 'td' and tag.a and '/archive/' not in tag.a['href'] and tag.a['href'][1:]) From 053a6ceb34eba36aada6ece906ecc4f620a26078 Mon Sep 17 00:00:00 2001 From: j105rob Date: Mon, 29 Dec 2014 06:34:14 -0500 Subject: [PATCH 40/42] fixed the None return from the curl" --- lib/UserSubmitted.py | 2 +- lib/helper.py | 6 +++++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/lib/UserSubmitted.py b/lib/UserSubmitted.py index e69839a..1dd08f3 100644 --- a/lib/UserSubmitted.py +++ b/lib/UserSubmitted.py @@ -32,7 +32,7 @@ def update(self,url): logging.info('Adding User Sumbmitted URL: ' + paste.url) self.put(paste) - def monitor(self, bot,isRunning): + def monitor(self, bot): if not self.empty(): paste = self.get() logging.info('[*] Checking ' + paste.url) diff --git a/lib/helper.py b/lib/helper.py index 4cb174b..9ad8f64 100644 --- a/lib/helper.py +++ b/lib/helper.py @@ -45,9 +45,13 @@ def curl (url,referer=None): if rc != 200: logging.error('[!] %s Response code: %d'%(url,rc)) - return buffer.getvalue() + r = buffer.getvalue() + #sometimes the buffer is None + return r if r else "no html could be returned" + except Exception as e: logging.error('[!] Curl Error: %s'%(str(e))) + return "error" def download(url, headers=None): if not headers: From e37b5a2a2575fe1c9f41f202d4d8d76fde4839fd Mon Sep 17 00:00:00 2001 From: j105rob Date: Wed, 31 Dec 2014 07:22:25 -0500 Subject: [PATCH 41/42] changed logging msgs to debug where applicable --- lib/HaveIBeen.py | 4 ++-- lib/Paste.py | 2 +- lib/Pastebin.py | 4 ++-- lib/Pastie.py | 4 ++-- lib/Slexy.py | 4 ++-- lib/UserSubmitted.py | 8 ++++---- 6 files changed, 13 insertions(+), 13 deletions(-) diff --git a/lib/HaveIBeen.py b/lib/HaveIBeen.py index 1d7027a..94024e1 100644 --- a/lib/HaveIBeen.py +++ b/lib/HaveIBeen.py @@ -40,7 +40,7 @@ def _parse(self): return None def update(self): - logging.info('Retrieving HaveIBeenPwned ID\'s') + logging.debug('Retrieving HaveIBeenPwned ID\'s') i=0 for entry in self._parse(): @@ -50,7 +50,7 @@ def update(self): if not self.hasSeen(paste): i+=1 self.put(paste) - logging.info('HaveIBeenPwned Added URLs: ' + str(i)) + logging.debug('HaveIBeenPwned Added URLs: ' + str(i)) diff --git a/lib/Paste.py b/lib/Paste.py index 9dbfc11..ebe1072 100644 --- a/lib/Paste.py +++ b/lib/Paste.py @@ -116,5 +116,5 @@ def match(self): return self.type except Exception as e: - logging.debug("[!] Error: %s"%(str(e))) + logging.error("[!] Error: %s"%(str(e))) return None diff --git a/lib/Pastebin.py b/lib/Pastebin.py index bc63eb4..94303a1 100644 --- a/lib/Pastebin.py +++ b/lib/Pastebin.py @@ -37,7 +37,7 @@ def parse(self): def update(self): '''update(self) - Fill Queue with new Pastebin IDs''' - logging.info('Retrieving Pastebin ID\'s') + logging.debug('Retrieving Pastebin ID\'s') i=0 for entry in self.parse(): paste = PastebinPaste(entry.a['href'][1:]) @@ -45,5 +45,5 @@ def update(self): #logging.info('Adding URL: ' + paste.url) i+=1 self.put(paste) - logging.info('Pastebin Added URLs: ' + str(i)) + logging.debug('Pastebin Added URLs: ' + str(i)) diff --git a/lib/Pastie.py b/lib/Pastie.py index 3cf4a43..6a38b12 100644 --- a/lib/Pastie.py +++ b/lib/Pastie.py @@ -34,7 +34,7 @@ def parse(self): def update(self): '''update(self) - Fill Queue with new Pastie IDs''' - logging.info('Retrieving Pastie ID\'s') + logging.debug('Retrieving Pastie ID\'s') i=0 for entry in self.parse(): paste = PastiePaste(entry.a['href'].replace( @@ -42,5 +42,5 @@ def update(self): if not self.hasSeen(paste): i+=1 self.put(paste) - logging.info('Pastie Added URLs: ' + str(i)) + logging.debug('Pastie Added URLs: ' + str(i)) diff --git a/lib/Slexy.py b/lib/Slexy.py index 7460225..b1c41d2 100644 --- a/lib/Slexy.py +++ b/lib/Slexy.py @@ -30,7 +30,7 @@ def parse(self): def update(self): '''update(self) - Fill Queue with new Slexy IDs''' - logging.info('[*] Retrieving Slexy ID\'s') + logging.debug('[*] Retrieving Slexy ID\'s') i=0 for entry in self.parse(): @@ -38,6 +38,6 @@ def update(self): if not self.hasSeen(paste): i+=1 self.put(paste) - logging.info('Slexy Added URLs: ' + str(i)) + logging.debug('Slexy Added URLs: ' + str(i)) diff --git a/lib/UserSubmitted.py b/lib/UserSubmitted.py index 1dd08f3..05803c2 100644 --- a/lib/UserSubmitted.py +++ b/lib/UserSubmitted.py @@ -13,7 +13,7 @@ def __init__(self, url): super(UserSubmittedPaste, self).__init__(url) self.headers = None self.url = resolve(url) - logging.info('[+] URL expanded to %s'%(self.url)) + logging.debug('[+] URL expanded to %s'%(self.url)) def get(self): self.text = helper.curl(self.url) @@ -29,17 +29,17 @@ def parse(self): def update(self,url): paste = UserSubmittedPaste(url) if not self.hasSeen(paste): - logging.info('Adding User Sumbmitted URL: ' + paste.url) + logging.debug('Adding User Sumbmitted URL: ' + paste.url) self.put(paste) def monitor(self, bot): if not self.empty(): paste = self.get() - logging.info('[*] Checking ' + paste.url) + logging.debug('[*] Checking ' + paste.url) paste.get() tweet = helper.build_tweet(paste) if tweet: - logging.info(tweet) + logging.debug(tweet) with bot.tweetLock: if USE_DB: self.db_client.save(repr(paste)) From 8af3fe94f82511e58bf091b74728112ecb994738 Mon Sep 17 00:00:00 2001 From: j105rob Date: Wed, 31 Dec 2014 07:45:25 -0500 Subject: [PATCH 42/42] changed logging msgs to debug where applicable --- lib/Stats.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/lib/Stats.py b/lib/Stats.py index 0e7b6a6..0c13e7f 100644 --- a/lib/Stats.py +++ b/lib/Stats.py @@ -13,8 +13,9 @@ def __init__(self): self.client = MongoClient(DB_HOST, DB_PORT).paste_db.pastes except pymongo.errors.ConnectionFailure, e: logging.error('[!] Database failed to start %s'%(e)) - - self.cacheEmail = self.uniqueEmailSet() + #commenting this cache call out; taking too much time at start up. + #self.cacheEmail = self.uniqueEmailSet() + self.cacheEmail = None def uniqueEmailSet(self): map = Code("function () {"