diff --git a/.gitignore b/.gitignore
index fc2cb1e..eb45148 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,6 +1,8 @@
# project
settings.py
output.log
+.project
+.pydevproject
# python specific
*.pyc
diff --git a/.project b/.project
new file mode 100644
index 0000000..6d730ed
--- /dev/null
+++ b/.project
@@ -0,0 +1,17 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<projectDescription>
+	<name>dumpmon</name>
+	<comment></comment>
+	<projects>
+	</projects>
+	<buildSpec>
+		<buildCommand>
+			<name>org.python.pydev.PyDevBuilder</name>
+			<arguments>
+			</arguments>
+		</buildCommand>
+	</buildSpec>
+	<natures>
+		<nature>org.python.pydev.pythonNature</nature>
+	</natures>
+</projectDescription>
diff --git a/.pydevproject b/.pydevproject
new file mode 100644
index 0000000..40e9f40
--- /dev/null
+++ b/.pydevproject
@@ -0,0 +1,5 @@
+<?xml version="1.0" encoding="UTF-8" standalone="no"?>
+<?eclipse-pydev version="1.0"?><pydev_project>
+<pydev_property name="org.python.pydev.PYTHON_PROJECT_INTERPRETER">Default</pydev_property>
+<pydev_property name="org.python.pydev.PYTHON_PROJECT_VERSION">python 2.7</pydev_property>
+</pydev_project>
diff --git a/__init__.py b/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/dumpmon.py b/dumpmon.py
index a315673..b517f2c 100644
--- a/dumpmon.py
+++ b/dumpmon.py
@@ -12,12 +12,17 @@
from lib.Pastebin import Pastebin, PastebinPaste
from lib.Slexy import Slexy, SlexyPaste
from lib.Pastie import Pastie, PastiePaste
-from lib.helper import log
+from lib.HaveIBeen import HaveIBeen, HaveIBeenPaste
+
+from lib.helper import log, createThread
+from lib.TwitterBot import TwitterBot
+from lib.RegexMgr import RegexMgr
+from lib.Stats import Stats
from time import sleep
-from twitter import Twitter, OAuth
-from settings import CONSUMER_KEY, CONSUMER_SECRET, ACCESS_TOKEN, ACCESS_TOKEN_SECRET, log_file
+from settings import log_file
import threading
import logging
+from logging.handlers import RotatingFileHandler
def monitor():
@@ -30,36 +35,55 @@ def monitor():
parser.add_argument(
"-v", "--verbose", help="more verbose", action="store_true")
args = parser.parse_args()
+
level = logging.INFO
if args.verbose:
level = logging.DEBUG
+
logging.basicConfig(
- format='%(asctime)s [%(levelname)s] %(message)s', filename=log_file, level=level)
+ format='%(asctime)s [%(levelname)s][%(module)s][%(funcName)s] %(message)s', filename=log_file, level=level)
+
+ handler = RotatingFileHandler(log_file, maxBytes=20*1000,
+ backupCount=5)
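+    # the rotating handler is only created here; uncommenting the next line would attach it to the root logger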
+    #logging.getLogger().addHandler(handler)
+
logging.info('Monitoring...')
- bot = Twitter(
- auth=OAuth(ACCESS_TOKEN, ACCESS_TOKEN_SECRET,
- CONSUMER_KEY, CONSUMER_SECRET)
- )
- # Create lock for both output log and tweet action
+
+ regexMgr = RegexMgr()
+ bot = TwitterBot(regexMgr)
+
+ # Create lock for output log
log_lock = threading.Lock()
- tweet_lock = threading.Lock()
-
- pastebin_thread = threading.Thread(
- target=Pastebin().monitor, args=[bot, tweet_lock])
- slexy_thread = threading.Thread(
- target=Slexy().monitor, args=[bot, tweet_lock])
- pastie_thead = threading.Thread(
- target=Pastie().monitor, args=[bot, tweet_lock])
-
- for thread in (pastebin_thread, slexy_thread, pastie_thead):
- thread.daemon = True
- thread.start()
+
+ #create an event to tell threads to keep running
+ isRunning = threading.Event()
+ isRunning.set()
+
+ #array to keep a handle on threads
+ workers = []
+
+    #these next 2 workers don't need to be joined when terminated
+ createThread(bot.monitor)
+ createThread(Stats().monitor,bot)
+
+ #these workers need to be shut down gracefully
+ workers.append(createThread(HaveIBeen().monitor,bot,isRunning))
+ workers.append(createThread(Pastebin().monitor,bot,isRunning))
+ workers.append(createThread(Slexy().monitor,bot,isRunning))
+ workers.append(createThread(Pastie().monitor,bot,isRunning))
# Let threads run
try:
while(1):
sleep(5)
except KeyboardInterrupt:
+ #signal threads to shutdown
+ isRunning.clear()
+ print 'stopping'
+ #wait for threads to join
+ for t in workers:
+ t.join()
+ print 'stopped'
logging.warn('Stopped.')
diff --git a/lib/HaveIBeen.py b/lib/HaveIBeen.py
new file mode 100644
index 0000000..94024e1
--- /dev/null
+++ b/lib/HaveIBeen.py
@@ -0,0 +1,60 @@
+"""
+Troy Hunt's RSS Feed for the last 50 pastes
+
+http://feeds.feedburner.com/HaveIBeenPwnedLatestPastes
+
+"""
+import feedparser
+
+from .Site import Site
+from .Paste import Paste
+from bs4 import BeautifulSoup
+from . import helper
+from time import sleep
+from settings import SLEEP_HAVEIBEEN
+from twitter import TwitterError
+import logging
+
+class HaveIBeenPaste(Paste):
+ def __init__(self, id):
+ super(HaveIBeenPaste, self).__init__(id)
+ self.headers = None
+ self.url = 'http://pastebin.com/raw.php?i=' + self.id
+
+ def get(self):
+ self.text = helper.curl(self.url)
+
+class HaveIBeen(Site):
+ def __init__(self):
+ super(HaveIBeen, self).__init__()
+ self.sleep = SLEEP_HAVEIBEEN
+ logging.info('[+] Started HaveIBeen')
+ self.feedURL = 'http://feeds.feedburner.com/HaveIBeenPwnedLatestPastes'
+
+ def _parse(self):
+ try:
+ d = feedparser.parse(self.feedURL)
+ return d['entries']
+ except Exception as e:
+ logging.error('[!] Feed Parser Error: %s'%(str(e)))
+ return None
+
+ def update(self):
+ logging.debug('Retrieving HaveIBeenPwned ID\'s')
+ i=0
+
+ for entry in self._parse():
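+            # each feed entry links to the paste on pastebin.com; splitting the URL on '/' leaves the paste id at index 3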
+ l = entry['links'][0]['href']
+ link = l.split(r'/')
+ paste = HaveIBeenPaste(link[3])
+ if not self.hasSeen(paste):
+ i+=1
+ self.put(paste)
+ logging.debug('HaveIBeenPwned Added URLs: ' + str(i))
+
+
+
+
+if __name__ == '__main__':
+ c = HaveIBeen()
+ c.update()
\ No newline at end of file
diff --git a/lib/Paste.py b/lib/Paste.py
index bc379c6..ebe1072 100644
--- a/lib/Paste.py
+++ b/lib/Paste.py
@@ -2,22 +2,47 @@
import settings
import logging
import re
+import time
class Paste(object):
- def __init__(self):
+ def __init__(self,id):
'''
class Paste: Generic "Paste" object to contain attributes of a standard paste
-
'''
- self.emails = 0
- self.hashes = 0
+ self.id = id
+ self.emails = []
+ self.emails2 = []
+ self.hashes = []
self.num_emails = 0
self.num_hashes = 0
self.text = None
self.type = None
self.sites = None
self.db_keywords = 0.0
-
+
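+    # pastes compare equal when their ids match; Site.hasSeen relies on this when testing membership in the seen deque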
+ def __eq__(self,comparePaste):
+ #logging.info('id %s compares to %s'%(self.id, comparePaste.id))
+ return self.id == comparePaste.id
+
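+    # flatten the paste into a dict for storage; Site.monitor passes this straight to MongoDB via db_client.save(paste.row())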
+ def row(self):
+ return {
+ 'pid' : self.id,
+ 'text' : self.text,
+ 'emails' : self.emails,
+ 'hashes' : self.hashes,
+ 'num_emails' : self.num_emails,
+ 'num_hashes' : self.num_hashes,
+ 'type' : self.type,
+ 'db_keywords' : self.db_keywords,
+ 'url' : self.url,
+ "added":time.strftime("%c")
+ }
+
+ def get(self):
+ #override this
+ logging.error('[@] Function Not Implemented in Subclass')
+ pass
+
def match(self):
'''
Matches the paste against a series of regular expressions to determine if the paste is 'interesting'
@@ -32,33 +57,64 @@ def match(self):
'''
# Get the amount of emails
- self.emails = list(set(regexes['email'].findall(self.text)))
- self.hashes = regexes['hash32'].findall(self.text)
- self.num_emails = len(self.emails)
- self.num_hashes = len(self.hashes)
- if self.num_emails > 0:
- self.sites = list(set([re.search('@(.*)$', email).group(1).lower() for email in self.emails]))
- for regex in regexes['db_keywords']:
- if regex.search(self.text):
- logging.debug('\t[+] ' + regex.search(self.text).group(1))
- self.db_keywords += round(1/float(
- len(regexes['db_keywords'])), 2)
- for regex in regexes['blacklist']:
- if regex.search(self.text):
- logging.debug('\t[-] ' + regex.search(self.text).group(1))
- self.db_keywords -= round(1.25 * (
- 1/float(len(regexes['db_keywords']))), 2)
- if (self.num_emails >= settings.EMAIL_THRESHOLD) or (self.num_hashes >= settings.HASH_THRESHOLD) or (self.db_keywords >= settings.DB_KEYWORDS_THRESHOLD):
- self.type = 'db_dump'
- if regexes['cisco_hash'].search(self.text) or regexes['cisco_pass'].search(self.text):
- self.type = 'cisco'
- if regexes['honeypot'].search(self.text):
- self.type = 'honeypot'
- if regexes['google_api'].search(self.text):
- self.type = 'google_api'
- # if regexes['juniper'].search(self.text): self.type = 'Juniper'
- for regex in regexes['banlist']:
- if regex.search(self.text):
- self.type = None
- break
- return self.type
+ try:
+ r = self.text.splitlines()
+ logging.debug("[*] Num Lines in text: %i"%(len(r)))
+
+ if regexes['email'].search(self.text):
+ self.emails = regexes['email'].findall(self.text)
+
+ if regexes['email2'].search(self.text):
+ self.emails2 = regexes['email2'].findall(self.text)
+
+ self.hashes = regexes['hash32'].findall(self.text)
+
+ self.num_emails = len(self.emails)
+ logging.debug("[*] Num Emails: %i"%(self.num_emails))
+
+ self.num_emails = len(self.emails2)
+ logging.debug("[*] Num Emails2: %i"%(self.num_emails))
+
+ self.num_hashes = len(self.hashes)
+ logging.debug("[*] Num Hashes: %i"%(self.num_hashes))
+
+ if self.num_emails > 0:
+ self.sites = list(set([re.search('@(.*)$', email).group(1).lower() for email in self.emails]))
+ logging.debug("[*] Num Sites: %i"%(len(self.sites)))
+
+ for regex in regexes['db_keywords']:
+ if regex.search(self.text):
+ logging.debug('\t[+] ' + regex.search(self.text).group(1))
+ self.db_keywords += round(1/float(
+ len(regexes['db_keywords'])), 2)
+
+ for regex in regexes['blacklist']:
+ if regex.search(self.text):
+ logging.debug('\t[-] ' + regex.search(self.text).group(1))
+ self.db_keywords -= round(1.25 * (
+ 1/float(len(regexes['db_keywords']))), 2)
+
+ if (self.num_emails >= settings.EMAIL_THRESHOLD) or (self.num_hashes >= settings.HASH_THRESHOLD) or (self.db_keywords >= settings.DB_KEYWORDS_THRESHOLD):
+ self.type = 'db_dump'
+
+ if regexes['cisco_hash'].search(self.text) or regexes['cisco_pass'].search(self.text):
+ self.type = 'cisco'
+
+ if regexes['honeypot'].search(self.text):
+ self.type = 'honeypot'
+
+ if regexes['google_api'].search(self.text):
+ self.type = 'google_api'
+
+ # if regexes['juniper'].search(self.text): self.type = 'Juniper'
+ for regex in regexes['banlist']:
+ if regex.search(self.text):
+ self.type = None
+ break
+
+ logging.debug("[*] Type: %s"%(self.type))
+ return self.type
+
+ except Exception as e:
+ logging.error("[!] Error: %s"%(str(e)))
+ return None
diff --git a/lib/Pastebin.py b/lib/Pastebin.py
index e9656cd..94303a1 100644
--- a/lib/Pastebin.py
+++ b/lib/Pastebin.py
@@ -10,37 +10,40 @@
class PastebinPaste(Paste):
def __init__(self, id):
- self.id = id
+ super(PastebinPaste, self).__init__(id)
self.headers = None
self.url = 'http://pastebin.com/raw.php?i=' + self.id
- super(PastebinPaste, self).__init__()
-
+
+ def get(self):
+ self.text = helper.curl(self.url)
class Pastebin(Site):
- def __init__(self, last_id=None):
- if not last_id:
- last_id = None
- self.ref_id = last_id
+ """
+    Pastebin will block your IP if you make more than 600 requests in 10 minutes (per admin@pastebin.com).
+ """
+ def __init__(self):
self.BASE_URL = 'http://pastebin.com'
self.sleep = SLEEP_PASTEBIN
super(Pastebin, self).__init__()
-
+ logging.info('[+] Started PasteBin')
+
+ def terminating(self):
+ #TODO: persist the seen queue
+ pass
+
+ def parse(self):
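+        # scrape the /archive page and keep only the <td> links that point at individual pastes; hrefs containing '/archive/' are skipped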
+ return BeautifulSoup(helper.curl(self.BASE_URL + '/archive')).find_all(
+ lambda tag: tag.name == 'td' and tag.a and '/archive/' not in tag.a['href'] and tag.a['href'][1:])
+
def update(self):
'''update(self) - Fill Queue with new Pastebin IDs'''
- logging.info('Retrieving Pastebin ID\'s')
- results = BeautifulSoup(helper.download(self.BASE_URL + '/archive')).find_all(
- lambda tag: tag.name == 'td' and tag.a and '/archive/' not in tag.a['href'] and tag.a['href'][1:])
- new_pastes = []
- if not self.ref_id:
- results = results[:60]
- for entry in results:
+ logging.debug('Retrieving Pastebin ID\'s')
+ i=0
+ for entry in self.parse():
paste = PastebinPaste(entry.a['href'][1:])
- # Check to see if we found our last checked URL
- if paste.id == self.ref_id:
- break
- new_pastes.append(paste)
- for entry in new_pastes[::-1]:
- logging.info('Adding URL: ' + entry.url)
- self.put(entry)
- def get_paste_text(self, paste):
- return helper.download(paste.url)
+ if not self.hasSeen(paste):
+ #logging.info('Adding URL: ' + paste.url)
+ i+=1
+ self.put(paste)
+ logging.debug('Pastebin Added URLs: ' + str(i))
+
diff --git a/lib/Pastie.py b/lib/Pastie.py
index da84e55..6a38b12 100644
--- a/lib/Pastie.py
+++ b/lib/Pastie.py
@@ -10,39 +10,37 @@
class PastiePaste(Paste):
def __init__(self, id):
- self.id = id
+ super(PastiePaste, self).__init__(id)
self.headers = None
self.url = 'http://pastie.org/pastes/' + self.id + '/text'
- super(PastiePaste, self).__init__()
+ def get(self):
+ try:
+ self.text = BeautifulSoup(helper.curl(self.url)).pre.text
+ except Exception as e:
+ logging.error('[!] Beautiful Soup Error: %s'%(str(e)))
+ self.text = None
class Pastie(Site):
- def __init__(self, last_id=None):
- if not last_id:
- last_id = None
- self.ref_id = last_id
+ def __init__(self):
self.BASE_URL = 'http://pastie.org'
self.sleep = SLEEP_PASTIE
super(Pastie, self).__init__()
+ logging.info('[+] Started Pastie')
+
+ def parse(self):
+ return [tag for tag in BeautifulSoup(helper.curl(
+ self.BASE_URL + '/pastes')).find_all('p', 'link') if tag.a]
def update(self):
'''update(self) - Fill Queue with new Pastie IDs'''
- logging.info('Retrieving Pastie ID\'s')
- results = [tag for tag in BeautifulSoup(helper.download(
- self.BASE_URL + '/pastes')).find_all('p', 'link') if tag.a]
- new_pastes = []
- if not self.ref_id:
- results = results[:60]
- for entry in results:
+ logging.debug('Retrieving Pastie ID\'s')
+ i=0
+ for entry in self.parse():
paste = PastiePaste(entry.a['href'].replace(
self.BASE_URL + '/pastes/', ''))
- # Check to see if we found our last checked URL
- if paste.id == self.ref_id:
- break
- new_pastes.append(paste)
- for entry in new_pastes[::-1]:
- logging.debug('Adding URL: ' + entry.url)
- self.put(entry)
+ if not self.hasSeen(paste):
+ i+=1
+ self.put(paste)
+ logging.debug('Pastie Added URLs: ' + str(i))
- def get_paste_text(self, paste):
- return BeautifulSoup(helper.download(paste.url)).pre.text
\ No newline at end of file
diff --git a/lib/RegexMgr.py b/lib/RegexMgr.py
new file mode 100644
index 0000000..f9c8d46
--- /dev/null
+++ b/lib/RegexMgr.py
@@ -0,0 +1,61 @@
+import re
+from pymongo import MongoClient
+from pymongo.errors import ConnectionFailure
+from settings import USE_DB, DB_HOST, DB_PORT
+import time
+import logging
+
+import threading
+
+class RegexMgr(object):
+ """
+    This class handles the custom regexes and their persistence to the DB for observers
+ """
+ def __init__(self):
+ self.regexLock = threading.Lock()
+
+ if USE_DB:
+ try:
+ self.client = MongoClient(DB_HOST, DB_PORT).paste_db.regexes
+            except ConnectionFailure, e:
+ logging.error('[!] Database failed to start %s'%(e))
+
+ self.customRegexes = []
+ self._loadRegexes()
+
+
+ def _loadRegexes(self):
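+        # pull the custom regexes from Mongo, compile them, and cache them in memory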
+ with self.regexLock:
+ cursor = self.client.find()
+ for row in cursor:
+ customRegex = {}
+ rc = re.compile(row['regex'])
+ customRegex['regex'] = rc
+ customRegex['user'] = row['user']
+ customRegex['added'] = time.strftime("%c")
+ self.customRegexes.append(customRegex)
+
+ logging.info("[+] Loaded custom regexes: %s"%(self.customRegexes))
+
+ def reloadCustomRegexes(self):
+ self.customRegexes = []
+ self._loadRegexes()
+
+ def add(self,regex, user):
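+        # validate the supplied pattern, persist it to Mongo, and add it to the in-memory cache; returns True on success (falls through to None if the pattern is invalid)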
+ if self.valid(regex):
+ o = {"user":user,"regex":regex,"added":time.strftime("%c")}
+ self.client.insert(o)
+ self.customRegexes.append(o)
+ return True
+
+ def valid(self,regex):
+ try:
+ re.compile(regex)
+ is_valid = True
+ except re.error:
+ is_valid = False
+ return is_valid
+
+
+
+
+
\ No newline at end of file
diff --git a/lib/Site.py b/lib/Site.py
index 6ab60ef..e94e92c 100644
--- a/lib/Site.py
+++ b/lib/Site.py
@@ -5,10 +5,12 @@
from pymongo import MongoClient
from requests import ConnectionError
from twitter import TwitterError
-from settings import USE_DB, DB_HOST, DB_PORT
+from settings import USE_DB, DB_HOST, DB_PORT, SEEN_DEQUE_LEN
import logging
import helper
+from random import randint
+from collections import deque
class Site(object):
'''
@@ -31,19 +33,33 @@ class Site(object):
# that I could find... So, I decided to implement my own queue with a few
# changes
def __init__(self, queue=None):
+
+        # the double-ended queue holds the last n pastes so we can check whether one has already been processed, since paste IDs are random strings.
+ self.seen = deque(maxlen=SEEN_DEQUE_LEN)
+
if queue is None:
self.queue = []
+
if USE_DB:
# Lazily create the db and collection if not present
self.db_client = MongoClient(DB_HOST, DB_PORT).paste_db.pastes
-
+ def addSeen(self,item):
+ self.seen.append(item)
+ #logging.info('[@] Site deque len %i'%(len(self.seen)))
+
+ def hasSeen(self,item):
+ res = item in self.seen
+ #logging.info('[@] URL Seen %s %s'%(item.url,res))
+ return res
+
def empty(self):
return len(self.queue) == 0
def get(self):
if not self.empty():
result = self.queue[0]
+ self.addSeen(result)
del self.queue[0]
else:
result = None
@@ -67,36 +83,50 @@ def clear(self):
def list(self):
print('\n'.join(url for url in self.queue))
- def monitor(self, bot, t_lock):
+ def parse(self):
+ #override this
+ logging.error('[@] Function Not Implemented in Subclass')
+ pass
+
+ def update(self):
+ #override this
+ logging.error('[@] Function Not Implemented in Subclass')
+ pass
+
+ def terminating(self):
+ #this can be overridden in subclass
+ logging.debug('[!] Terminating.....')
+
+ def monitor(self, bot, isRunning):
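+    # worker loop: drain the queue, fetch each paste, tweet/store anything interesting, then refill and sleep until the isRunning event is cleared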
self.update()
- while(1):
+ while isRunning.is_set():
while not self.empty():
+ if not isRunning.is_set(): break
+ #need to sleep to avoid the ban....
+ time.sleep(randint(2,5))
paste = self.get()
- self.ref_id = paste.id
- logging.info('[*] Checking ' + paste.url)
- paste.text = self.get_paste_text(paste)
+ paste.get()
tweet = helper.build_tweet(paste)
if tweet:
logging.info(tweet)
- with t_lock:
+ with bot.tweetLock:
if USE_DB:
- self.db_client.save({
- 'pid' : paste.id,
- 'text' : paste.text,
- 'emails' : paste.emails,
- 'hashes' : paste.hashes,
- 'num_emails' : paste.num_emails,
- 'num_hashes' : paste.num_hashes,
- 'type' : paste.type,
- 'db_keywords' : paste.db_keywords,
- 'url' : paste.url
- })
+ try:
+ self.db_client.save(paste.row())
+ except Exception as e:
+ logging.error('[!] MongoDB Error %s'%(str(e)))
try:
+ logging.debug('[+] Tweet %s'%(tweet))
bot.statuses.update(status=tweet)
- except TwitterError:
- pass
+ except TwitterError as e:
+ logging.error('[!] TwitterError %s'%(str(e)))
+ if not isRunning.is_set(): break
self.update()
while self.empty():
logging.debug('[*] No results... sleeping')
time.sleep(self.sleep)
self.update()
+
+ self.terminating()
+
+
diff --git a/lib/Slexy.py b/lib/Slexy.py
index 3876c81..b1c41d2 100644
--- a/lib/Slexy.py
+++ b/lib/Slexy.py
@@ -10,38 +10,34 @@
class SlexyPaste(Paste):
def __init__(self, id):
- self.id = id
+ super(SlexyPaste, self).__init__(id)
self.headers = {'Referer': 'http://slexy.org/view/' + self.id}
self.url = 'http://slexy.org/raw/' + self.id
- super(SlexyPaste, self).__init__()
-
+
+ def get(self):
+ self.text = helper.curl(self.url, self.headers['Referer'])
class Slexy(Site):
- def __init__(self, last_id=None):
- if not last_id:
- last_id = None
- self.ref_id = last_id
+ def __init__(self):
self.BASE_URL = 'http://slexy.org'
self.sleep = SLEEP_SLEXY
super(Slexy, self).__init__()
-
+ logging.info('[+] Started Slexy')
+
+ def parse(self):
+ return BeautifulSoup(helper.curl(self.BASE_URL + '/recent')).find_all(
+ lambda tag: tag.name == 'td' and tag.a and '/view/' in tag.a['href'])
+
def update(self):
'''update(self) - Fill Queue with new Slexy IDs'''
- logging.info('[*] Retrieving Slexy ID\'s')
- results = BeautifulSoup(helper.download(self.BASE_URL + '/recent')).find_all(
- lambda tag: tag.name == 'td' and tag.a and '/view/' in tag.a['href'])
- new_pastes = []
- if not self.ref_id:
- results = results[:60]
- for entry in results:
+ logging.debug('[*] Retrieving Slexy ID\'s')
+
+ i=0
+ for entry in self.parse():
paste = SlexyPaste(entry.a['href'].replace('/view/', ''))
- # Check to see if we found our last checked URL
- if paste.id == self.ref_id:
- break
- new_pastes.append(paste)
- for entry in new_pastes[::-1]:
- logging.info('[+] Adding URL: ' + entry.url)
- self.put(entry)
+ if not self.hasSeen(paste):
+ i+=1
+ self.put(paste)
+ logging.debug('Slexy Added URLs: ' + str(i))
+
- def get_paste_text(self, paste):
- return helper.download(paste.url, paste.headers)
diff --git a/lib/Stats.py b/lib/Stats.py
new file mode 100644
index 0000000..0c13e7f
--- /dev/null
+++ b/lib/Stats.py
@@ -0,0 +1,83 @@
+from pymongo import MongoClient
+from pymongo.errors import ConnectionFailure
+from bson import Code
+from twitter import TwitterError
+from settings import USE_DB, DB_HOST, DB_PORT, STATS_FREQ
+
+import logging
+import time
+
+class Stats(object):
+ def __init__(self):
+ if USE_DB:
+ try:
+ self.client = MongoClient(DB_HOST, DB_PORT).paste_db.pastes
+            except ConnectionFailure, e:
+ logging.error('[!] Database failed to start %s'%(e))
+ #commenting this cache call out; taking too much time at start up.
+ #self.cacheEmail = self.uniqueEmailSet()
+ self.cacheEmail = None
+
+ def uniqueEmailSet(self):
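+        # mapReduce over each stored paste's 'emails' array, emitting 1 per address and summing the emits to count occurrences of every unique email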
+ map = Code("function () {"
+ " this.emails.forEach(function(z) {"
+ " emit(z,1);"
+ " });"
+ "}")
+ reduce = Code("function (key,values) {"
+ "var total = 0;"
+ "for (var i = 0; i 0:
@@ -63,7 +105,6 @@ def build_tweet(paste):
tweet += ' Possible SSH private key'
elif paste.type == 'honeypot':
tweet += ' Dionaea Honeypot Log'
- tweet += ' #infoleak'
- if paste.num_emails > 0:
- print(paste.emails)
+ tweet += ' #infosec #dataleak'
+
return tweet
diff --git a/lib/regexes.py b/lib/regexes.py
index 4d1e535..2486d25 100644
--- a/lib/regexes.py
+++ b/lib/regexes.py
@@ -2,6 +2,7 @@
regexes = {
'email': re.compile(r'[A-Z0-9._%+-]+@[A-Z0-9.-]+\.[A-Z]{2,4}', re.I),
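+    # 'email2' is a looser fallback pattern; unlike 'email' it does not require a TLD, so it catches addresses the stricter regex misses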
+ 'email2':re.compile(r'[\w\.-]+@[\w\.-]+'),
#'ssn' : re.compile(r'\d{3}-?\d{2}-?\d{4}'),
'hash32': re.compile(r'[^