diff --git a/.gitignore b/.gitignore
index fc2cb1e..eb45148 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,6 +1,8 @@
 # project
 settings.py
 output.log
+.project
+.pydevproject
 
 # python specific
 *.pyc
diff --git a/.project b/.project
new file mode 100644
index 0000000..6d730ed
--- /dev/null
+++ b/.project
@@ -0,0 +1,17 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<projectDescription>
+	<name>dumpmon</name>
+	<comment></comment>
+	<projects>
+	</projects>
+	<buildSpec>
+		<buildCommand>
+			<name>org.python.pydev.PyDevBuilder</name>
+			<arguments>
+			</arguments>
+		</buildCommand>
+	</buildSpec>
+	<natures>
+		<nature>org.python.pydev.pythonNature</nature>
+	</natures>
+</projectDescription>
diff --git a/.pydevproject b/.pydevproject
new file mode 100644
index 0000000..40e9f40
--- /dev/null
+++ b/.pydevproject
@@ -0,0 +1,5 @@
+<?xml version="1.0" encoding="UTF-8" standalone="no"?>
+<?eclipse-pydev version="1.0"?><pydev_project>
+<pydev_property name="org.python.pydev.PYTHON_PROJECT_INTERPRETER">Default</pydev_property>
+<pydev_property name="org.python.pydev.PYTHON_PROJECT_VERSION">python 2.7</pydev_property>
+</pydev_project>
diff --git a/__init__.py b/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/dumpmon.py b/dumpmon.py
index a315673..b517f2c 100644
--- a/dumpmon.py
+++ b/dumpmon.py
@@ -12,12 +12,17 @@
 from lib.Pastebin import Pastebin, PastebinPaste
 from lib.Slexy import Slexy, SlexyPaste
 from lib.Pastie import Pastie, PastiePaste
-from lib.helper import log
+from lib.HaveIBeen import HaveIBeen, HaveIBeenPaste
+
+from lib.helper import log, createThread
+from lib.TwitterBot import TwitterBot
+from lib.RegexMgr import RegexMgr
+from lib.Stats import Stats
 from time import sleep
-from twitter import Twitter, OAuth
-from settings import CONSUMER_KEY, CONSUMER_SECRET, ACCESS_TOKEN, ACCESS_TOKEN_SECRET, log_file
+from settings import log_file
 import threading
 import logging
+from logging.handlers import RotatingFileHandler
 
 
 def monitor():
@@ -30,36 +35,55 @@ def monitor():
     parser.add_argument(
         "-v", "--verbose", help="more verbose", action="store_true")
     args = parser.parse_args()
+
     level = logging.INFO
     if args.verbose:
         level = logging.DEBUG
+
     logging.basicConfig(
-        format='%(asctime)s [%(levelname)s] %(message)s', filename=log_file, level=level)
+        format='%(asctime)s [%(levelname)s][%(module)s][%(funcName)s] %(message)s', filename=log_file, level=level)
+
+    handler = RotatingFileHandler(log_file, maxBytes=20*1000,
+                                  backupCount=5)
+    #logging.addHandler(handler)
+
     logging.info('Monitoring...')
-    bot = Twitter(
-        auth=OAuth(ACCESS_TOKEN, ACCESS_TOKEN_SECRET,
-                   CONSUMER_KEY, CONSUMER_SECRET)
-    )
-    # Create lock for both output log and tweet action
+
+    regexMgr = RegexMgr()
+    bot = TwitterBot(regexMgr)
+
+    # Create lock for output log
     log_lock = threading.Lock()
-    tweet_lock = threading.Lock()
-
-    pastebin_thread = threading.Thread(
-        target=Pastebin().monitor, args=[bot, tweet_lock])
-    slexy_thread = threading.Thread(
-        target=Slexy().monitor, args=[bot, tweet_lock])
-    pastie_thead = threading.Thread(
-        target=Pastie().monitor, args=[bot, tweet_lock])
-
-    for thread in (pastebin_thread, slexy_thread, pastie_thead):
-        thread.daemon = True
-        thread.start()
+
+    #create an event to tell threads to keep running
+    isRunning = threading.Event()
+    isRunning.set()
+
+    #array to keep a handle on threads
+    workers = []
+
+    #these next 2 workers don't need to be joined when termd
+    createThread(bot.monitor)
+    createThread(Stats().monitor,bot)
+
+    #these workers need to be shut down gracefully
+    workers.append(createThread(HaveIBeen().monitor,bot,isRunning))
+    workers.append(createThread(Pastebin().monitor,bot,isRunning))
+    workers.append(createThread(Slexy().monitor,bot,isRunning))
+    workers.append(createThread(Pastie().monitor,bot,isRunning))
 
     # Let threads run
     try:
         while(1):
             sleep(5)
     except KeyboardInterrupt:
+        #signal threads to shutdown
+        isRunning.clear()
+        print 'stopping'
+        #wait for threads to join
+        for t in workers:
+            t.join()
+        print 'stopped'
         logging.warn('Stopped.')
 
 
diff --git a/lib/HaveIBeen.py b/lib/HaveIBeen.py
new file mode 100644
index 0000000..94024e1
--- /dev/null
+++ b/lib/HaveIBeen.py
@@ -0,0 +1,60 @@
+"""
+Troy Hunt's RSS Feed for the last 50 pastes
+
+http://feeds.feedburner.com/HaveIBeenPwnedLatestPastes
+
+"""
+import feedparser
+
+from .Site import Site
+from .Paste import Paste
+from bs4 import BeautifulSoup
+from . import helper
+from time import sleep
+from settings import SLEEP_HAVEIBEEN
+from twitter import TwitterError
+import logging
+
+class HaveIBeenPaste(Paste):
+    def __init__(self, id):
+        super(HaveIBeenPaste, self).__init__(id)
+        self.headers = None
+        self.url = 'http://pastebin.com/raw.php?i=' + self.id
+
+    def get(self):
+        self.text = helper.curl(self.url)
+
+class HaveIBeen(Site):
+    def __init__(self):
+        super(HaveIBeen, self).__init__()
+        self.sleep = SLEEP_HAVEIBEEN
+        logging.info('[+] Started HaveIBeen')
+        self.feedURL = 'http://feeds.feedburner.com/HaveIBeenPwnedLatestPastes'
+
+    def _parse(self):
+        try:
+            d = feedparser.parse(self.feedURL)
+            return d['entries']
+        except Exception as e:
+            logging.error('[!] Feed Parser Error: %s'%(str(e)))
+            return None
+
+    def update(self):
+        logging.debug('Retrieving HaveIBeenPwned ID\'s')
+        i=0
+
+        for entry in self._parse():
+            l = entry['links'][0]['href']
+            link = l.split(r'/')
+            paste = HaveIBeenPaste(link[3])
+            if not self.hasSeen(paste):
+                i+=1
+                self.put(paste)
+        logging.debug('HaveIBeenPwned Added URLs: ' + str(i))
+
+
+
+
+if __name__ == '__main__':
+    c = HaveIBeen()
+    c.update()
\ No newline at end of file
diff --git a/lib/Paste.py b/lib/Paste.py
index bc379c6..ebe1072 100644
--- a/lib/Paste.py
+++ b/lib/Paste.py
@@ -2,22 +2,47 @@ import settings
 import logging
 import re
+import time
 
 
 class Paste(object):
-    def __init__(self):
+    def __init__(self,id):
         '''
         class Paste: Generic "Paste" object to contain attributes of a standard paste
-
         '''
-        self.emails = 0
-        self.hashes = 0
+        self.id = id
+        self.emails = []
+        self.emails2 = []
+        self.hashes = []
         self.num_emails = 0
         self.num_hashes = 0
         self.text = None
         self.type = None
         self.sites = None
         self.db_keywords = 0.0
-
+
+    def __eq__(self,comparePaste):
+        #logging.info('id %s compares to %s'%(self.id, comparePaste.id))
+        return self.id == comparePaste.id
+
+    def row(self):
+        return {
+            'pid' : self.id,
+            'text' : self.text,
+            'emails' : self.emails,
+            'hashes' : self.hashes,
+            'num_emails' : self.num_emails,
+            'num_hashes' : self.num_hashes,
+            'type' : self.type,
+            'db_keywords' : self.db_keywords,
+            'url' : self.url,
+            "added":time.strftime("%c")
+        }
+
+    def get(self):
+        #override this
+        logging.error('[@] Function Not Implemented in Subclass')
+        pass
+
     def match(self):
         '''
         Matches the paste against a series of regular expressions to determine if the paste is 'interesting'
@@ -32,33 +57,64 @@ def match(self):
 
         '''
         # Get the amount of emails
-        self.emails = list(set(regexes['email'].findall(self.text)))
-        self.hashes = regexes['hash32'].findall(self.text)
-        self.num_emails = len(self.emails)
-        self.num_hashes = len(self.hashes)
-        if self.num_emails > 0:
-            self.sites = list(set([re.search('@(.*)$', email).group(1).lower() for email in self.emails]))
-        for regex in regexes['db_keywords']:
-            if regex.search(self.text):
-                logging.debug('\t[+] ' + regex.search(self.text).group(1))
-                self.db_keywords += round(1/float(
-                    len(regexes['db_keywords'])), 2)
-        for regex in regexes['blacklist']:
-            if regex.search(self.text):
-                logging.debug('\t[-] ' + regex.search(self.text).group(1))
-                self.db_keywords -= round(1.25 * (
-                    1/float(len(regexes['db_keywords']))), 2)
-        if (self.num_emails >= settings.EMAIL_THRESHOLD) or (self.num_hashes >= settings.HASH_THRESHOLD) or (self.db_keywords >= settings.DB_KEYWORDS_THRESHOLD):
-            self.type = 'db_dump'
-        if regexes['cisco_hash'].search(self.text) or regexes['cisco_pass'].search(self.text):
-            self.type = 'cisco'
-        if regexes['honeypot'].search(self.text):
-            self.type = 'honeypot'
-        if regexes['google_api'].search(self.text):
-            self.type = 'google_api'
-        # if regexes['juniper'].search(self.text): self.type = 'Juniper'
-        for regex in regexes['banlist']:
-            if regex.search(self.text):
-                self.type = None
-                break
-        return self.type
+        try:
+            r = self.text.splitlines()
+            logging.debug("[*] Num Lines in text: %i"%(len(r)))
+
+            if regexes['email'].search(self.text):
+                self.emails = regexes['email'].findall(self.text)
+
+            if regexes['email2'].search(self.text):
+                self.emails2 = regexes['email2'].findall(self.text)
+
+            self.hashes = regexes['hash32'].findall(self.text)
+
+            self.num_emails = len(self.emails)
+            logging.debug("[*] Num Emails: %i"%(self.num_emails))
+
+            self.num_emails = len(self.emails2)
+            logging.debug("[*] Num Emails2: %i"%(self.num_emails))
+
+            self.num_hashes = len(self.hashes)
+            logging.debug("[*] Num Hashes: %i"%(self.num_hashes))
+
+            if self.num_emails > 0:
+                self.sites = list(set([re.search('@(.*)$', email).group(1).lower() for email in self.emails]))
+                logging.debug("[*] Num Sites: %i"%(len(self.sites)))
+
+            for regex in regexes['db_keywords']:
+                if regex.search(self.text):
+                    logging.debug('\t[+] ' + regex.search(self.text).group(1))
+                    self.db_keywords += round(1/float(
+                        len(regexes['db_keywords'])), 2)
+
+            for regex in regexes['blacklist']:
+                if regex.search(self.text):
+                    logging.debug('\t[-] ' + regex.search(self.text).group(1))
+                    self.db_keywords -= round(1.25 * (
+                        1/float(len(regexes['db_keywords']))), 2)
+
+            if (self.num_emails >= settings.EMAIL_THRESHOLD) or (self.num_hashes >= settings.HASH_THRESHOLD) or (self.db_keywords >= settings.DB_KEYWORDS_THRESHOLD):
+                self.type = 'db_dump'
+
+            if regexes['cisco_hash'].search(self.text) or regexes['cisco_pass'].search(self.text):
+                self.type = 'cisco'
+
+            if regexes['honeypot'].search(self.text):
+                self.type = 'honeypot'
+
+            if regexes['google_api'].search(self.text):
+                self.type = 'google_api'
+
+            # if regexes['juniper'].search(self.text): self.type = 'Juniper'
+            for regex in regexes['banlist']:
+                if regex.search(self.text):
+                    self.type = None
+                    break
+
+            logging.debug("[*] Type: %s"%(self.type))
+            return self.type
+
+        except Exception as e:
+            logging.error("[!] Error: %s"%(str(e)))
+            return None
diff --git a/lib/Pastebin.py b/lib/Pastebin.py
index e9656cd..94303a1 100644
--- a/lib/Pastebin.py
+++ b/lib/Pastebin.py
@@ -10,37 +10,40 @@
 class PastebinPaste(Paste):
     def __init__(self, id):
-        self.id = id
+        super(PastebinPaste, self).__init__(id)
         self.headers = None
         self.url = 'http://pastebin.com/raw.php?i=' + self.id
-        super(PastebinPaste, self).__init__()
-
+
+    def get(self):
+        self.text = helper.curl(self.url)
 
 class Pastebin(Site):
-    def __init__(self, last_id=None):
-        if not last_id:
-            last_id = None
-        self.ref_id = last_id
+    """
+    Pastebin will block your IP if you request more than 600 requests in 10 mins. This is per admin@pastebin.com
+    """
+    def __init__(self):
         self.BASE_URL = 'http://pastebin.com'
         self.sleep = SLEEP_PASTEBIN
         super(Pastebin, self).__init__()
-
+        logging.info('[+] Started PasteBin')
+
+    def terminating(self):
+        #TODO: persist the seen queue
+        pass
+
+    def parse(self):
+        return BeautifulSoup(helper.curl(self.BASE_URL + '/archive')).find_all(
+            lambda tag: tag.name == 'td' and tag.a and '/archive/' not in tag.a['href'] and tag.a['href'][1:])
+
     def update(self):
         '''update(self) - Fill Queue with new Pastebin IDs'''
-        logging.info('Retrieving Pastebin ID\'s')
-        results = BeautifulSoup(helper.download(self.BASE_URL + '/archive')).find_all(
-            lambda tag: tag.name == 'td' and tag.a and '/archive/' not in tag.a['href'] and tag.a['href'][1:])
-        new_pastes = []
-        if not self.ref_id:
-            results = results[:60]
-        for entry in results:
+        logging.debug('Retrieving Pastebin ID\'s')
+        i=0
+        for entry in self.parse():
             paste = PastebinPaste(entry.a['href'][1:])
-            # Check to see if we found our last checked URL
-            if paste.id == self.ref_id:
-                break
-            new_pastes.append(paste)
-        for entry in new_pastes[::-1]:
-            logging.info('Adding URL: ' + entry.url)
-            self.put(entry)
-    def get_paste_text(self, paste):
-        return helper.download(paste.url)
+            if not self.hasSeen(paste):
+                #logging.info('Adding URL: ' + paste.url)
+                i+=1
+                self.put(paste)
+        logging.debug('Pastebin Added URLs: ' + str(i))
+
diff --git a/lib/Pastie.py b/lib/Pastie.py
index da84e55..6a38b12 100644
--- a/lib/Pastie.py
+++ b/lib/Pastie.py
@@ -10,39 +10,37 @@
 class PastiePaste(Paste):
     def __init__(self, id):
-        self.id = id
+        super(PastiePaste, self).__init__(id)
         self.headers = None
         self.url = 'http://pastie.org/pastes/' + self.id + '/text'
-        super(PastiePaste, self).__init__()
+    def get(self):
+        try:
+            self.text = BeautifulSoup(helper.curl(self.url)).pre.text
+        except Exception as e:
+            logging.error('[!] Beautiful Soup Error: %s'%(str(e)))
+            self.text = None
 
 
 class Pastie(Site):
-    def __init__(self, last_id=None):
-        if not last_id:
-            last_id = None
-        self.ref_id = last_id
+    def __init__(self):
         self.BASE_URL = 'http://pastie.org'
         self.sleep = SLEEP_PASTIE
         super(Pastie, self).__init__()
+        logging.info('[+] Started Pastie')
+
+    def parse(self):
+        return [tag for tag in BeautifulSoup(helper.curl(
+            self.BASE_URL + '/pastes')).find_all('p', 'link') if tag.a]
 
     def update(self):
         '''update(self) - Fill Queue with new Pastie IDs'''
-        logging.info('Retrieving Pastie ID\'s')
-        results = [tag for tag in BeautifulSoup(helper.download(
-            self.BASE_URL + '/pastes')).find_all('p', 'link') if tag.a]
-        new_pastes = []
-        if not self.ref_id:
-            results = results[:60]
-        for entry in results:
+        logging.debug('Retrieving Pastie ID\'s')
+        i=0
+        for entry in self.parse():
             paste = PastiePaste(entry.a['href'].replace(
                 self.BASE_URL + '/pastes/', ''))
-            # Check to see if we found our last checked URL
-            if paste.id == self.ref_id:
-                break
-            new_pastes.append(paste)
-        for entry in new_pastes[::-1]:
-            logging.debug('Adding URL: ' + entry.url)
-            self.put(entry)
+            if not self.hasSeen(paste):
+                i+=1
+                self.put(paste)
+        logging.debug('Pastie Added URLs: ' + str(i))
 
-    def get_paste_text(self, paste):
-        return BeautifulSoup(helper.download(paste.url)).pre.text
\ No newline at end of file
diff --git a/lib/RegexMgr.py b/lib/RegexMgr.py
new file mode 100644
index 0000000..f9c8d46
--- /dev/null
+++ b/lib/RegexMgr.py
@@ -0,0 +1,61 @@
+import re
+from pymongo import MongoClient
+from settings import USE_DB, DB_HOST, DB_PORT
+import time
+import logging
+
+import threading
+
+class RegexMgr(object):
+    """
+    This class is intended to handle all the regex stuff and persistance to the DB for observers
+    """
+    def __init__(self):
+        self.regexLock = threading.Lock()
+
+        if USE_DB:
+            try:
+                self.client = MongoClient(DB_HOST, DB_PORT).paste_db.regexes
+            except pymongo.errors.ConnectionFailure, e:
+                logging.error('[!] Database failed to start %s'%(e))
+
+        self.customRegexes = []
+        self._loadRegexes()
+
+
+    def _loadRegexes(self):
+        with self.regexLock:
+            cursor = self.client.find()
+            for row in cursor:
+                customRegex = {}
+                rc = re.compile(row['regex'])
+                customRegex['regex'] = rc
+                customRegex['user'] = row['user']
+                customRegex['added'] = time.strftime("%c")
+                self.customRegexes.append(customRegex)
+
+            logging.info("[+] Loaded custom regexes: %s"%(self.customRegexes))
+
+    def reloadCustomRegexes(self):
+        self.customRegexes = []
+        self._loadRegexes()
+
+    def add(self,regex, user):
+        if self.valid(regex):
+            o = {"user":user,"regex":regex,"added":time.strftime("%c")}
+            self.client.insert(o)
+            self.customRegexes.append(o)
+            return True
+
+    def valid(self,regex):
+        try:
+            re.compile(regex)
+            is_valid = True
+        except re.error:
+            is_valid = False
+        return is_valid
+
+
+
+
+
\ No newline at end of file
diff --git a/lib/Site.py b/lib/Site.py
index 6ab60ef..e94e92c 100644
--- a/lib/Site.py
+++ b/lib/Site.py
@@ -5,10 +5,12 @@
 from pymongo import MongoClient
 from requests import ConnectionError
 from twitter import TwitterError
-from settings import USE_DB, DB_HOST, DB_PORT
+from settings import USE_DB, DB_HOST, DB_PORT, SEEN_DEQUE_LEN
 import logging
 import helper
+from random import randint
+from collections import deque
 
 
 class Site(object):
     '''
@@ -31,19 +33,33 @@ class Site(object):
     # that I could find... So, I decided to implement my own queue with a few
     # changes
     def __init__(self, queue=None):
+
+        # the double ended queue is used to check the last n URLs to see if they have been processed, since the URLs are random strings.
+        self.seen = deque(maxlen=SEEN_DEQUE_LEN)
+
         if queue is None:
             self.queue = []
+
         if USE_DB:
             # Lazily create the db and collection if not present
             self.db_client = MongoClient(DB_HOST, DB_PORT).paste_db.pastes
-
+    def addSeen(self,item):
+        self.seen.append(item)
+        #logging.info('[@] Site deque len %i'%(len(self.seen)))
+
+    def hasSeen(self,item):
+        res = item in self.seen
+        #logging.info('[@] URL Seen %s %s'%(item.url,res))
+        return res
+
     def empty(self):
         return len(self.queue) == 0
 
     def get(self):
         if not self.empty():
             result = self.queue[0]
+            self.addSeen(result)
             del self.queue[0]
         else:
             result = None
@@ -67,36 +83,50 @@ def clear(self):
     def list(self):
         print('\n'.join(url for url in self.queue))
 
-    def monitor(self, bot, t_lock):
+    def parse(self):
+        #override this
+        logging.error('[@] Function Not Implemented in Subclass')
+        pass
+
+    def update(self):
+        #override this
+        logging.error('[@] Function Not Implemented in Subclass')
+        pass
+
+    def terminating(self):
+        #this can be overridden in subclass
+        logging.debug('[!] Terminating.....')
+
+    def monitor(self, bot, isRunning):
         self.update()
-        while(1):
+        while isRunning.is_set():
             while not self.empty():
+                if not isRunning.is_set(): break
+                #need to sleep to avoid the ban....
+                time.sleep(randint(2,5))
                 paste = self.get()
-                self.ref_id = paste.id
-                logging.info('[*] Checking ' + paste.url)
-                paste.text = self.get_paste_text(paste)
+                paste.get()
                 tweet = helper.build_tweet(paste)
                 if tweet:
                     logging.info(tweet)
-                    with t_lock:
+                    with bot.tweetLock:
                         if USE_DB:
-                            self.db_client.save({
-                                'pid' : paste.id,
-                                'text' : paste.text,
-                                'emails' : paste.emails,
-                                'hashes' : paste.hashes,
-                                'num_emails' : paste.num_emails,
-                                'num_hashes' : paste.num_hashes,
-                                'type' : paste.type,
-                                'db_keywords' : paste.db_keywords,
-                                'url' : paste.url
-                            })
+                            try:
+                                self.db_client.save(paste.row())
+                            except Exception as e:
+                                logging.error('[!] MongoDB Error %s'%(str(e)))
                         try:
+                            logging.debug('[+] Tweet %s'%(tweet))
                             bot.statuses.update(status=tweet)
-                        except TwitterError:
-                            pass
+                        except TwitterError as e:
+                            logging.error('[!] TwitterError %s'%(str(e)))
+            if not isRunning.is_set(): break
             self.update()
             while self.empty():
                 logging.debug('[*] No results... sleeping')
                 time.sleep(self.sleep)
                 self.update()
+
+        self.terminating()
+
+
diff --git a/lib/Slexy.py b/lib/Slexy.py
index 3876c81..b1c41d2 100644
--- a/lib/Slexy.py
+++ b/lib/Slexy.py
@@ -10,38 +10,34 @@
 class SlexyPaste(Paste):
     def __init__(self, id):
-        self.id = id
+        super(SlexyPaste, self).__init__(id)
         self.headers = {'Referer': 'http://slexy.org/view/' + self.id}
         self.url = 'http://slexy.org/raw/' + self.id
-        super(SlexyPaste, self).__init__()
-
+
+    def get(self):
+        self.text = helper.curl(self.url, self.headers['Referer'])
 
 class Slexy(Site):
-    def __init__(self, last_id=None):
-        if not last_id:
-            last_id = None
-        self.ref_id = last_id
+    def __init__(self):
         self.BASE_URL = 'http://slexy.org'
         self.sleep = SLEEP_SLEXY
         super(Slexy, self).__init__()
-
+        logging.info('[+] Started Slexy')
+
+    def parse(self):
+        return BeautifulSoup(helper.curl(self.BASE_URL + '/recent')).find_all(
+            lambda tag: tag.name == 'td' and tag.a and '/view/' in tag.a['href'])
+
     def update(self):
         '''update(self) - Fill Queue with new Slexy IDs'''
-        logging.info('[*] Retrieving Slexy ID\'s')
-        results = BeautifulSoup(helper.download(self.BASE_URL + '/recent')).find_all(
-            lambda tag: tag.name == 'td' and tag.a and '/view/' in tag.a['href'])
-        new_pastes = []
-        if not self.ref_id:
-            results = results[:60]
-        for entry in results:
+        logging.debug('[*] Retrieving Slexy ID\'s')
+
+        i=0
+        for entry in self.parse():
             paste = SlexyPaste(entry.a['href'].replace('/view/', ''))
-            # Check to see if we found our last checked URL
-            if paste.id == self.ref_id:
-                break
-            new_pastes.append(paste)
-        for entry in new_pastes[::-1]:
-            logging.info('[+] Adding URL: ' + entry.url)
-            self.put(entry)
+            if not self.hasSeen(paste):
+                i+=1
+                self.put(paste)
+        logging.debug('Slexy Added URLs: ' + str(i))
+
 
-    def get_paste_text(self, paste):
-        return helper.download(paste.url, paste.headers)
diff --git a/lib/Stats.py b/lib/Stats.py
new file mode 100644
index 0000000..0c13e7f
--- /dev/null
+++ b/lib/Stats.py
@@ -0,0 +1,83 @@
+from pymongo import MongoClient
+from bson import Code
+from twitter import TwitterError
+from settings import USE_DB, DB_HOST, DB_PORT, STATS_FREQ
+
+import logging
+import time
+
+class Stats(object):
+    def __init__(self):
+        if USE_DB:
+            try:
+                self.client = MongoClient(DB_HOST, DB_PORT).paste_db.pastes
+            except pymongo.errors.ConnectionFailure, e:
+                logging.error('[!] Database failed to start %s'%(e))
+        #commenting this cache call out; taking too much time at start up.
+        #self.cacheEmail = self.uniqueEmailSet()
+        self.cacheEmail = None
+
+    def uniqueEmailSet(self):
+        map = Code("function () {"
+                   "    this.emails.forEach(function(z) {"
+                   "        emit(z,1);"
+                   "    });"
+                   "}")
+        reduce = Code("function (key,values) {" "var total = 0;" "for (var i = 0; i
 0:
@@ -63,7 +105,6 @@ def build_tweet(paste):
         tweet += ' Possible SSH private key'
     elif paste.type == 'honeypot':
         tweet += ' Dionaea Honeypot Log'
-    tweet += ' #infoleak'
-    if paste.num_emails > 0:
-        print(paste.emails)
+    tweet += ' #infosec #dataleak'
+    return tweet
 
diff --git a/lib/regexes.py b/lib/regexes.py
index 4d1e535..2486d25 100644
--- a/lib/regexes.py
+++ b/lib/regexes.py
@@ -2,6 +2,7 @@
 regexes = {
     'email': re.compile(r'[A-Z0-9._%+-]+@[A-Z0-9.-]+\.[A-Z]{2,4}', re.I),
+    'email2':re.compile(r'[\w\.-]+@[\w\.-]+'),
     #'ssn' : re.compile(r'\d{3}-?\d{2}-?\d{4}'),
     'hash32': re.compile(r'[^