diff --git a/README.md b/README.md
index e7972af..37fe9c1 100644
--- a/README.md
+++ b/README.md
@@ -1,32 +1,60 @@
 [![Build Status](https://travis-ci.org/Floby/node-tokenizer.png)](https://travis-ci.org/Floby/node-tokenizer)
 # Synopsis
-A wide purpose tokenizer for JavaScript. The interface follows more or less
-the WriteStream from [node.js](http://nodejs.org).
+A wide-purpose tokenizer for JavaScript that tokenizes based on rules defined with regular expressions. The interface conforms to the WriteStream from [node.js](http://nodejs.org).
 
-node-tokenizer is published on npm so you can install it with `npm install tokenizer`
+# Installation
+
+    npm i tokenizer
 
 ## How to
-* require the Tokenizer constructor
+**Requiring**
 
 ``` javascript
 var Tokenizer = require('tokenizer');
 ```
-* construct one (we'll see what the callback is used for)
+**Construction**
 
 ``` javascript
-var t = new Tokenizer(mycallback);
+var t = new Tokenizer(mycallback, options);
 ```
-* add rules
+**Setting Options**
+
+Options is an object passed to the constructor and may contain the following properties (defaults shown inline):
+
+    {
+        stepSize: 0,      // For large streams, the maximum size tokenized at a time. Must be larger than the largest expected token.
+        split: undefined  // A regular expression. See 'Splitting into Smaller Pieces'.
+    }
+
+**Adding Rules**
 
 ``` javascript
 t.addRule(/^my regex$/, 'type');
 ```
-* write or pump to it
+**Splitting into Smaller Pieces**
+
+By default, the tokenizer attempts to find the longest match in the input stream. This can be a large performance hit for big files. If you are certain that your tokens never cross a particular regular-expression boundary (such as /\n/), you can split the input on that boundary before tokenization, which can improve performance dramatically.
+
+``` javascript
+// Break CSV input into pieces and tokenize each piece separately, in the order of the original input
+t = new Tokenizer(undefined, {
+    split: /\,/
+});
+```
+
+``` javascript
+// Break a file up by lines and tokenize each line separately
+t = new Tokenizer(undefined, {
+    split: /\r?\n/
+});
+```
+
+**Writing/Piping**
 
 ``` javascript
 t.write(data);
@@ -34,18 +62,18 @@ t.write(data);
 stream.pipe(t);
 ```
 
-* listen for new tokens
+**Listening for tokens**
 
 ``` javascript
 t.on('token', function(token, type) {
     // do something useful
     // type is the type of the token (specified with addRule)
     // token is the actual matching string
-})
+});
 // alternatively you can use the tokenizer as a readable stream.
 ```
 
-* look out for the end
+**Listening for completion**
 
 ``` javascript
 t.on('end', callback);
@@ -63,24 +91,32 @@ and match, an object like this
 }
 ```
 
-Have a look in the example folder
+## Examples
+
+Take a look at the [examples](https://github.com/Floby/node-tokenizer/tree/master/examples) folder.
 
 ## Rules
-rules are regular expressions associated with a type name.
+
+Rules are regular expressions associated with a type name.
+
 The tokenizer tries to find the longest string matching one or more rules.
 When several rules match the same string, priority is given to the rule
-which was added first. (this may change)
+which was added first.
 
-Please note that your regular expressions should use ^ and $ in order
-to test the whole string. If these are not used, you rule will match
-_every_ string that contains what you specified, this could be the whole file!
+Note: normally your regular expressions should use ^ and $ in order
+to test the whole string. If these are not used, your rule will match
+_every_ string that contains what you specified; this could be the whole file!
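+
+For example, an illustrative sketch of the difference (the rule names here are arbitrary):
+
+``` javascript
+t.addRule(/^\d+$/, 'number'); // anchored: matches only when the whole candidate string is digits
+t.addRule(/\d/, 'digit');     // unanchored: matches any candidate string that merely contains a digit
+```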
 
 ## To do
-* a lot of optimisation
-* being able to share rules across several tokenizers
-  (although this can be achieved through inheritance)
-* probably more hooks
-* more checking
+
+* Continued optimisation
+* Rule sharing across several tokenizers (although this can be achieved through inheritance)
+* More hooks
+* Increased test coverage
+
+## Testing
+
+Testing is provided via nodeunit; run the suite with `npm test`.
 
 ## License
diff --git a/lib/Tokenizer.js b/lib/Tokenizer.js
index 01dc3d2..1846045 100644
--- a/lib/Tokenizer.js
+++ b/lib/Tokenizer.js
@@ -1,99 +1,158 @@
-var EventEmitter = require('events').EventEmitter;
-var util = require('util');
-var assert = require('assert');
-var Transform = require('stream').Transform;
-var disect = require('disect');
+// Fork of the npm 'tokenizer' module by Floby ((c) Floby), with modifications.
+
+var EventEmitter = require('events').EventEmitter,
+    util = require('util'),
+    assert = require('assert'),
+    Transform = require('stream').Transform;
 
 function noop(){}
 
-function Tokenizer (check_token_cb, options) {
-    if(!(this instanceof Tokenizer)) {
-        return new Tokenizer(check_token_cb);
-    }
+function Tokenizer (check_token_cb, options, error_cb) {
+    if(!(this instanceof Tokenizer)) {
+        return new Tokenizer(check_token_cb, options, error_cb);
+    }
+
+    this.options = options || {};
+    this.options.stepSize = this.options.hasOwnProperty('stepSize') ? this.options.stepSize : 0;
+
+    Transform.call(this, options);
 
-    Transform.call(this, options);
-    this._readableState.objectMode = true;
-    this._buffered = ""; // we buffer untokenized data between writes
-    this._regexes = []; // should contain objects
-                        // with regex[RegExp] and type[String]
-    this._ignored = {}; // a hash of ignored token types
-                        // these will be parsed but not emitted
-    this._checkToken = check_token_cb || noop;
+    this._readableState.objectMode = true;
+    this._buffered = ''; // we buffer untokenized data between writes
+    this._regexes = [];  // should contain objects with regex[RegExp] and type[String]
+    this._ignored = {};  // a hash of ignored token types; these will be parsed but not emitted
+    this._checkToken = check_token_cb || noop;
+    this._error = error_cb;
 }
+
 util.inherits(Tokenizer, Transform);
 
 Tokenizer.prototype._transform = function _transform(chunk, encoding, callback) {
     chunk = chunk.toString();
     var self = this;
+
     process.nextTick(function () {
-        try {
-            var index = 0, step = 64;
-            while(index < chunk.length) {
+        var index = 0,
+            step = self.options.stepSize;
+
+        if (self.options.stepSize > 0) {
+            while (index < chunk.length) {
                 self._tokenize(chunk.substr(index, step));
                 index += step;
             }
-            callback();
-        } catch(e) {
-            callback(e);
         }
+        else self._tokenize(chunk);
+
+        callback();
     })
 };
 
-Tokenizer.prototype._getMatchingRule = function _getMatchingRule(str) {
-    for (var i = 0; i < this._regexes.length; ++i) {
-        if(this._regexes[i].regex.test(str)) {
-            return this._regexes[i];
+Tokenizer.prototype._getLongestMatch = function _getLongestMatch(str) {
+    var bestMatch = undefined,
+        longestMatchLen = 0;
+
+    // Find the longest match that matches at the beginning of the string.
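+    // Each rule's regex is run against `str` and its first match (matches[0]) is taken,
+    // so rules are expected to be anchored with ^ so that they match from the start.
+    // Rules with a filter are skipped when the filter rejects `str`. Among the matching
+    // rules the longest match wins; on a tie the rule added first wins, and the scan
+    // stops early once a match covers the whole string.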
+    for (var i = 0; i < this._regexes.length; i++) {
+        if (this._regexes[i].filter && !this._regexes[i].filter(str))
+            continue;
+
+        var match = undefined,
+            matches = str.match(this._regexes[i].regex);
+
+        if (matches && matches.length) {
+            if ((match = matches[0]).length > longestMatchLen) {
+                longestMatchLen = match.length;
+                bestMatch = {
+                    rule: this._regexes[i],
+                    match: match,
+                    length: match.length,
+                    matchesAll: longestMatchLen == str.length
+                };
+
+                if (longestMatchLen == str.length)
+                    break;
             }
+        }
     }
-    return null;
+
+    return bestMatch;
 };
 
-Tokenizer.prototype._tokenize = function _tokenize(data, nobuffer) {
-    var regexes = this._regexes;
-    // in case we buffered data on previous writes
-    data = this._buffered + data;
-    this._buffered = '';
-    if(!data.length) {
-        return;
+Tokenizer.prototype._firstMatchLength = function(str, regex) {
+    for (var i = 1; i < str.length; i++)
+        if (regex.test(str.substr(0, i)))
+            return i;
+    return -1;
+};
+
+Tokenizer.prototype._tokenize = function _tokenize(data, endofstream) {
+    // prepend any data buffered from previous writes
+    data = this._buffered + data;
+    this._buffered = '';
+
+    while (data && data.length) {
+        var match = undefined,
+            str = undefined,
+            ix = -1,
+            removeEOL = false;
+
+        if (this.options.split) {
+            while ((ix = data.search(this.options.split)) == 0) {
+                var len = this._firstMatchLength(data, this.options.split);
+
+                if (len != -1) {
+                    this.emit('split', data.substr(0, len));
+
+                    data = data.substr(len);
+                }
+                else return;
+            }
+
+            if (ix != -1)
+                removeEOL = true;
+            str = ix != -1 ? data.substr(0, ix) + '\n' : data;
+            data = ix != -1 ? data.substr(ix) : undefined;
         }
+        else {
+            str = data;
+            data = undefined;
+        }
+
+        match = this._getLongestMatch(str);
 
-    var self = this;
-    var maxIndex = disect(0, data.length, function (index) {
-        var buf = data.substring(0, index + 1);
-        return self._getMatchingRule(buf) === null;
-    });
+        if (!match) {
+            var err = new SyntaxError('No rules found to match any part of \'' + str.toString() + '\'');
 
-    if(maxIndex === 0) {
-        // no match found
-        throw new SyntaxError('could not tokenize ' + JSON.stringify(data));
+            if (this._error) {
+                this._error(err);
+                return;
+            }
+            throw err;
         }
-    else if (maxIndex === data.length && !nobuffer) {
-        // the whole string is matching
-        this._buffered = data;
+        else if (match.matchesAll && !endofstream && (!data || !data.length)) {
+            this._buffered = str;
             return;
         }
-    else {
-        // some substring is matching
-        var str = data.substring(0, maxIndex);
-        var rule = this._getMatchingRule(str);
-        if(!rule) {
-            throw new Error('wut ?');
-        }
-        this._gotToken(str, rule);
-        this._tokenize(data.substring(maxIndex), nobuffer);
-    }
+
+        if (removeEOL)
+            str = str.substr(0, str.length - 1);
+
+        data = str.substr(match.length) + (data || '');
+        str = str.substr(0, match.length);
+
+        this._gotToken(str, match.rule);
+    } // while
 };
 
 Tokenizer.prototype._flush = function _flush(callback) {
-    var self = this;
-    process.nextTick(function () {
-        try {
-            self._tokenize('', true);
-            callback();
-        } catch(e) {
-            callback(e);
-        }
-    });
+    this._tokenize('', true);
+    callback();
 };
 
 var Token = function String (content, type) {
@@ -109,50 +168,50 @@ Token.prototype.valueOf = function valueOf() {
 };
 
 Tokenizer.prototype._gotToken = function _gotToken(str, rule) {
-    // notify the token checker
-    var type = this._checkToken(str, rule) || rule.type;
-    if(this._ignored[type]) return;
-    var token = new Token(str, type);
+    // determine the token type
+    var type = rule.type || this._checkToken(str, rule);
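+    // Note: unlike the original, the rule's declared type now takes precedence over the
+    // value returned by check_token_cb; the callback is only consulted (and only invoked)
+    // when the rule has no type of its own.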
+    if(this._ignored[type]) return;
+    var token = new Token(str, type);
 
     this.push(token);
 
     this.emit('token', token, type);
 };
 
-Tokenizer.prototype.addRule = function addRule(regex, type) {
-    // this is useful for built-in rules
-    if(!type) {
-        if(Array.isArray(regex)) {
-            return this.addRule(regex[0], regex[1]);
-        }
-        else if(regex) {
-            return this.addRule(Tokenizer[regex]);
-        }
-        else {
-            throw new Error('No parameters specified');
-        }
+Tokenizer.prototype.addRule = function addRule(regex, type, filter) {
+    // this is useful for built-in rules
+    if(!type) {
+        if(Array.isArray(regex)) {
+            return this.addRule(regex[0], regex[1], filter);
+        }
+        else if(regex) {
+            return this.addRule(Tokenizer[regex], undefined, filter);
+        }
+        else {
+            throw new Error('No parameters specified');
+        }
     }
-    assert.ok((regex instanceof RegExp) || (typeof regex === 'function'));
-    assert.equal(typeof type, 'string');
-    this._regexes.push({regex:regex,type:type});
+    assert.ok((regex instanceof RegExp) || (typeof regex === 'function'));
+    assert.equal(typeof type, 'string');
+    this._regexes.push({
+        regex: regex,
+        type: type,
+        filter: filter
+    });
 };
 
 /**
  * set some tokens to be ignored. these won't be emitted
  */
 Tokenizer.prototype.ignore = function ignore(ignored) {
-    if(Array.isArray(ignored)) {
-        for (var i = 0; i < ignored.length; ++i) {
-            this.ignore(ignored[i]);
-        }
-        return;
-    }
-    this._ignored[ignored] = true;
+    if (Array.isArray(ignored))
+        return ignored.forEach(this.ignore.bind(this));
+    this._ignored[ignored] = true;
 };
 
 module.exports = Tokenizer;
 
 // built-in rules
-Tokenizer.whitespace = [/^(\s)+$/, 'whitespace'];
-Tokenizer.word = [/^\w+$/, 'word'];
-Tokenizer.number = [/^\d+(\.\d+)?$/, 'number'];
+Tokenizer.whitespace = [/^(\s)+/, 'whitespace'];
+Tokenizer.word = [/^\w+/, 'word'];
+Tokenizer.number = [/^\d+(\.\d+)?/, 'number'];
\ No newline at end of file
diff --git a/package.json b/package.json
index ea984c2..a47cfe0 100644
--- a/package.json
+++ b/package.json
@@ -1,13 +1,15 @@
 {
   "name": "tokenizer",
-  "description": "A wide purpose tokenizer for node.js which looks like a stream",
-  "version": "1.1.2",
+  "description": "A wide purpose tokenizer for node.js which extends the built-in 'stream' module.",
+  "version": "1.2.0",
   "homepage": "http://github.com/floby/node-tokenizer",
   "repository": {
     "type": "git",
     "url": "git://github.com/Floby/node-tokenizer.git"
   },
   "author": "Florent Jaby ",
+  "contributors": [],
+  "main": "lib/Tokenizer.js",
   "scripts": {
     "test": "nodeunit test/test-tokenizer.js"
@@ -20,8 +22,5 @@
   },
   "devDependencies": {
     "nodeunit": "~0.8.1"
-  },
-  "dependencies": {
-    "disect": "~1.1.0"
   }
 }
diff --git a/test/test-perf.js b/test/test-perf.js
index 219cf4e..0f1bb61 100644
--- a/test/test-perf.js
+++ b/test/test-perf.js
@@ -1,10 +1,11 @@
-var tokenizer = require('../');
-var domain = require('domain');
+var tokenizer = require('../'),
+    domain = require('domain');
 
 Function.prototype.withDomain = function(withStack) {
     var fn = this;
     return function(test) {
         var d = domain.create();
+
         d.on('error', function(e) {
             test.fail('test failed with ' + e.message);
             if(withStack) {
@@ -12,6 +13,7 @@ Function.prototype.withDomain = function(withStack) {
             }
             test.done();
         });
+
         d.run(fn.bind(this, test));
     }
 }
@@ -42,20 +44,17 @@ Function.prototype.timed = function (timeout) {
     }
 }
 
-
-
 exports['test big file of small integers'] = function (test) {
     var numbers = [0];
     for (var i = 0; i < 100000; ++i) {
-        numbers.push(Math.floor(Math.random() * 10000));
+        numbers.push(Math.floor(Math.random() * 100000));
     };
-    var t = tokenizer();
+    var t = tokenizer(undefined, {split: /\,/});
     t.addRule('number');
     t.addRule(/^\d+\.$/, 'maybe-float');
     t.addRule('whitespace');
     t.addRule(/^,$/, 'comma');
     t.ignore('whitespace');
-    t.ignore('comma');
     t.on('data', function(token) { });
     t.on('end', test.done.bind(test));
diff --git a/test/test-tokenizer.js b/test/test-tokenizer.js
index 5e3e3c3..d708a15 100644
--- a/test/test-tokenizer.js
+++ b/test/test-tokenizer.js
@@ -16,7 +16,6 @@ Function.prototype.withDomain = function(withStack) {
     }
 }
 
-
 exports['test empty'] = function(test) {
     var t = tokenizer();
     t.on('data', test.fail.bind(test, "No data should be emitted"));
@@ -174,3 +173,26 @@ exports['words in two chunks'] = function(test) {
     t.write('Hell');
     t.end('o World');
 }.withDomain();
+
+exports['verify regex priority order and that longest matches first'] = function(test) {
+    // Regression test derived from a SLIM-template tokenizer whose rules originally failed to tokenize correctly.
+    var t = tokenizer(undefined, {split: /^\r?\n+$/});
+    t.addRule(/^([a-zA-Z0-9\-_]+\s*=\s*)(["'])(\\\2|[^"']+)*?\2$/, 'tKeyValue'); // name='value'
+    t.addRule(/^[a-zA-Z0-9\-_]+$/, 'tIdentifier'); // name
+    t.addRule(/^[#][a-zA-Z0-9\-_]+$/, 'tIdName'); // #name
+    t.addRule(/^\.[a-zA-Z0-9\-_]+$/, 'tClassName'); // .name
+    t.addRule('whitespace');
+    t.ignore('whitespace');
+
+    var expectations = ['tIdentifier', 'tIdName', 'tClassName', 'tKeyValue', 'tKeyValue'];
+
+    t.on('data', function(token) {
+        var e = expectations.shift();
+
+        test.equal(e, token.type);
+    });
+
+    t.on('end', test.done.bind(test));
+    t.write('tag#id.class var1 = \'value1\' var2 = \'value2\'');
+    t.end();
+}.withDomain();
\ No newline at end of file