diff --git a/.DS_Store b/.DS_Store new file mode 100644 index 0000000..5197dfa Binary files /dev/null and b/.DS_Store differ diff --git a/.gitignore b/.gitignore index d8eff1c..319c19f 100644 --- a/.gitignore +++ b/.gitignore @@ -111,3 +111,5 @@ crashlytics.properties crashlytics-build.properties atusdata/ + +bench/ \ No newline at end of file diff --git a/Program Classify Test.ipynb b/Program Classify Test.ipynb new file mode 100644 index 0000000..0756e8c --- /dev/null +++ b/Program Classify Test.ipynb @@ -0,0 +1,1054 @@ +{ + "metadata": { + "name": "", + "signature": "sha256:cd8f99627ce5bb462124da6ee784209610a9c7ca18c3c61f3937278e81d486a2" + }, + "nbformat": 3, + "nbformat_minor": 0, + "worksheets": [ + { + "cells": [ + { + "cell_type": "code", + "collapsed": false, + "input": [ + "import re\n", + "import os\n", + "#from os.path import isfile, join\n", + "import numpy as np\n", + "import pandas as pd\n", + "from sklearn.ensemble import AdaBoostClassifier, RandomForestClassifier\n", + "from sklearn.tree import DecisionTreeClassifier\n", + "from sklearn.cluster import MiniBatchKMeans\n", + "from sklearn.cross_validation import train_test_split\n", + "from sklearn import metrics" + ], + "language": "python", + "metadata": {}, + "outputs": [], + "prompt_number": 68 + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "pd.set_option('display.max_rows', 1000)\n", + "pd.set_option('display.max_columns', 1000)" + ], + "language": "python", + "metadata": {}, + "outputs": [], + "prompt_number": 69 + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "extensions = (\".clj\", \".cljs\", \".edn\", \".clojure\",\n", + " \".hs\", \".lhs\", \".ghc\",\".java\", \".jar\",\n", + " \".js\", \".javascript\", \".ml\", \".pl\", \n", + " \".pm\", \".t\", \".pod\", \".php\", \".phtml\", \".ocaml\", \n", + " \".php4\", \".php3\", \".php5\", \".phps\", \".perl\",\n", + " \".py\", \".pyw\", \".pyc\", \".pyo\", \".pyd\", \n", + " \".python3\", \"rb\", 
\".rbw\", '.ruby', \".jruby\", \".scala\",\n", + " \".scm\", \".ss\", \".racket\", \".tcl\", \".racket\")\n", + "\n", + "\n", + "languages = {\"Clojure: .clj, .cljs, .edn, .clojure\", \"Haskell: .hs, .lhs, .ghc\",\"Java: .java, .jar\",\n", + " \"Javascript: .js, .javascript\", \"OCaml: .ml\", \"Perl: .pl, .pm, .t, .pod\", \n", + " \"PHP: .php, .phtml, .php4, .php3, .php5, .phps\", \"Python: .py, .pyw, .pyc, .pyo, .pyd, .python3\",\n", + " \"Ruby: .rb, .rbw\", \"Scala: .scala\", \"Scheme: .scm, .ss\", \"Tcl: .tcl\"}" + ], + "language": "python", + "metadata": {}, + "outputs": [], + "prompt_number": 70 + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "def open_read_file(file):\n", + " \"\"\"Opens a file and returns it as a string of text.\"\"\"\n", + " with open(file) as text:\n", + " clean = re.sub('[\\t]', ' ', text.read())\n", + " clean = re.sub('[\\n]', '', clean)\n", + " return clean" + ], + "language": "python", + "metadata": {}, + "outputs": [], + "prompt_number": 71 + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "path = (\"\"\"/Users/chameleonsrock/ironyard/sandbox/programming-language-classifier/bench\"\"\")" + ], + "language": "python", + "metadata": {}, + "outputs": [], + "prompt_number": 72 + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "test_path = ('/Users/chameleonsrock/ironyard/sandbox/programming-language-classifier/test')" + ], + "language": "python", + "metadata": {}, + "outputs": [], + "prompt_number": 73 + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "test_doc = pd.read_csv('/Users/chameleonsrock/ironyard/sandbox/programming-language-classifier/test.csv')" + ], + "language": "python", + "metadata": {}, + "outputs": [], + "prompt_number": 74 + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "test_doc.head()" + ], + "language": "python", + "metadata": {}, + "outputs": [ + { + "html": [ + "
\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
FilenameLanguage
0 1 clojure
1 2 clojure
2 3 clojure
3 4 clojure
4 5 python
\n", + "
" + ], + "metadata": {}, + "output_type": "pyout", + "prompt_number": 75, + "text": [ + " Filename Language\n", + "0 1 clojure\n", + "1 2 clojure\n", + "2 3 clojure\n", + "3 4 clojure\n", + "4 5 python" + ] + } + ], + "prompt_number": 75 + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "def get_filepaths(directory):\n", + " file_paths = []\n", + " for root, subdir, files in os.walk(directory):\n", + " for filename in files:\n", + " if filename.endswith(extensions):\n", + " filepath = os.path.join(root, filename)\n", + " file_paths.append(filepath)\n", + " return file_paths" + ], + "language": "python", + "metadata": {}, + "outputs": [], + "prompt_number": 76 + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "def get_test_paths(directory):\n", + " file_paths = []\n", + " for root, subdir, files in os.walk(directory):\n", + " for filename in files:\n", + " filepath = os.path.join(root, filename)\n", + " file_paths.append(filepath)\n", + " return file_paths" + ], + "language": "python", + "metadata": {}, + "outputs": [], + "prompt_number": 77 + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "file_paths = get_filepaths(path)\n", + "file_paths = file_paths[1:]" + ], + "language": "python", + "metadata": {}, + "outputs": [], + "prompt_number": 78 + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "test_paths = get_test_paths(test_path)\n", + "test_paths.sort()" + ], + "language": "python", + "metadata": {}, + "outputs": [], + "prompt_number": 79 + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "test_paths" + ], + "language": "python", + "metadata": {}, + "outputs": [ + { + "metadata": {}, + "output_type": "pyout", + "prompt_number": 80, + "text": [ + "['/Users/chameleonsrock/ironyard/sandbox/programming-language-classifier/test/01',\n", + " '/Users/chameleonsrock/ironyard/sandbox/programming-language-classifier/test/02',\n", + " 
'/Users/chameleonsrock/ironyard/sandbox/programming-language-classifier/test/03',\n", + " '/Users/chameleonsrock/ironyard/sandbox/programming-language-classifier/test/04',\n", + " '/Users/chameleonsrock/ironyard/sandbox/programming-language-classifier/test/05',\n", + " '/Users/chameleonsrock/ironyard/sandbox/programming-language-classifier/test/06',\n", + " '/Users/chameleonsrock/ironyard/sandbox/programming-language-classifier/test/07',\n", + " '/Users/chameleonsrock/ironyard/sandbox/programming-language-classifier/test/08',\n", + " '/Users/chameleonsrock/ironyard/sandbox/programming-language-classifier/test/09',\n", + " '/Users/chameleonsrock/ironyard/sandbox/programming-language-classifier/test/10',\n", + " '/Users/chameleonsrock/ironyard/sandbox/programming-language-classifier/test/11',\n", + " '/Users/chameleonsrock/ironyard/sandbox/programming-language-classifier/test/12',\n", + " '/Users/chameleonsrock/ironyard/sandbox/programming-language-classifier/test/13',\n", + " '/Users/chameleonsrock/ironyard/sandbox/programming-language-classifier/test/14',\n", + " '/Users/chameleonsrock/ironyard/sandbox/programming-language-classifier/test/15',\n", + " '/Users/chameleonsrock/ironyard/sandbox/programming-language-classifier/test/16',\n", + " '/Users/chameleonsrock/ironyard/sandbox/programming-language-classifier/test/17',\n", + " '/Users/chameleonsrock/ironyard/sandbox/programming-language-classifier/test/18',\n", + " '/Users/chameleonsrock/ironyard/sandbox/programming-language-classifier/test/19',\n", + " '/Users/chameleonsrock/ironyard/sandbox/programming-language-classifier/test/20',\n", + " '/Users/chameleonsrock/ironyard/sandbox/programming-language-classifier/test/21',\n", + " '/Users/chameleonsrock/ironyard/sandbox/programming-language-classifier/test/22',\n", + " '/Users/chameleonsrock/ironyard/sandbox/programming-language-classifier/test/23',\n", + " '/Users/chameleonsrock/ironyard/sandbox/programming-language-classifier/test/24',\n", + " 
'/Users/chameleonsrock/ironyard/sandbox/programming-language-classifier/test/25',\n", + " '/Users/chameleonsrock/ironyard/sandbox/programming-language-classifier/test/28',\n", + " '/Users/chameleonsrock/ironyard/sandbox/programming-language-classifier/test/29',\n", + " '/Users/chameleonsrock/ironyard/sandbox/programming-language-classifier/test/30',\n", + " '/Users/chameleonsrock/ironyard/sandbox/programming-language-classifier/test/31',\n", + " '/Users/chameleonsrock/ironyard/sandbox/programming-language-classifier/test/32']" + ] + } + ], + "prompt_number": 80 + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "test_list = []\n", + "\n", + "for paths in test_paths:\n", + " test_list.append(open_read_file(paths))" + ], + "language": "python", + "metadata": {}, + "outputs": [], + "prompt_number": 81 + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "test_df = np.array(test_list)\n", + "test_df = pd.DataFrame(test_df)\n", + "test_df = test_df.join(test_doc)\n", + "test_df.head()" + ], + "language": "python", + "metadata": {}, + "outputs": [ + { + "html": [ + "
\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
0FilenameLanguage
0 (defn cf-settings \"Setup settings for campfir... 1 clojure
1 (ns my-cli.core)(defn -main [& args] (println... 2 clojure
2 (extend-type String Person (first-name [s] (... 3 clojure
3 (require '[overtone.live :as overtone])(defn n... 4 clojure
4 from pkgutil import iter_modulesfrom subproces... 5 python
\n", + "
" + ], + "metadata": {}, + "output_type": "pyout", + "prompt_number": 82, + "text": [ + " 0 Filename Language\n", + "0 (defn cf-settings \"Setup settings for campfir... 1 clojure\n", + "1 (ns my-cli.core)(defn -main [& args] (println... 2 clojure\n", + "2 (extend-type String Person (first-name [s] (... 3 clojure\n", + "3 (require '[overtone.live :as overtone])(defn n... 4 clojure\n", + "4 from pkgutil import iter_modulesfrom subproces... 5 python" + ] + } + ], + "prompt_number": 82 + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + " = []\n", + "\n", + "for paths in file_paths:\n", + " .append(open_read_file(paths))" + ], + "language": "python", + "metadata": {}, + "outputs": [], + "prompt_number": 83 + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + " = np.array()\n", + " = pd.DataFrame()\n", + "['path'] = [x for x in file_paths]\n", + " = .rename(columns={0: 'Snippet'})\n", + "['extension'] = [os.path.splitext(fp)[-1].lower() for fp in file_paths]" + ], + "language": "python", + "metadata": {}, + "outputs": [], + "prompt_number": 84 + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "def get_lang(ext):\n", + " if ext in ['.clj', '.cljs', '.edn', '.clojure']:\n", + " return 'Clojure'\n", + " elif ext in ['.hs', '.lhs', '.ghc']:\n", + " return 'Haskell'\n", + " elif ext in ['.java', '.jar']:\n", + " return 'Java'\n", + " elif ext in ['.js', '.javascript']:\n", + " return 'Javascript'\n", + " elif ext in ['.ml', '.ocaml']:\n", + " return 'OCaml'\n", + " elif ext in ['.pl', '.pm', '.t', '.pod', '.perl']:\n", + " return 'Perl'\n", + " elif ext in ['.php', '.phtml', '.php4', '.php3', '.php5', '.phps']:\n", + " return 'PHP'\n", + " elif ext in ['.py', '.pyw', '.pyc', '.pyo', '.pyd', '.python3']:\n", + " return 'Python'\n", + " elif ext in ['.rb', '.rbw', '.ruby', '.jruby']:\n", + " return 'Ruby'\n", + " elif ext == '.scala':\n", + " return 'Scala'\n", + " elif ext in ['.scm', '.ss', '.racket']:\n", + " return 
'Scheme'" + ], + "language": "python", + "metadata": {}, + "outputs": [], + "prompt_number": 85 + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "['Language'] = .extension.map(get_lang)" + ], + "language": "python", + "metadata": {}, + "outputs": [], + "prompt_number": 86 + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + ".Language.value_counts()" + ], + "language": "python", + "metadata": {}, + "outputs": [ + { + "metadata": {}, + "output_type": "pyout", + "prompt_number": 87, + "text": [ + "Java 51\n", + "Scala 43\n", + "Clojure 37\n", + "Python 36\n", + "Ruby 34\n", + "Perl 34\n", + "OCaml 34\n", + "Haskell 33\n", + "Scheme 29\n", + "PHP 29\n", + "Javascript 25\n", + "dtype: int64" + ] + } + ], + "prompt_number": 87 + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "def slash_star(snippets):\n", + " count = 0\n", + " count = len(list(re.finditer(r\"/\\*\", snippets)))\n", + " return count\n", + "\n", + "\n", + "def two_semicolons(snippets):\n", + " count = 0\n", + " count = len(list(re.finditer(r\";{2}\", snippets)))\n", + " return count\n", + "\n", + "\n", + "def print_statement(snippets):\n", + " count = 0\n", + " count = len(list(re.finditer(r\".print.\", snippets)))\n", + " return count\n", + "\n", + "\n", + "def puts(snippets):\n", + " count = 0\n", + " count = len(list(re.finditer(r\".puts.\", snippets)))\n", + " return count\n", + "\n", + "\n", + "def val(snippets):\n", + " count = 0\n", + " count = len(list(re.finditer(r\".val.\", snippets)))\n", + " return count\n", + "\n", + "\n", + "def money(snippets):\n", + " count = 0\n", + " count = len(list(re.finditer(r\".\\$.\", snippets)))\n", + " return count\n", + "\n", + "\n", + "def caml_star(snippets):\n", + " count = 0\n", + " count = len(list(re.finditer(r\"\\(\\*\" , snippets)))\n", + " return count\n", + "\n", + "\n", + "def star_c(snippets):\n", + " count = 0\n", + " count = len(list(re.finditer(r\"\\*\\)\" , snippets)))\n", + " return 
count\n", + "\n", + "\n", + "def public(snippets):\n", + " count = 0\n", + " count = len(list(re.finditer(r\".public.\", snippets)))\n", + " return count\n", + "\n", + "\n", + "def static(snippets):\n", + " count = 0\n", + " count = len(list(re.finditer(r\".static.\", snippets)))\n", + " return count\n", + "\n", + "\n", + "def void(snippets):\n", + " count = 0\n", + " count = len(list(re.finditer(r\".void.\", snippets)))\n", + " return count\n", + "\n", + "\n", + "def var(snippets):\n", + " count = 0\n", + " count = len(list(re.finditer(r\".var.\", snippets)))\n", + " return count\n", + "\n", + "\n", + "def let(snippets):\n", + " count = 0\n", + " count = len(list(re.finditer(r\".let.\", snippets)))\n", + " return count\n", + "\n", + "\n", + "def require(snippets):\n", + " count = 0\n", + " count = len(list(re.finditer(r\".require.\", snippets)))\n", + " return count\n", + "\n", + "\n", + "def end(snippets):\n", + " count = 0\n", + " count = len(list(re.finditer(r\".end.\", snippets)))\n", + " return count\n", + "\n", + "\n", + "def private(snippets):\n", + " count = 0\n", + " count = len(list(re.finditer(r\".private.\", snippets)))\n", + " return count\n", + "\n", + "\n", + "def double_colon(snippets):\n", + " count = 0\n", + " count = len(list(re.finditer(r\".::.\", snippets)))\n", + " return count\n", + "\n", + "\n", + "def read_json(snippets):\n", + " count = 0\n", + " count = len(list(re.finditer(r\".readJSON.\", snippets)))\n", + " return count\n", + "\n", + "\n", + "def arrow(snippets):\n", + " count = 0\n", + " count = len(list(re.finditer(r\".->.\", snippets)))\n", + " return count\n", + "\n", + "\n", + "def curly_dash(snippets):\n", + " count = 0\n", + " count = len(list(re.finditer(r\".{-.\", snippets)))\n", + " return count\n", + "\n", + "\n", + "def defn(snippets):\n", + " count = 0\n", + " count = len(list(re.finditer(r\".defn.\", snippets)))\n", + " return count\n", + "\n", + "\n", + "def pipe(snippets):\n", + " count = 0\n", + " count = 
len(list(re.finditer(r\" | \", snippets)))\n", + " return count\n", + "\n", + "\n", + "def double_slash(snippets):\n", + " count = 0\n", + " count = len(list(re.finditer(r\".//.\", snippets)))\n", + " return count\n", + "\n", + "\n", + "def object_str(snippets):\n", + " count = 0\n", + " count = len(list(re.finditer(r\".object.\", snippets)))\n", + " return count\n", + "\n", + "def elif_str(snippets):\n", + " count = 0\n", + " count = len(list(re.finditer(r\".elif.\", snippets)))\n", + " return count\n", + "\n", + "\n", + "def else_str(snippets):\n", + " count = 0\n", + " count = len(list(re.finditer(r\"else:\", snippets)))\n", + " return count \n", + "\n", + "\n", + "def implicit(snippets):\n", + " count = 0\n", + " count = len(list(re.finditer(r\".implicit.\", snippets)))\n", + " return count\n", + "\n", + "\n", + "def extends(snippets):\n", + " count = 0\n", + " count = len(list(re.finditer(r\".extends.\", snippets)))\n", + " return count \n", + "\n", + "\n", + "def triple_quotes(snippets):\n", + " count = 0\n", + " count = len(list(re.finditer(r'.\"\"\".', snippets)))\n", + " return count \n", + "\n" + ], + "language": "python", + "metadata": {}, + "outputs": [], + "prompt_number": 53 + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "[';;'] = ['Snippet'].apply(two_semicolons)\n", + "['/*'] = ['Snippet'].apply(slash_star)\n", + "['print'] = ['Snippet'].apply(print_statement)\n", + "['val'] = ['Snippet'].apply(val)\n", + "['$'] = ['Snippet'].apply(money)\n", + "['(*'] = ['Snippet'].apply(caml_star)\n", + "['*)'] = ['Snippet'].apply(star_c)\n", + "['static'] = ['Snippet'].apply(static)\n", + "['var'] = ['Snippet'].apply(var)\n", + "['let'] = ['Snippet'].apply(let)\n", + "['end'] = ['Snippet'].apply(end)\n", + "['::'] = ['Snippet'].apply(double_colon)\n", + "['defn'] = ['Snippet'].apply(defn)\n", + "['|'] = ['Snippet'].apply(pipe)\n", + "['//'] = ['Snippet'].apply(double_slash)\n", + "['object'] = ['Snippet'].apply(object_str)\n", + "['elif'] = 
['Snippet'].apply(elif_str)\n", + "['else'] = ['Snippet'].apply(else_str)" + ], + "language": "python", + "metadata": {}, + "outputs": [], + "prompt_number": 22 + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "grouped = .groupby('Language')" + ], + "language": "python", + "metadata": {}, + "outputs": [], + "prompt_number": 23 + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "#grouped.describe()" + ], + "language": "python", + "metadata": {}, + "outputs": [], + "prompt_number": 67 + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "test_df = test_df.rename(columns={0: 'Snippet'})" + ], + "language": "python", + "metadata": {}, + "outputs": [], + "prompt_number": 25 + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "train_data = .drop(['extension', 'Language', 'path', 'Snippet'], axis=1)\n", + "results = [['Language']]\n", + "train_data.head()" + ], + "language": "python", + "metadata": {}, + "outputs": [ + { + "html": [ + "
\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
;;/*printval$(**)staticvarletend::defn|//objectelifelse
0 10 0 3 0 0 2 0 0 0 7 1 0 5 632 1 0 0 0
1 14 0 3 2 0 1 0 3 0 8 0 0 6 682 1 0 0 0
2 0 0 1 0 0 0 0 0 0 4 0 4 0 373 1 0 0 0
3 0 0 1 0 0 0 0 0 0 5 0 4 0 455 1 0 0 0
4 0 0 1 0 0 0 0 0 0 5 0 4 0 437 1 0 0 0
\n", + "
" + ], + "metadata": {}, + "output_type": "pyout", + "prompt_number": 26, + "text": [ + " ;; /* print val $ (* *) static var let end :: defn | // \\\n", + "0 10 0 3 0 0 2 0 0 0 7 1 0 5 632 1 \n", + "1 14 0 3 2 0 1 0 3 0 8 0 0 6 682 1 \n", + "2 0 0 1 0 0 0 0 0 0 4 0 4 0 373 1 \n", + "3 0 0 1 0 0 0 0 0 0 5 0 4 0 455 1 \n", + "4 0 0 1 0 0 0 0 0 0 5 0 4 0 437 1 \n", + "\n", + " object elif else \n", + "0 0 0 0 \n", + "1 0 0 0 \n", + "2 0 0 0 \n", + "3 0 0 0 \n", + "4 0 0 0 " + ] + } + ], + "prompt_number": 26 + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "X_train, X_test, y_train, y_test = train_test_split(train_data, results,\n", + " test_size=0.4, random_state=0)\n", + "\n" + ], + "language": "python", + "metadata": {}, + "outputs": [], + "prompt_number": 27 + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "classifier = RandomForestClassifier()\n", + "classifier.fit(X_train, y_train)\n", + "predicted = classifier.predict(X_test)\n", + "\n", + "print(metrics.classification_report(y_test, predicted))\n", + "print(metrics.confusion_matrix(y_test, predicted))\n", + "print(metrics.f1_score(y_test, predicted))" + ], + "language": "python", + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "stream": "stdout", + "text": [ + " precision recall f1-score support\n", + "\n", + " Clojure 1.00 1.00 1.00 14\n", + " Haskell 0.90 0.82 0.86 11\n", + " Java 0.95 1.00 0.97 18\n", + " Javascript 0.92 0.92 0.92 13\n", + " OCaml 0.93 1.00 0.96 13\n", + " PHP 0.82 1.00 0.90 9\n", + " Perl 1.00 0.84 0.91 19\n", + " Python 0.85 0.94 0.89 18\n", + " Ruby 1.00 0.88 0.94 17\n", + " Scala 1.00 1.00 1.00 17\n", + " Scheme 1.00 1.00 1.00 5\n", + "\n", + "avg / total 0.95 0.94 0.94 154\n", + "\n", + "[[14 0 0 0 0 0 0 0 0 0 0]\n", + " [ 0 9 0 0 0 1 0 1 0 0 0]\n", + " [ 0 0 18 0 0 0 0 0 0 0 0]\n", + " [ 0 0 1 12 0 0 0 0 0 0 0]\n", + " [ 0 0 0 0 13 0 0 0 0 0 0]\n", + " [ 0 0 0 0 0 9 0 0 0 0 0]\n", + " [ 0 1 0 0 1 1 16 0 0 0 0]\n", + " [ 0 0 0 1 
0 0 0 17 0 0 0]\n", + " [ 0 0 0 0 0 0 0 2 15 0 0]\n", + " [ 0 0 0 0 0 0 0 0 0 17 0]\n", + " [ 0 0 0 0 0 0 0 0 0 0 5]]\n", + "0.941394773961\n" + ] + }, + { + "output_type": "stream", + "stream": "stderr", + "text": [ + "-c:2: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n" + ] + } + ], + "prompt_number": 28 + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "tree = DecisionTreeClassifier()\n", + "tree = tree.fit(X_train, y_train)" + ], + "language": "python", + "metadata": {}, + "outputs": [], + "prompt_number": 29 + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "tree.feature_importances_" + ], + "language": "python", + "metadata": {}, + "outputs": [ + { + "metadata": {}, + "output_type": "pyout", + "prompt_number": 30, + "text": [ + "array([ 0.10153503, 0.04890005, 0.01643699, 0.00719118, 0.09096754,\n", + " 0. , 0.08868024, 0.14464065, 0.05134603, 0.05267018,\n", + " 0.0557526 , 0.08308486, 0.10458043, 0.02301179, 0. ,\n", + " 0.10921034, 0. 
, 0.02199209])" + ] + } + ], + "prompt_number": 30 + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "test_df[';;'] = test_df['Snippet'].apply(two_semicolons)\n", + "test_df['/*'] = test_df['Snippet'].apply(slash_star)\n", + "test_df['print'] = test_df['Snippet'].apply(print_statement)\n", + "test_df['*)'] = test_df['Snippet'].apply(star_c)\n", + "test_df['val'] = test_df['Snippet'].apply(val)\n", + "test_df['$'] = test_df['Snippet'].apply(money)\n", + "test_df['(*'] = test_df['Snippet'].apply(caml_star)\n", + "test_df['static'] = test_df['Snippet'].apply(static)\n", + "test_df['var'] = test_df['Snippet'].apply(var)\n", + "test_df['let'] = test_df['Snippet'].apply(let)\n", + "test_df['end'] = test_df['Snippet'].apply(end)\n", + "test_df['::'] = test_df['Snippet'].apply(double_colon)\n", + "test_df['defn'] = test_df['Snippet'].apply(defn)\n", + "test_df['|'] = test_df['Snippet'].apply(pipe)\n", + "test_df['//'] = test_df['Snippet'].apply(double_slash)\n", + "test_df['object'] = test_df['Snippet'].apply(object_str)\n", + "test_df['elif'] = test_df['Snippet'].apply(elif_str)\n", + "test_df['else'] = test_df['Snippet'].apply(else_str)\n", + "test_df['implicit'] = test_df['Snippet'].apply(implicit)\n", + "test_df['extends'] = test_df['Snippet'].apply(extends)\n", + "test_df['\"\"\"'] = test_df['Snippet'].apply(triple_quotes)" + ], + "language": "python", + "metadata": {}, + "outputs": [], + "prompt_number": 54 + }, + { + "cell_type": "code", + "collapsed": false, + "input": [], + "language": "python", + "metadata": {}, + "outputs": [], + "prompt_number": 36 + }, + { + "cell_type": "code", + "collapsed": false, + "input": [], + "language": "python", + "metadata": {}, + "outputs": [], + "prompt_number": 36 + } + ], + "metadata": {} + } + ] +} \ No newline at end of file diff --git a/Untitled0.ipynb b/Untitled0.ipynb new file mode 100644 index 0000000..c348bef --- /dev/null +++ b/Untitled0.ipynb @@ -0,0 +1,111 @@ +{ + "metadata": { + "name": "", + 
"signature": "sha256:eaed2b782c2a0e9c628dc2284a212eab46217bad4829a649725526f4c0742a96" + }, + "nbformat": 3, + "nbformat_minor": 0, + "worksheets": [ + { + "cells": [ + { + "cell_type": "code", + "collapsed": false, + "input": [ + "from open_parse import *\n", + "import classifier_train\n", + "import pickle\n", + "import re" + ], + "language": "python", + "metadata": {}, + "outputs": [], + "prompt_number": 1 + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "path = (\"/Users/chameleonsrock/ironyard/sandbox\"\n", + " \"/programming-language-classifier/bench\")" + ], + "language": "python", + "metadata": {}, + "outputs": [], + "prompt_number": 2 + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "train = open_and_parse(path)" + ], + "language": "python", + "metadata": {}, + "outputs": [], + "prompt_number": 3 + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "with open(\"/Users/chameleonsrock/ironyard/sandbox/programming-language-classifier/rf_programming.dat\", \"rb\") as file:\n", + " classifier = pickle.load(file)" + ], + "language": "python", + "metadata": {}, + "outputs": [], + "prompt_number": 4 + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "def id_code(snippet_path):\n", + " \"\"\"Opens a file, parses it, then applies a trained classifier to return\n", + " the predicted language.\"\"\"\n", + " snippet = open_and_parse_single(snippet_path)\n", + " snippet = snippet.drop(['Snippet'], axis=1)\n", + " return snippet" + ], + "language": "python", + "metadata": {}, + "outputs": [], + "prompt_number": 5 + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "snippet = id_code('/Users/chameleonsrock/ironyard/sandbox/programming-language-classifier/test/13')" + ], + "language": "python", + "metadata": {}, + "outputs": [], + "prompt_number": 28 + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "classifier.predict(snippet)" + ], + "language": "python", + 
"metadata": {}, + "outputs": [ + { + "metadata": {}, + "output_type": "pyout", + "prompt_number": 29, + "text": [ + "array(['Perl'], dtype=object)" + ] + } + ], + "prompt_number": 29 + } + ], + "metadata": {} + } + ] +} \ No newline at end of file diff --git a/classifier_train.py b/classifier_train.py new file mode 100644 index 0000000..cb3117b --- /dev/null +++ b/classifier_train.py @@ -0,0 +1,37 @@ +import re +import os +from os.path import isfile, join +import numpy as np +import pandas as pd +from sklearn.ensemble import AdaBoostClassifier, RandomForestClassifier +from sklearn.tree import DecisionTreeClassifier +from sklearn.cross_validation import train_test_split +from sklearn import metrics +from open_parse import * +import pickle + +if __name__ == '__main__': + + path = ("/Users/chameleonsrock/ironyard/sandbox" + "/programming-language-classifier/bench") + + train = open_and_parse(path) + file_paths = get_filepaths(path) + train['extension'] = [os.path.splitext(fp)[-1].lower() for fp in file_paths] + train['Language'] = train.extension.map(get_lang) + + train_data = train.drop(['extension', 'Language', 'Snippet'], axis=1) + results = train['Language'].values + + X_train, X_test, y_train, y_test = train_test_split(train_data, + results, + test_size=0.4, + random_state=0) + + classifier = RandomForestClassifier() + classifier.fit(X_train, y_train) + + with open("/Users/chameleonsrock/ironyard/sandbox" + "/programming-language-classifier" + "/rf_programming.dat", "wb") as f: + pickle.dump(classifier, f) diff --git a/get_language b/get_language new file mode 100644 index 0000000..09d230d --- /dev/null +++ b/get_language @@ -0,0 +1,11 @@ +with open("/Users/chameleonsrock/ironyard/sandbox" + "/programming-language-classifier/" + "rf_programming.dat", "rb") as file: + classifier = pickle.load(file) + + +def get_language(snippet_path): + """Opens a file, parses it, then applies a trained classifier to return + the predicted language.""" + snippet = 
"""Feature extraction for the programming-language classifier.

Source files are read, flattened to a single line of text, and turned into a
row of token counts (one column per token) in a pandas DataFrame.
"""
import os
import re

import numpy as np  # not used below; kept so existing importers still find it
import pandas as pd

# Nothing in this module uses scikit-learn.  The names are imported only so
# that callers doing ``from open_parse import ...`` keep working.
# ``sklearn.cross_validation`` was removed in scikit-learn 0.20, so the
# modern location is tried first.
try:
    from sklearn.ensemble import RandomForestClassifier
    from sklearn import metrics
    try:
        from sklearn.model_selection import train_test_split
    except ImportError:  # scikit-learn < 0.18
        from sklearn.cross_validation import train_test_split
except ImportError:  # scikit-learn missing: feature extraction still works
    RandomForestClassifier = metrics = train_test_split = None


# Single source of truth for extension -> language.  Order is irrelevant
# because no extension appears under two languages.
_EXTENSIONS_BY_LANGUAGE = (
    ('Clojure', ('.clj', '.cljs', '.edn', '.clojure')),
    ('Haskell', ('.hs', '.lhs', '.ghc')),
    ('Java', ('.java', '.jar')),
    ('Javascript', ('.js', '.javascript')),
    ('OCaml', ('.ml', '.ocaml')),
    ('Perl', ('.pl', '.pm', '.t', '.pod', '.perl')),
    ('PHP', ('.php', '.phtml', '.php4', '.php3', '.php5', '.phps')),
    ('Python', ('.py', '.pyw', '.pyc', '.pyo', '.pyd', '.python3')),
    ('Ruby', ('.rb', '.rbw', '.ruby', '.jruby')),
    ('Scala', ('.scala',)),
    ('Scheme', ('.scm', '.ss', '.racket')),
)

_LANGUAGE_BY_EXTENSION = {
    extension: language
    for language, extensions in _EXTENSIONS_BY_LANGUAGE
    for extension in extensions
}

# Extensions collected by get_filepaths().  ``.tcl`` files are collected even
# though get_lang() has no label for them.
_SOURCE_EXTENSIONS = tuple(_LANGUAGE_BY_EXTENSION) + ('.tcl',)


def open_read_file(file):
    """Return the contents of *file* flattened to a single line.

    Each tab becomes one space.  Newlines are removed without inserting
    anything, so the last token of a line is joined to the first token of
    the next line.
    """
    with open(file) as handle:
        text = handle.read()
    return text.replace('\t', ' ').replace('\n', '')


def get_filepaths(directory):
    """Return paths of all files under *directory* with a known extension.

    The tree is walked recursively with ``os.walk``; a file is kept when its
    name ends with one of ``_SOURCE_EXTENSIONS``.  Every extension includes
    its leading dot, so a file merely *named* ``herb`` is not mistaken for a
    Ruby source file.
    """
    return [
        os.path.join(root, filename)
        for root, _subdirs, files in os.walk(directory)
        for filename in files
        if filename.endswith(_SOURCE_EXTENSIONS)
    ]


def get_lang(ext):
    """Return the language name for the extension *ext* (dot included).

    Returns ``None`` for an extension without a mapping.  NOTE: ``.tcl`` has
    no mapping even though get_filepaths() collects ``.tcl`` files.
    """
    return _LANGUAGE_BY_EXTENSION.get(ext)


# ---------------------------------------------------------------------------
# Token counters
#
# Each counter returns the number of non-overlapping regex matches of one
# token in the given text.  Most patterns are wrapped in ``.``, i.e. the token
# must have one (non-newline) character on each side; those flanking
# characters are consumed by the match, and a token at the very start or end
# of the text is not counted.
# ---------------------------------------------------------------------------

def _count_matches(pattern, text):
    """Return how many non-overlapping matches of *pattern* occur in *text*."""
    return sum(1 for _ in re.finditer(pattern, text))


def slash_star(snippets):
    """Count ``/*`` occurrences."""
    return _count_matches(r"/\*", snippets)


def two_semicolons(snippets):
    """Count non-overlapping ``;;`` pairs."""
    return _count_matches(r";{2}", snippets)


def print_statement(snippets):
    """Count flanked ``print`` occurrences."""
    return _count_matches(r".print.", snippets)


def puts(snippets):
    """Count flanked ``puts`` occurrences."""
    return _count_matches(r".puts.", snippets)


def val(snippets):
    """Count flanked ``val`` occurrences."""
    return _count_matches(r".val.", snippets)


def money(snippets):
    """Count flanked ``$`` occurrences."""
    return _count_matches(r".\$.", snippets)


def caml_star(snippets):
    """Count ``(*`` occurrences."""
    return _count_matches(r"\(\*", snippets)


def star_c(snippets):
    """Count ``*)`` occurrences."""
    return _count_matches(r"\*\)", snippets)


def public(snippets):
    """Count flanked ``public`` occurrences."""
    return _count_matches(r".public.", snippets)


def static(snippets):
    """Count flanked ``static`` occurrences."""
    return _count_matches(r".static.", snippets)


def void(snippets):
    """Count flanked ``void`` occurrences."""
    return _count_matches(r".void.", snippets)


def var(snippets):
    """Count flanked ``var`` occurrences."""
    return _count_matches(r".var.", snippets)


def let(snippets):
    """Count flanked ``let`` occurrences."""
    return _count_matches(r".let.", snippets)


def require(snippets):
    """Count flanked ``require`` occurrences."""
    return _count_matches(r".require.", snippets)


def end(snippets):
    """Count flanked ``end`` occurrences."""
    return _count_matches(r".end.", snippets)


def private(snippets):
    """Count flanked ``private`` occurrences."""
    return _count_matches(r".private.", snippets)


def double_colon(snippets):
    """Count flanked ``::`` occurrences."""
    return _count_matches(r".::.", snippets)


def read_json(snippets):
    """Count flanked ``readJSON`` occurrences."""
    return _count_matches(r".readJSON.", snippets)


def arrow(snippets):
    """Count flanked ``->`` occurrences."""
    return _count_matches(r".->.", snippets)


def curly_dash(snippets):
    """Count flanked ``{-`` occurrences."""
    # The brace is escaped only for clarity; an unescaped ``{`` that does not
    # start a valid quantifier is treated as a literal as well.
    return _count_matches(r".\{-.", snippets)


def defn(snippets):
    """Count flanked ``defn`` occurrences."""
    return _count_matches(r".defn.", snippets)


def pipe(snippets):
    """Count ``|`` characters that have a space on each side.

    The bar is escaped: unescaped, ``" | "`` is a regex alternation between
    two single spaces and therefore counts spaces instead of pipes.  A model
    fitted on the old (space-count) values of this feature must be retrained.
    """
    return _count_matches(r" \| ", snippets)


def double_slash(snippets):
    """Count flanked ``// `` (two slashes and a space) occurrences."""
    return _count_matches(r".// .", snippets)


def object_str(snippets):
    """Count flanked ``object`` occurrences."""
    return _count_matches(r".object.", snippets)


def elif_str(snippets):
    """Count flanked ``elif`` occurrences."""
    return _count_matches(r".elif.", snippets)


def else_str(snippets):
    """Count ``else:`` occurrences (no flanking characters required)."""
    return _count_matches(r"else:", snippets)


def implicit(snippets):
    """Count flanked ``implicit`` occurrences."""
    return _count_matches(r".implicit.", snippets)


def extends(snippets):
    """Count flanked ``extends`` occurrences."""
    return _count_matches(r".extends.", snippets)


def triple_quotes(snippets):
    """Count flanked triple-double-quote occurrences."""
    return _count_matches(r'.""".', snippets)


def import_str(snippets):
    """Count flanked ``import`` occurrences."""
    return _count_matches(r'.import.', snippets)


def dollar_format(snippets):
    """Count flanked ``$format`` occurrences."""
    return _count_matches(r'.\$format.', snippets)


def return_str(snippets):
    """Count flanked ``return`` occurrences."""
    return _count_matches(r'.return.', snippets)


def dollar_container(snippets):
    """Count flanked ``$container`` occurrences."""
    return _count_matches(r'.\$container.', snippets)


def semi_space(snippets):
    """Count flanked ``; `` (semicolon and a space) occurrences."""
    return _count_matches(r'.; .', snippets)


def dunder_init(snippets):
    """Count flanked ``__init__`` occurrences."""
    return _count_matches(r'.__init__.', snippets)


def parens_define(snippets):
    """Count flanked ``(define`` occurrences."""
    return _count_matches(r'.\(define.', snippets)


def parens_semi(snippets):
    """Count flanked ``);`` occurrences."""
    return _count_matches(r'.\);.', snippets)


def class_str(snippets):
    """Count flanked ``class`` occurrences."""
    return _count_matches(r'.class.', snippets)


def do(snippets):
    """Count flanked ``do`` occurrences."""
    return _count_matches(r'.do.', snippets)


def parens_true(snippets):
    """Count flanked ``(true)`` occurrences."""
    return _count_matches(r'.\(true\).', snippets)


# (column name, counter) pairs in the exact column order of the feature
# frame.  Counters defined above but absent here (print_statement, puts,
# semi_space, ...) are not part of the feature set.
_FEATURES = (
    (';;', two_semicolons),
    ('/*', slash_star),
    ('val', val),
    ('$', money),
    ('(*', caml_star),
    ('*)', star_c),
    ('static', static),
    ('var', var),
    ('let', let),
    ('end', end),
    ('::', double_colon),
    ('defn', defn),
    ('|', pipe),
    ('//', double_slash),
    ('object', object_str),
    ('elif', elif_str),
    ('else', else_str),
    ('import', import_str),
    ('$format', dollar_format),
    ('return', return_str),
    ('$container', dollar_container),
    ('__init__', dunder_init),
    ('(define', parens_define),
    (');', parens_semi),
    ('class', class_str),
    ('do', do),
    ('(true)', parens_true),
)


def _build_feature_frame(snippets):
    """Return a DataFrame with a 'Snippet' column plus one count per feature."""
    # Naming the column up front keeps 'Snippet' present even when there are
    # no snippets, so the feature columns can always be added.
    frame = pd.DataFrame({'Snippet': list(snippets)})
    for column, counter in _FEATURES:
        frame[column] = frame['Snippet'].apply(counter)
    return frame


def open_and_parse(path):
    """Return the feature frame for every known source file under *path*.

    One row per file found by get_filepaths(); columns are 'Snippet' (the
    flattened file text) followed by the counts listed in ``_FEATURES``.
    A directory with no matching files yields an empty frame that still has
    all columns.
    """
    return _build_feature_frame(
        open_read_file(file_path) for file_path in get_filepaths(path)
    )


def open_and_parse_single(path):
    """Return a one-row feature frame for the single file at *path*.

    The columns are the same, in the same order, as those produced by
    open_and_parse().
    """
    return _build_feature_frame([open_read_file(path)])
b/requirements.txt @@ -1,2 +1,6 @@ scikit-learn -textblob \ No newline at end of file +textblob +re +os +numpy +pandas diff --git a/rf_programming.dat b/rf_programming.dat new file mode 100644 index 0000000..8f59d29 Binary files /dev/null and b/rf_programming.dat differ diff --git a/test.csv b/test.csv index adbf5dd..28d6f93 100644 --- a/test.csv +++ b/test.csv @@ -24,10 +24,8 @@ Filename,Language 23,java 24,scala 25,scala -26,tcl -27,tcl +26,php +27,php 28,php -29,php -30,php -31,ocaml -32,ocaml +29,ocaml +30,ocaml diff --git a/test/.DS_Store b/test/.DS_Store new file mode 100644 index 0000000..5008ddf Binary files /dev/null and b/test/.DS_Store differ diff --git a/test/1 b/test/01 similarity index 100% rename from test/1 rename to test/01 diff --git a/test/2 b/test/02 similarity index 100% rename from test/2 rename to test/02 diff --git a/test/3 b/test/03 similarity index 100% rename from test/3 rename to test/03 diff --git a/test/4 b/test/04 similarity index 100% rename from test/4 rename to test/04 diff --git a/test/5 b/test/05 similarity index 100% rename from test/5 rename to test/05 diff --git a/test/6 b/test/06 similarity index 100% rename from test/6 rename to test/06 diff --git a/test/7 b/test/07 similarity index 100% rename from test/7 rename to test/07 diff --git a/test/8 b/test/08 similarity index 100% rename from test/8 rename to test/08 diff --git a/test/9 b/test/09 similarity index 100% rename from test/9 rename to test/09 diff --git a/test/26 b/test/26 deleted file mode 100644 index 182f919..0000000 --- a/test/26 +++ /dev/null @@ -1,35 +0,0 @@ -proc isaac::mix {a b c d e f g h} { - set a [expr {($a ^ ($b << 11)) & 0xffffffff}] - set d [expr {($d + $a) & 0xffffffff}] - set b [expr {($b + $c) & 0xffffffff}] - - set b [expr {($b ^ ($c >> 2)) & 0xffffffff}] - set e [expr {($e + $b) & 0xffffffff}] - set c [expr {($c + $d) & 0xffffffff}] - - set c [expr {($c ^ ($d << 8)) & 0xffffffff}] - set f [expr {($f + $c) & 0xffffffff}] - set d [expr {($d + $e) & 
0xffffffff}] - - set d [expr {($d ^ ($e >> 16)) & 0xffffffff}] - set g [expr {($g + $d) & 0xffffffff}] - set e [expr {($e + $f) & 0xffffffff}] - - set e [expr {($e ^ ($f << 10)) & 0xffffffff}] - set h [expr {($h + $e) & 0xffffffff}] - set f [expr {($f + $g) & 0xffffffff}] - - set f [expr {($f ^ ($g >> 4)) & 0xffffffff}] - set a [expr {($a + $f) & 0xffffffff}] - set g [expr {($g + $h) & 0xffffffff}] - - set g [expr {($g ^ ($h << 8)) & 0xffffffff}] - set b [expr {($b + $g) & 0xffffffff}] - set h [expr {($h + $a) & 0xffffffff}] - - set h [expr {($h ^ ($a >> 9)) & 0xffffffff}] - set c [expr {($c + $h) & 0xffffffff}] - set a [expr {($a + $b) & 0xffffffff}] - - return [list $a $b $c $d $e $f $g $h] -} diff --git a/test/27 b/test/27 deleted file mode 100644 index 902ec5c..0000000 --- a/test/27 +++ /dev/null @@ -1,20 +0,0 @@ -proc twitter::follow {nick uhost hand chan argv} { - if {![channel get $chan twitter]} { return } - - if {[string length $argv] < 1} { - $twitter::output_cmd "PRIVMSG $chan :Usage: !follow " - return - } - - if {[catch {::twitlib::query $::twitlib::follow_url [list screen_name $argv]} result]} { - $twitter::output_cmd "PRIVMSG $chan :Twitter failed or already friends with $argv!" - return - } - - if {[dict exists $result error]} { - twitter::output $chan "Follow failed ($argv): [dict get $result error]" - return - } - - twitter::output $chan "Now following [dict get $result screen_name]!" -} \ No newline at end of file diff --git a/test_suite/open_parse_test.py b/test_suite/open_parse_test.py new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/test_suite/open_parse_test.py @@ -0,0 +1 @@ +