From 5abb42e339759dfb1125e9ad568114c08faedeaf Mon Sep 17 00:00:00 2001 From: Ashish Rana Date: Fri, 20 Apr 2018 16:56:45 +0530 Subject: [PATCH] numpy import added & Minor typo fixes --- vocabulary-embedding.ipynb | 93 ++++++++++++-------------------------- 1 file changed, 29 insertions(+), 64 deletions(-) diff --git a/vocabulary-embedding.ipynb b/vocabulary-embedding.ipynb index 93b4066..c6b8cac 100644 --- a/vocabulary-embedding.ipynb +++ b/vocabulary-embedding.ipynb @@ -4,7 +4,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "Generate intial word embedding for headlines and description" + "Generate initial word embedding for headlines and description" ] }, { @@ -67,7 +67,7 @@ }, "outputs": [], "source": [ - "lower = False # dont lower case the text" + "lower = False # don't lower case the text" ] }, { @@ -118,9 +118,7 @@ { "cell_type": "code", "execution_count": 11, - "metadata": { - "collapsed": false - }, + "metadata": {}, "outputs": [ { "data": { @@ -141,9 +139,7 @@ { "cell_type": "code", "execution_count": 12, - "metadata": { - "collapsed": false - }, + "metadata": {}, "outputs": [ { "data": { @@ -163,9 +159,7 @@ { "cell_type": "code", "execution_count": 13, - "metadata": { - "collapsed": false - }, + "metadata": {}, "outputs": [ { "data": { @@ -192,9 +186,7 @@ { "cell_type": "code", "execution_count": 14, - "metadata": { - "collapsed": false - }, + "metadata": {}, "outputs": [ { "data": { @@ -214,9 +206,7 @@ { "cell_type": "code", "execution_count": 15, - "metadata": { - "collapsed": false - }, + "metadata": {}, "outputs": [ { "data": { @@ -279,9 +269,7 @@ { "cell_type": "code", "execution_count": 18, - "metadata": { - "collapsed": false - }, + "metadata": {}, "outputs": [ { "name": "stdout", @@ -300,9 +288,7 @@ { "cell_type": "code", "execution_count": 19, - "metadata": { - "collapsed": false - }, + "metadata": {}, "outputs": [ { "name": "stderr", @@ -382,9 +368,7 @@ { "cell_type": "code", "execution_count": 23, - "metadata": { - "collapsed": false - }, + "metadata": {}, "outputs": [], "source": [ "word2idx, idx2word = get_idx(vocab, vocabcount)" @@ -428,9 +412,7 @@ { "cell_type": "code", "execution_count": 25, - "metadata": { - "collapsed": false - }, + "metadata": {}, "outputs": [ { "data": { @@ -457,6 +439,8 @@ }, "outputs": [], "source": [ + "import numpy as np\n", + "\n", "glove_index_dict = {}\n", "glove_embedding_weights = np.empty((glove_n_symbols, embedding_dim))\n", "globale_scale=.1\n", @@ -474,9 +458,7 @@ { "cell_type": "code", "execution_count": 27, - "metadata": { - "collapsed": false - }, + "metadata": {}, "outputs": [ { "data": { @@ -524,9 +506,7 @@ { "cell_type": "code", "execution_count": 30, - "metadata": { - "collapsed": false - }, + "metadata": {}, "outputs": [ { "name": "stdout", @@ -552,7 +532,7 @@ "for i in range(vocab_size):\n", " w = idx2word[i]\n", " g = glove_index_dict.get(w, glove_index_dict.get(w.lower()))\n", - " if g is None and w.startswith('#'): # glove has no hastags (I think...)\n", + " if g is None and w.startswith('#'): # glove has no hashtags (I think...)\n", " w = w[1:]\n", " g = glove_index_dict.get(w, glove_index_dict.get(w.lower()))\n", " if g is not None:\n", @@ -618,9 +598,7 @@ { "cell_type": "code", "execution_count": 47, - "metadata": { - "collapsed": false - }, + "metadata": {}, "outputs": [ { "name": "stdout", @@ -666,9 +644,7 @@ { "cell_type": "code", "execution_count": 48, - "metadata": { - "collapsed": false - }, + "metadata": {}, "outputs": [ { "name": "stdout", @@ -720,9 +696,7 @@ { "cell_type": "code", "execution_count": 50, - "metadata": { - "collapsed": false - }, + "metadata": {}, "outputs": [ { "data": { @@ -743,9 +717,7 @@ { "cell_type": "code", "execution_count": 51, - "metadata": { - "collapsed": false - }, + "metadata": {}, "outputs": [ { "data": { @@ -765,9 +737,7 @@ { "cell_type": "code", "execution_count": 52, - "metadata": { - "collapsed": false - }, + "metadata": {}, "outputs": [ { "data": { @@ -789,7 +759,6 @@ "cell_type": "code", "execution_count": 53, "metadata": { - "collapsed": false, "scrolled": true }, "outputs": [ @@ -811,9 +780,7 @@ { "cell_type": "code", "execution_count": 54, - "metadata": { - "collapsed": false - }, + "metadata": {}, "outputs": [], "source": [ "import cPickle as pickle\n", @@ -824,9 +791,7 @@ { "cell_type": "code", "execution_count": 4, - "metadata": { - "collapsed": false - }, + "metadata": {}, "outputs": [], "source": [ "import cPickle as pickle\n", @@ -837,23 +802,23 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 2", + "display_name": "Python 3", "language": "python", - "name": "python2" + "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", - "version": 2 + "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", - "pygments_lexer": "ipython2", - "version": "2.7.11" + "pygments_lexer": "ipython3", + "version": "3.5.2" } }, "nbformat": 4, - "nbformat_minor": 0 + "nbformat_minor": 1 }