From 2ab4f54b19a65bba46db554d3c532cd98d5d3677 Mon Sep 17 00:00:00 2001 From: maximus12793 Date: Mon, 16 Oct 2017 22:12:03 -0700 Subject: [PATCH 1/2] deprecation warning fix gensim --- lda2vec/corpus.py | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/lda2vec/corpus.py b/lda2vec/corpus.py index b93aae0a..5405a7c7 100644 --- a/lda2vec/corpus.py +++ b/lda2vec/corpus.py @@ -532,7 +532,9 @@ def compact_word_vectors(self, vocab, filename=None, array=None, """ n_words = len(self.compact_to_loose) from gensim.models.word2vec import Word2Vec - model = Word2Vec.load_word2vec_format(filename, binary=True) + model = gensim.models.KeyedVectors.load_word2vec_format( + filename, binary=True) + n_dim = model.syn0.shape[1] data = np.random.normal(size=(n_words, n_dim)).astype('float32') data -= data.mean() @@ -548,9 +550,12 @@ def compact_word_vectors(self, vocab, filename=None, array=None, choices = np.array(keys, dtype='S') lengths = np.array(lens, dtype='int32') s, f = 0, 0 - rep0 = lambda w: w - rep1 = lambda w: w.replace(' ', '_') - rep2 = lambda w: w.title().replace(' ', '_') + + def rep0(w): return w + + def rep1(w): return w.replace(' ', '_') + + def rep2(w): return w.title().replace(' ', '_') reps = [rep0, rep1, rep2] for compact in np.arange(top): loose = self.compact_to_loose.get(compact, None) @@ -677,8 +682,9 @@ def compact_to_coocurrence(self, word_compact, indices, window_size=10): for name, index in indices.items(): tokens[name] = index a, b = tokens.copy(), tokens.copy() - mask = lambda x: np.prod([x[k + '_x'] == x[k + '_y'] - for k in indices.keys()], axis=0) + + def mask(x): return np.prod([x[k + '_x'] == x[k + '_y'] + for k in indices.keys()], axis=0) group_keys = ['word_index_x', 'word_index_y', ] group_keys += [k + '_x' for k in indices.keys()] total = [] From 05bdde16340e8b6edce01b74fe56aa2677a14152 Mon Sep 17 00:00:00 2001 From: maximus12793 Date: Mon, 16 Oct 2017 22:19:42 -0700 Subject: [PATCH 2/2] style edit --- 
lda2vec/corpus.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/lda2vec/corpus.py b/lda2vec/corpus.py index 5405a7c7..01971e77 100644 --- a/lda2vec/corpus.py +++ b/lda2vec/corpus.py @@ -1,6 +1,5 @@ from collections import defaultdict import numpy as np -import difflib import pandas as pd try: @@ -531,7 +530,7 @@ def compact_word_vectors(self, vocab, filename=None, array=None, True """ n_words = len(self.compact_to_loose) - from gensim.models.word2vec import Word2Vec + import gensim model = gensim.models.KeyedVectors.load_word2vec_format( filename, binary=True) @@ -579,7 +578,6 @@ def rep2(w): return w.title().replace(' ', '_') sel = choices[idx] d = damerau_levenshtein_distance_withNPArray(word, sel) choice = np.array(keys_raw)[idx][np.argmin(d)] - # choice = difflib.get_close_matches(word, choices)[0] vector = model[choice] print compact, word, ' --> ', choice except IndexError: