Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ The purpose of this code is mostly to wrangle text between various human and mac

## Requirements

I'm running this code on Ubuntu 14.04 with Python 2.7. Unfortunately it does not work with Python 3, though apparently it isn't too hard to use 2to3 to automatically convert it.
This code has been updated to Python 3; it was tested specifically with Python 3.4.3 on 64-bit Linux Mint 17.3.

For the most part it should work out of the box, though there are a few optional bonus features that will make it much better. See [DEPENDENCIES.md](https://github.com/billzorn/mtgencode/blob/master/DEPENDENCIES.md#dependencies).

Expand Down
22 changes: 12 additions & 10 deletions decode.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ def main(fname, oname = None, verbose = True, encoding = 'std',
# there is a sane thing to do here (namely, produce both at the same time)
# but we don't support it yet.
if for_mse and for_html:
print 'ERROR - decode.py - incompatible formats "mse" and "html"'
print('ERROR - decode.py - incompatible formats "mse" and "html"')
return

fmt_ordered = cardlib.fmt_ordered_default
Expand Down Expand Up @@ -52,16 +52,16 @@ def main(fname, oname = None, verbose = True, encoding = 'std',
namediff = Namediff()
cbow = CBOW()
if verbose:
print 'Computing nearest names...'
nearest_names = namediff.nearest_par(map(lambda c: c.name, cards), n=3)
print('Computing nearest names...')
nearest_names = namediff.nearest_par([c.name for c in cards], n=3)
if verbose:
print 'Computing nearest cards...'
print('Computing nearest cards...')
nearest_cards = cbow.nearest_par(cards)
for i in range(0, len(cards)):
cards[i].nearest_names = nearest_names[i]
cards[i].nearest_cards = nearest_cards[i]
if verbose:
print '...Done.'
print('...Done.')

def hoverimg(cardname, dist, nd):
truename = nd.names[cardname]
Expand Down Expand Up @@ -238,17 +238,18 @@ def sort_cmc(card_set):

if oname:
if for_html:
print oname
print(oname)
# if ('.html' != oname[-])
# oname += '.html'
if verbose:
print 'Writing output to: ' + oname
print('Writing output to: ' + oname)
with open(oname, 'w') as ofile:
writecards(ofile)
if for_mse:
# Copy whatever output file is produced, name the copy 'set' (yes, no extension).
# Copy whatever output file is produced, name the copy 'set' (yes,
# no extension).
if os.path.isfile('set'):
print 'ERROR: tried to overwrite existing file "set" - aborting.'
print('ERROR: tried to overwrite existing file "set" - aborting.')
return
shutil.copyfile(oname, 'set')
# Use the freaky mse extension instead of zip.
Expand All @@ -258,7 +259,8 @@ def sort_cmc(card_set):
zf.write('set')
finally:
if verbose:
print 'Made an MSE set file called ' + oname + '.mse-set.'
print('Made an MSE set file called ' +
oname + '.mse-set.')
# The set file is useless outside the .mse-set, delete it.
os.remove('set')
else:
Expand Down
14 changes: 7 additions & 7 deletions encode.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,16 +47,16 @@ def main(fname, oname = None, verbose = True, encoding = 'std',
raise ValueError('encode.py: unknown encoding: ' + encoding)

if verbose:
print 'Preparing to encode:'
print ' Using encoding ' + repr(encoding)
print('Preparing to encode:')
print(' Using encoding ' + repr(encoding))
if stable:
print ' NOT randomizing order of cards.'
print(' NOT randomizing order of cards.')
if randomize_mana:
print ' Randomizing order of symobls in manacosts.'
print(' Randomizing order of symobls in manacosts.')
if not fmt_labeled:
print ' NOT labeling fields for this run (may be harder to decode).'
print(' NOT labeling fields for this run (may be harder to decode).')
if not line_transformations:
print ' NOT using line reordering transformations'
print(' NOT using line reordering transformations')

cards = jdecode.mtg_open_file(fname, verbose=verbose, linetrans=line_transformations)

Expand All @@ -82,7 +82,7 @@ def writecards(writer):

if oname:
if verbose:
print 'Writing output to: ' + oname
print('Writing output to: ' + oname)
with open(oname, 'w') as ofile:
writecards(ofile)
else:
Expand Down
45 changes: 22 additions & 23 deletions lib/cardlib.py
Original file line number Diff line number Diff line change
Expand Up @@ -247,20 +247,19 @@ def fields_from_json(src_json, linetrans = True):
fields[field_cost] = [(-1, cost)]

if 'supertypes' in src_json:
fields[field_supertypes] = [(-1, map(lambda s: utils.to_ascii(s.lower()),
src_json['supertypes']))]
fields[field_supertypes] = [
(-1, [utils.to_ascii(s.lower()) for s in src_json['supertypes']])]

if 'types' in src_json:
fields[field_types] = [(-1, map(lambda s: utils.to_ascii(s.lower()),
src_json['types']))]
fields[field_types] = [(-1, [utils.to_ascii(s.lower())
for s in src_json['types']])]
else:
parsed = False

if 'subtypes' in src_json:
fields[field_subtypes] = [(-1, map(lambda s: utils.to_ascii(s.lower())
# urza's lands...
.replace('"', "'").replace('-', utils.dash_marker),
src_json['subtypes']))]
fields[field_subtypes] = [(-1, [utils.to_ascii(s.lower())
# urza's lands...
.replace('"', "'").replace('-', utils.dash_marker) for s in src_json['subtypes']])]


if 'rarity' in src_json:
Expand Down Expand Up @@ -323,7 +322,7 @@ def fields_from_format(src_text, fmt_ordered, fmt_labeled, fieldsep):

if fmt_labeled:
labels = {fmt_labeled[k] : k for k in fmt_labeled}
field_label_regex = '[' + ''.join(labels.keys()) + ']'
field_label_regex = '[' + ''.join(list(labels.keys())) + ']'
def addf(fields, fkey, fval):
# make sure you pass a pair
if fval and fval[1]:
Expand Down Expand Up @@ -544,14 +543,13 @@ def _set_text(self, values):
self.__dict__[field_text] = mtext
fulltext = mtext.encode()
if fulltext:
self.__dict__[field_text + '_lines'] = map(Manatext,
fulltext.split(utils.newline))
self.__dict__[field_text + '_words'] = re.sub(utils.unletters_regex,
' ',
self.__dict__[field_text + '_lines'] = list(map(Manatext,
fulltext.split(utils.newline)))
self.__dict__[field_text + '_words'] = re.sub(utils.unletters_regex,
' ',
fulltext).split()
self.__dict__[field_text + '_lines_words'] = map(
lambda line: re.sub(utils.unletters_regex, ' ', line).split(),
fulltext.split(utils.newline))
self.__dict__[field_text + '_lines_words'] = [re.sub(
utils.unletters_regex, ' ', line).split() for line in fulltext.split(utils.newline)]
else:
self.valid = False
self.__dict__[field_other] += [(idx, '<text> ' + str(value))]
Expand Down Expand Up @@ -667,11 +665,12 @@ def format(self, gatherer = False, for_forum = False, vdump = False, for_html =

outstr += linebreak

basetypes = map(str.capitalize, self.__dict__[field_types])
basetypes = list(map(str.capitalize, self.__dict__[field_types]))
if vdump and len(basetypes) < 1:
basetypes = ['_NOTYPE_']

outstr += ' '.join(map(str.capitalize, self.__dict__[field_supertypes]) + basetypes)

outstr += ' '.join(list(map(str.capitalize,
self.__dict__[field_supertypes])) + basetypes)

if self.__dict__[field_subtypes]:
outstr += (' ' + utils.dash_marker + ' ' +
Expand Down Expand Up @@ -1005,17 +1004,17 @@ def vectorize(self):
if coststr:
outstr += coststr + ' '

typestr = ' '.join(map(lambda s: '(' + s + ')',
self.__dict__[field_supertypes] + self.__dict__[field_types]))
typestr = ' '.join(
['(' + s + ')' for s in self.__dict__[field_supertypes] + self.__dict__[field_types]])
if typestr:
outstr += typestr + ' '

if self.__dict__[field_subtypes]:
outstr += ' '.join(self.__dict__[field_subtypes]) + ' '

if self.__dict__[field_pt]:
outstr += ' '.join(map(lambda s: '(' + s + ')',
self.__dict__[field_pt].replace('/', '/ /').split()))
outstr += ' '.join(['(' + s + ')' for s in self.__dict__[
field_pt].replace('/', '/ /').split()])
outstr += ' '

if self.__dict__[field_loyalty]:
Expand Down
24 changes: 13 additions & 11 deletions lib/cbow.py
Original file line number Diff line number Diff line change
Expand Up @@ -147,7 +147,7 @@ def f_nearest(card, vocab, vecs, cardvecs, n):

def f_nearest_per_thread(workitem):
(workcards, vocab, vecs, cardvecs, n) = workitem
return map(lambda card: f_nearest(card, vocab, vecs, cardvecs, n), workcards)
return [f_nearest(card, vocab, vecs, cardvecs, n) for card in workcards]

class CBOW:
def __init__(self, verbose = True,
Expand All @@ -157,17 +157,18 @@ def __init__(self, verbose = True,
self.cardvecs = []

if self.verbose:
print 'Building a cbow model...'
print('Building a cbow model...')

if self.verbose:
print ' Reading binary vector data from: ' + vector_fname
print(' Reading binary vector data from: ' + vector_fname)
(vocab, vecs) = read_vector_file(vector_fname)
self.vocab = vocab
self.vecs = vecs

if self.verbose:
print ' Reading encoded cards from: ' + card_fname
print ' They\'d better be in the same order as the file used to build the vector model!'
print(' Reading encoded cards from: ' + card_fname)
print(
' They\'d better be in the same order as the file used to build the vector model!')
with open(card_fname, 'rt') as f:
text = f.read()
for card_src in text.split(utils.cardsep):
Expand All @@ -179,17 +180,18 @@ def __init__(self, verbose = True,
card.vectorize()))]

if self.verbose:
print '... Done.'
print ' vocab size: ' + str(len(self.vocab))
print ' raw vecs: ' + str(len(self.vecs))
print ' card vecs: ' + str(len(self.cardvecs))
print('... Done.')
print(' vocab size: ' + str(len(self.vocab)))
print(' raw vecs: ' + str(len(self.vecs)))
print(' card vecs: ' + str(len(self.cardvecs)))

def nearest(self, card, n=5):
return f_nearest(card, self.vocab, self.vecs, self.cardvecs, n)

def nearest_par(self, cards, n=5, threads=cores):
workpool = multiprocessing.Pool(threads)
proto_worklist = namediff.list_split(cards, threads)
worklist = map(lambda x: (x, self.vocab, self.vecs, self.cardvecs, n), proto_worklist)
worklist = [(x, self.vocab, self.vecs, self.cardvecs, n)
for x in proto_worklist]
donelist = workpool.map(f_nearest_per_thread, worklist)
return namediff.list_flatten(donelist)
Loading