зеркало из https://github.com/mozilla/subword-nmt.git
Make parameter decoding conditional on Python 2
This commit is contained in:
Родитель
d92491ff12
Коммит
8450bd3231
|
@ -96,7 +96,7 @@ def learn_joint_bpe_and_vocab(args):
|
|||
learn_bpe.learn_bpe(vocab_list, output, args.symbols, args.min_frequency, args.verbose, is_dict=True)
|
||||
|
||||
with codecs.open(args.output.name, encoding='UTF-8') as codes:
|
||||
bpe = apply_bpe.BPE(codes, separator=args.separator.decode('utf-8'))
|
||||
bpe = apply_bpe.BPE(codes, separator=args.separator)
|
||||
|
||||
# apply BPE to each training corpus and get vocabulary
|
||||
for train_file, vocab_file in zip(args.input, args.vocab):
|
||||
|
@ -147,6 +147,9 @@ if __name__ == '__main__':
|
|||
parser = create_parser()
|
||||
args = parser.parse_args()
|
||||
|
||||
if sys.version_info < (3, 0):
|
||||
args.separator = args.separator.decode('UTF-8')
|
||||
|
||||
assert(len(args.input) == len(args.vocab))
|
||||
|
||||
learn_joint_bpe_and_vocab(args)
|
||||
|
|
|
@ -60,7 +60,7 @@ def segment_char_ngrams(args):
|
|||
args.output.write(word[i*args.n:i*args.n+args.n])
|
||||
i += 1
|
||||
if i*args.n < len(word):
|
||||
args.output.write(args.separator.decode('utf-8'))
|
||||
args.output.write(args.separator)
|
||||
args.output.write(' ')
|
||||
else:
|
||||
args.output.write(word + ' ')
|
||||
|
@ -82,6 +82,9 @@ if __name__ == '__main__':
|
|||
parser = create_parser()
|
||||
args = parser.parse_args()
|
||||
|
||||
if sys.version_info < (3, 0):
|
||||
args.separator = args.separator.decode('UTF-8')
|
||||
|
||||
# read/write files as UTF-8
|
||||
args.vocab = codecs.open(args.vocab.name, encoding='utf-8')
|
||||
if args.input.name != '<stdin>':
|
||||
|
@ -89,4 +92,4 @@ if __name__ == '__main__':
|
|||
if args.output.name != '<stdout>':
|
||||
args.output = codecs.open(args.output.name, 'w', encoding='utf-8')
|
||||
|
||||
segment_char_ngrams(args)
|
||||
segment_char_ngrams(args)
|
|
@ -38,6 +38,11 @@ learn-joint-bpe-and-vocab: executes recommended workflow for joint BPE.""")
|
|||
|
||||
args = parser.parse_args()
|
||||
|
||||
if sys.version_info < (3, 0):
|
||||
args.separator = args.separator.decode('UTF-8')
|
||||
if args.glossaries:
|
||||
args.glossaries = [g.decode('UTF-8') for g in args.glossaries]
|
||||
|
||||
if args.command == 'learn-bpe':
|
||||
# read/write files as UTF-8
|
||||
if args.input.name != '<stdin>':
|
||||
|
@ -61,7 +66,7 @@ learn-joint-bpe-and-vocab: executes recommended workflow for joint BPE.""")
|
|||
else:
|
||||
vocabulary = None
|
||||
|
||||
bpe = BPE(args.codes, args.merges, args.separator.decode('utf-8'), vocabulary, args.glossaries)
|
||||
bpe = BPE(args.codes, args.merges, args.separator, vocabulary, args.glossaries)
|
||||
|
||||
for line in args.input:
|
||||
args.output.write(bpe.process_line(line))
|
||||
|
|
Загрузка…
Ссылка в новой задаче