Make command-line parameter decoding conditional on Python 2

This commit is contained in:
Jean A. Senellart 2018-07-18 07:36:11 +10:00
Родитель d92491ff12
Коммит 8450bd3231
3 изменённых файлов: 15 добавлений и 4 удалений

Просмотреть файл

@ -96,7 +96,7 @@ def learn_joint_bpe_and_vocab(args):
learn_bpe.learn_bpe(vocab_list, output, args.symbols, args.min_frequency, args.verbose, is_dict=True)
with codecs.open(args.output.name, encoding='UTF-8') as codes:
bpe = apply_bpe.BPE(codes, separator=args.separator.decode('utf-8'))
bpe = apply_bpe.BPE(codes, separator=args.separator)
# apply BPE to each training corpus and get vocabulary
for train_file, vocab_file in zip(args.input, args.vocab):
@ -147,6 +147,9 @@ if __name__ == '__main__':
parser = create_parser()
args = parser.parse_args()
if sys.version_info < (3, 0):
args.separator = args.separator.decode('UTF-8')
assert(len(args.input) == len(args.vocab))
learn_joint_bpe_and_vocab(args)

Просмотреть файл

@ -60,7 +60,7 @@ def segment_char_ngrams(args):
args.output.write(word[i*args.n:i*args.n+args.n])
i += 1
if i*args.n < len(word):
args.output.write(args.separator.decode('utf-8'))
args.output.write(args.separator)
args.output.write(' ')
else:
args.output.write(word + ' ')
@ -82,6 +82,9 @@ if __name__ == '__main__':
parser = create_parser()
args = parser.parse_args()
if sys.version_info < (3, 0):
args.separator = args.separator.decode('UTF-8')
# read/write files as UTF-8
args.vocab = codecs.open(args.vocab.name, encoding='utf-8')
if args.input.name != '<stdin>':
@ -89,4 +92,4 @@ if __name__ == '__main__':
if args.output.name != '<stdout>':
args.output = codecs.open(args.output.name, 'w', encoding='utf-8')
segment_char_ngrams(args)
segment_char_ngrams(args)

Просмотреть файл

@ -38,6 +38,11 @@ learn-joint-bpe-and-vocab: executes recommended workflow for joint BPE.""")
args = parser.parse_args()
if sys.version_info < (3, 0):
args.separator = args.separator.decode('UTF-8')
if args.glossaries:
args.glossaries = [g.decode('UTF-8') for g in args.glossaries]
if args.command == 'learn-bpe':
# read/write files as UTF-8
if args.input.name != '<stdin>':
@ -61,7 +66,7 @@ learn-joint-bpe-and-vocab: executes recommended workflow for joint BPE.""")
else:
vocabulary = None
bpe = BPE(args.codes, args.merges, args.separator.decode('utf-8'), vocabulary, args.glossaries)
bpe = BPE(args.codes, args.merges, args.separator, vocabulary, args.glossaries)
for line in args.input:
args.output.write(bpe.process_line(line))