This commit is contained in:
Abhiram E 2019-06-12 16:42:23 -04:00 committed by Liqun Shao
Parent eb6719d9ec
Commit a065ae9bb0
1 changed file: 5 additions and 10 deletions

View file

@ -22,10 +22,7 @@ def _preprocess(split_map, data_path, column_names):
for file_split, df in split_map.items():
base_txt_path = os.path.join(
data_path,
os.path.join(
SNLI_CLEAN_PATH, "/snli_1.0_{}.txt".format(file_split)
),
data_path, SNLI_CLEAN_PATH, "/snli_1.0_{}.txt".format(file_split)
)
df[column_names[0]] = df["sentence1_tokens"].apply(
@ -83,15 +80,13 @@ def _split_and_cleanup(split_map, data_path):
s1_tok_path = os.path.join(
data_path,
os.path.join(
SNLI_CLEAN_PATH, "/snli_1.0_{}.txt.s1.tok".format(file_split)
),
SNLI_CLEAN_PATH,
"/snli_1.0_{}.txt.s1.tok".format(file_split),
)
s2_tok_path = os.path.join(
data_path,
os.path.join(
SNLI_CLEAN_PATH, "/snli_1.0_{}.txt.s2.tok".format(file_split)
),
SNLI_CLEAN_PATH,
"/snli_1.0_{}.txt.s2.tok".format(file_split),
)
with open(s1_tok_path, "r") as fin, open(
"{}.tmp".format(s1_tok_path), "w"