This commit is contained in:
Said Bleik 2019-06-17 12:25:01 -04:00
Родитель 0929c37d56
Коммит f12aabd5b0
1 изменённых файлов: 17 добавлений и 15 удалений

Просмотреть файл

@ -6,32 +6,35 @@ https://www.nyu.edu/projects/bowman/xnli/
"""
import os
import pandas as pd
import requests
from utils_nlp.dataset.url_utils import extract_zip, maybe_download
import pandas as pd
from utils_nlp.dataset.url_utils import extract_zip, maybe_download
URL = "https://www.nyu.edu/projects/bowman/xnli/XNLI-1.0.zip"
DATA_FILES = {"dev": "XNLI-1.0/xnli.dev.jsonl", "test": "XNLI-1.0/xnli.test.jsonl"}
DATA_FILES = {
"dev": "XNLI-1.0/xnli.dev.jsonl",
"test": "XNLI-1.0/xnli.test.jsonl",
}
def load_pandas_df(local_cache_path=None, file_split="train"):
"""Downloads and extracts the dataset files
def load_pandas_df(local_cache_path=None, file_split="dev"):
"""Downloads and extracts the dataset files
Args:
local_cache_path ([type], optional): [description]. Defaults to None.
local_cache_path (str, optional): Directory where the XNLI zip
archive is downloaded and from which the data files are read.
Defaults to None.
file_split (str, optional): The subset to load.
One of: {"dev", "test"}
Defaults to "train".
One of: {"dev", "test"}
Defaults to "dev".
Returns:
pd.DataFrame: pandas DataFrame containing the specified XNLI subset.
pd.DataFrame: pandas DataFrame containing the specified
XNLI subset.
"""
file_name = URL.split("/")[-1]
if not os.path.exists(os.path.join(local_cache_path, file_name)):
response = requests.get(URL)
with open(os.path.join(local_cache_path, file_name), "wb") as f:
f.write(response.content)
maybe_download(URL, file_name, local_cache_path)
if not os.path.exists(
os.path.join(local_cache_path, DATA_FILES[file_split])
):
@ -41,4 +44,3 @@ def load_pandas_df(local_cache_path=None, file_split="train"):
return pd.read_json(
os.path.join(local_cache_path, DATA_FILES[file_split]), lines=True
)