feat(bug fix): updated the snli notebook to call to_lowercase_all() instead of to_lowercase(), which expects a column name list. Fixed to_lowercase() returning None when a column name list is not passed

This commit is contained in:
Janhavi Mahajan 2019-05-13 18:14:31 -04:00
Родитель 07ca05dd04
Коммит 1ed2c4dc0a
2 изменённых файлов: 15 добавлений и 12 удалений

Просмотреть файл

@ -38,8 +38,7 @@
"name": "stdout",
"output_type": "stream",
"text": [
"System version: 3.6.8 |Anaconda, Inc.| (default, Dec 29 2018, 19:04:46) \n",
"[GCC 4.2.1 Compatible Clang 4.0.1 (tags/RELEASE_401/final)]\n"
"System version: 3.6.8 |Anaconda, Inc.| (default, Feb 21 2019, 18:30:04) [MSC v.1916 64 bit (AMD64)]\n"
]
}
],
@ -48,7 +47,7 @@
"sys.path.append(\"../../../\")\n",
"\n",
"import os\n",
"from utils_nlp.dataset.preprocess import to_lowercase, to_nltk_tokens\n",
"from utils_nlp.dataset.preprocess import to_lowercase_all, to_nltk_tokens\n",
"from utils_nlp.dataset import snli\n",
"\n",
"print(\"System version: {}\".format(sys.version))"
@ -429,13 +428,14 @@
"name": "stderr",
"output_type": "stream",
"text": [
"[nltk_data] Downloading package punkt to /Users/caseyhong/nltk_data...\n",
"[nltk_data] Downloading package punkt to\n",
"[nltk_data] C:\\Users\\jamahaja\\AppData\\Roaming\\nltk_data...\n",
"[nltk_data] Package punkt is already up-to-date!\n"
]
}
],
"source": [
"train_tok = to_nltk_tokens(to_lowercase(train))"
"train_tok = to_nltk_tokens(to_lowercase_all(train))"
]
},
{
@ -574,11 +574,14 @@
"name": "stderr",
"output_type": "stream",
"text": [
"[nltk_data] Downloading package punkt to /Users/caseyhong/nltk_data...\n",
"[nltk_data] Downloading package punkt to\n",
"[nltk_data] C:\\Users\\jamahaja\\AppData\\Roaming\\nltk_data...\n",
"[nltk_data] Package punkt is already up-to-date!\n",
"[nltk_data] Downloading package punkt to /Users/caseyhong/nltk_data...\n",
"[nltk_data] Downloading package punkt to\n",
"[nltk_data] C:\\Users\\jamahaja\\AppData\\Roaming\\nltk_data...\n",
"[nltk_data] Package punkt is already up-to-date!\n",
"[nltk_data] Downloading package punkt to /Users/caseyhong/nltk_data...\n",
"[nltk_data] Downloading package punkt to\n",
"[nltk_data] C:\\Users\\jamahaja\\AppData\\Roaming\\nltk_data...\n",
"[nltk_data] Package punkt is already up-to-date!\n"
]
}
@ -592,9 +595,9 @@
"clean_dev = clean(dev, file_split=\"dev\")\n",
"clean_test = clean(dev, file_split=\"test\")\n",
"\n",
"train_tok = to_nltk_tokens(to_lowercase(clean_train))\n",
"dev_tok = to_nltk_tokens(to_lowercase(clean_dev))\n",
"test_tok = to_nltk_tokens(to_lowercase(clean_test))\n",
"train_tok = to_nltk_tokens(to_lowercase_all(clean_train))\n",
"dev_tok = to_nltk_tokens(to_lowercase_all(clean_dev))\n",
"test_tok = to_nltk_tokens(to_lowercase_all(clean_test))\n",
"\n",
"split_map = {'train': train_tok, 'dev': dev_tok, 'test': test_tok}\n",
"for file_split, df in split_map.items():\n",

Просмотреть файл

@ -32,7 +32,7 @@ def to_lowercase(df, column_names=[]):
pd.DataFrame: Dataframe with columns with lowercase standardization.
"""
if not column_names:
to_lowercase_all(df)
return to_lowercase_all(df)
else:
df[column_names] = df[column_names].applymap(
lambda s: s.lower() if type(s) == str else s