feat(bug fix): updated the SNLI notebook to call to_lowercase_all() instead of to_lowercase(), which expects a list of column names. Fixed to_lowercase() returning None when no column name list is passed.
This commit is contained in:
Родитель
07ca05dd04
Коммит
1ed2c4dc0a
|
@ -38,8 +38,7 @@
|
|||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"System version: 3.6.8 |Anaconda, Inc.| (default, Dec 29 2018, 19:04:46) \n",
|
||||
"[GCC 4.2.1 Compatible Clang 4.0.1 (tags/RELEASE_401/final)]\n"
|
||||
"System version: 3.6.8 |Anaconda, Inc.| (default, Feb 21 2019, 18:30:04) [MSC v.1916 64 bit (AMD64)]\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
|
@ -48,7 +47,7 @@
|
|||
"sys.path.append(\"../../../\")\n",
|
||||
"\n",
|
||||
"import os\n",
|
||||
"from utils_nlp.dataset.preprocess import to_lowercase, to_nltk_tokens\n",
|
||||
"from utils_nlp.dataset.preprocess import to_lowercase_all, to_nltk_tokens\n",
|
||||
"from utils_nlp.dataset import snli\n",
|
||||
"\n",
|
||||
"print(\"System version: {}\".format(sys.version))"
|
||||
|
@ -429,13 +428,14 @@
|
|||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"[nltk_data] Downloading package punkt to /Users/caseyhong/nltk_data...\n",
|
||||
"[nltk_data] Downloading package punkt to\n",
|
||||
"[nltk_data] C:\\Users\\jamahaja\\AppData\\Roaming\\nltk_data...\n",
|
||||
"[nltk_data] Package punkt is already up-to-date!\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"train_tok = to_nltk_tokens(to_lowercase(train))"
|
||||
"train_tok = to_nltk_tokens(to_lowercase_all(train))"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
@ -574,11 +574,14 @@
|
|||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"[nltk_data] Downloading package punkt to /Users/caseyhong/nltk_data...\n",
|
||||
"[nltk_data] Downloading package punkt to\n",
|
||||
"[nltk_data] C:\\Users\\jamahaja\\AppData\\Roaming\\nltk_data...\n",
|
||||
"[nltk_data] Package punkt is already up-to-date!\n",
|
||||
"[nltk_data] Downloading package punkt to /Users/caseyhong/nltk_data...\n",
|
||||
"[nltk_data] Downloading package punkt to\n",
|
||||
"[nltk_data] C:\\Users\\jamahaja\\AppData\\Roaming\\nltk_data...\n",
|
||||
"[nltk_data] Package punkt is already up-to-date!\n",
|
||||
"[nltk_data] Downloading package punkt to /Users/caseyhong/nltk_data...\n",
|
||||
"[nltk_data] Downloading package punkt to\n",
|
||||
"[nltk_data] C:\\Users\\jamahaja\\AppData\\Roaming\\nltk_data...\n",
|
||||
"[nltk_data] Package punkt is already up-to-date!\n"
|
||||
]
|
||||
}
|
||||
|
@ -592,9 +595,9 @@
|
|||
"clean_dev = clean(dev, file_split=\"dev\")\n",
|
||||
"clean_test = clean(dev, file_split=\"test\")\n",
|
||||
"\n",
|
||||
"train_tok = to_nltk_tokens(to_lowercase(clean_train))\n",
|
||||
"dev_tok = to_nltk_tokens(to_lowercase(clean_dev))\n",
|
||||
"test_tok = to_nltk_tokens(to_lowercase(clean_test))\n",
|
||||
"train_tok = to_nltk_tokens(to_lowercase_all(clean_train))\n",
|
||||
"dev_tok = to_nltk_tokens(to_lowercase_all(clean_dev))\n",
|
||||
"test_tok = to_nltk_tokens(to_lowercase_all(clean_test))\n",
|
||||
"\n",
|
||||
"split_map = {'train': train_tok, 'dev': dev_tok, 'test': test_tok}\n",
|
||||
"for file_split, df in split_map.items():\n",
|
||||
|
|
|
@ -32,7 +32,7 @@ def to_lowercase(df, column_names=[]):
|
|||
pd.DataFrame: Dataframe with columns with lowercase standardization.
|
||||
"""
|
||||
if not column_names:
|
||||
to_lowercase_all(df)
|
||||
return to_lowercase_all(df)
|
||||
else:
|
||||
df[column_names] = df[column_names].applymap(
|
||||
lambda s: s.lower() if type(s) == str else s
|
||||
|
|
Загрузка…
Ссылка в новой задаче