suppress nltk messages
This commit is contained in:
Родитель
ef96d27f61
Коммит
e031d3d225
|
@ -38,7 +38,8 @@
|
|||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"System version: 3.6.8 |Anaconda, Inc.| (default, Feb 21 2019, 18:30:04) [MSC v.1916 64 bit (AMD64)]\n"
|
||||
"System version: 3.6.8 |Anaconda, Inc.| (default, Dec 29 2018, 19:04:46) \n",
|
||||
"[GCC 4.2.1 Compatible Clang 4.0.1 (tags/RELEASE_401/final)]\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
|
@ -92,7 +93,7 @@
|
|||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 92.3k/92.3k [00:23<00:00, 3.86kKB/s]\n"
|
||||
"100%|██████████| 92.3k/92.3k [01:01<00:00, 1.50kKB/s]\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
|
@ -437,17 +438,7 @@
|
|||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"[nltk_data] Downloading package punkt to\n",
|
||||
"[nltk_data] C:\\Users\\abeswara\\AppData\\Roaming\\nltk_data...\n",
|
||||
"[nltk_data] Package punkt is already up-to-date!\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"train_tok = to_nltk_tokens(to_lowercase_all(train))"
|
||||
]
|
||||
|
@ -583,23 +574,7 @@
|
|||
"cell_type": "code",
|
||||
"execution_count": 10,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"[nltk_data] Downloading package punkt to\n",
|
||||
"[nltk_data] C:\\Users\\abeswara\\AppData\\Roaming\\nltk_data...\n",
|
||||
"[nltk_data] Package punkt is already up-to-date!\n",
|
||||
"[nltk_data] Downloading package punkt to\n",
|
||||
"[nltk_data] C:\\Users\\abeswara\\AppData\\Roaming\\nltk_data...\n",
|
||||
"[nltk_data] Package punkt is already up-to-date!\n",
|
||||
"[nltk_data] Downloading package punkt to\n",
|
||||
"[nltk_data] C:\\Users\\abeswara\\AppData\\Roaming\\nltk_data...\n",
|
||||
"[nltk_data] Package punkt is already up-to-date!\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"train = snli.load_pandas_df(BASE_DATA_PATH, file_split=\"train\")\n",
|
||||
"dev = snli.load_pandas_df(BASE_DATA_PATH, file_split=\"dev\")\n",
|
||||
|
@ -669,9 +644,9 @@
|
|||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "nlp",
|
||||
"display_name": "Python (nlp_cpu)",
|
||||
"language": "python",
|
||||
"name": "nlp"
|
||||
"name": "nlp_cpu"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
|
|
|
@ -62,14 +62,14 @@ PIP_BASE = {
|
|||
"pyemd": "pyemd==0.5.1",
|
||||
"ipywebrtc": "ipywebrtc==0.4.3",
|
||||
"pre-commit": "pre-commit>=1.14.4",
|
||||
"spacy": "spacy>=2.1.4",
|
||||
"spacy-models": "https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-2.1.0/en_core_web_sm-2.1.0.tar.gz",
|
||||
"gensim": "gensim>=3.7.0",
|
||||
"nltk": "nltk>=3.4",
|
||||
"pytorch-pretrained-bert": "pytorch-pretrained-bert>=0.6",
|
||||
"horovod": "horovod>=0.16.1",
|
||||
}
|
||||
|
||||
PIP_GPU = {}
|
||||
PIP_GPU = {"horovod": "horovod>=0.16.1"}
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
|
|
@ -0,0 +1,3 @@
|
|||
import nltk
|
||||
|
||||
nltk.download("punkt", quiet=True)
|
|
@ -122,12 +122,8 @@ def to_nltk_tokens(
|
|||
pd.DataFrame: Dataframe with new columns token_cols, each containing a
|
||||
list of tokens for their respective sentences.
|
||||
"""
|
||||
|
||||
nltk.download("punkt")
|
||||
text_df = df[sentence_cols]
|
||||
tok_df = text_df.applymap(
|
||||
lambda sentence: nltk.word_tokenize(sentence)
|
||||
)
|
||||
tok_df = text_df.applymap(lambda sentence: nltk.word_tokenize(sentence))
|
||||
tok_df.columns = token_cols
|
||||
tokenized = pd.concat([df, tok_df], axis=1)
|
||||
return tokenized
|
||||
|
@ -158,11 +154,9 @@ def rm_nltk_stopwords(
|
|||
nltk.download("stopwords")
|
||||
stop_words = tuple(stopwords.words("english"))
|
||||
text_df = df[sentence_cols]
|
||||
stop_df = (
|
||||
text_df
|
||||
.applymap(lambda sentence: nltk.word_tokenize(sentence))
|
||||
.applymap(lambda l: [word for word in l if word not in stop_words])
|
||||
)
|
||||
stop_df = text_df.applymap(
|
||||
lambda sentence: nltk.word_tokenize(sentence)
|
||||
).applymap(lambda l: [word for word in l if word not in stop_words])
|
||||
|
||||
stop_df.columns = stop_cols
|
||||
return pd.concat([df, stop_df], axis=1)
|
||||
|
|
Загрузка…
Ссылка в новой задаче