This commit is contained in:
saidbleik 2019-10-24 20:36:08 +00:00
Родитель 269b26eb02
Коммит 1782b93ce7
1 изменённых файлов: 18 добавлений и 146 удалений

Просмотреть файл

@ -35,7 +35,7 @@
},
{
"cell_type": "code",
"execution_count": 2,
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
@ -72,7 +72,7 @@
},
{
"cell_type": "code",
"execution_count": 3,
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
@ -127,7 +127,7 @@
},
{
"cell_type": "code",
"execution_count": 3,
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
@ -168,78 +168,9 @@
},
{
"cell_type": "code",
"execution_count": 5,
"execution_count": null,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>text</th>\n",
" <th>label</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>Therefore, they were not required to prepare a...</td>\n",
" <td>government</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>It could fall any time, said the Kal.</td>\n",
" <td>fiction</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>A risk-neutral person is one who is indifferen...</td>\n",
" <td>slate</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>(Click for Chatterbox's take on Snitchensgate.</td>\n",
" <td>slate</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>wow yeah and then it then it puts a lot of res...</td>\n",
" <td>telephone</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" text label\n",
"0 Therefore, they were not required to prepare a... government\n",
"1 It could fall any time, said the Kal. fiction\n",
"2 A risk-neutral person is one who is indifferen... slate\n",
"3 (Click for Chatterbox's take on Snitchensgate. slate\n",
"4 wow yeah and then it then it puts a lot of res... telephone"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"outputs": [],
"source": [
"# create training data sample\n",
"os.makedirs(TEMP_DIR, exist_ok=True)\n",
@ -262,30 +193,9 @@
},
{
"cell_type": "code",
"execution_count": 5,
"execution_count": null,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Uploading an estimated of 1 files\n",
"Uploading temp/train.csv\n",
"Uploaded temp/train.csv, 1 files out of an estimated total of 1\n",
"Uploaded 1 files\n"
]
},
{
"data": {
"text/plain": [
"$AZUREML_DATAREFERENCE_35a15e0fc32e4ff2bd8fa3c5a42e2426"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"outputs": [],
"source": [
"# upload data to datastore\n",
"ds = ws.get_default_datastore()\n",
@ -313,7 +223,7 @@
},
{
"cell_type": "code",
"execution_count": 6,
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
@ -323,7 +233,7 @@
},
{
"cell_type": "code",
"execution_count": 7,
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
@ -358,24 +268,16 @@
},
{
"cell_type": "code",
"execution_count": 8,
"execution_count": null,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"['bert-base-uncased', 'bert-large-uncased', 'bert-base-cased', 'bert-large-cased', 'bert-base-multilingual-uncased', 'bert-base-multilingual-cased', 'bert-base-chinese', 'bert-base-german-cased', 'bert-large-uncased-whole-word-masking', 'bert-large-cased-whole-word-masking', 'bert-large-uncased-whole-word-masking-finetuned-squad', 'bert-large-cased-whole-word-masking-finetuned-squad', 'bert-base-cased-finetuned-mrpc', 'bert-base-german-dbmdz-cased', 'bert-base-german-dbmdz-uncased', 'roberta-base', 'roberta-large', 'roberta-large-mnli', 'xlnet-base-cased', 'xlnet-large-cased', 'distilbert-base-uncased', 'distilbert-base-uncased-distilled-squad']\n"
]
}
],
"outputs": [],
"source": [
"print(SequenceClassifier.list_supported_models())"
]
},
{
"cell_type": "code",
"execution_count": 9,
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
@ -480,7 +382,7 @@
},
{
"cell_type": "code",
"execution_count": 11,
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
@ -492,7 +394,7 @@
},
{
"cell_type": "code",
"execution_count": 12,
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
@ -502,21 +404,9 @@
},
{
"cell_type": "code",
"execution_count": 1,
"execution_count": null,
"metadata": {},
"outputs": [
{
"ename": "NameError",
"evalue": "name 'train_step_run' is not defined",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)",
"\u001b[0;32m<ipython-input-1-b6e7c53d3a5d>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[0;31m# download its output (a traind model & a label encoder)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 2\u001b[0;31m \u001b[0mtrain_step_run\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget_output_data\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0moutput_dir\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mname\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdownload\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mlocal_path\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mTEMP_DIR\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 3\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 4\u001b[0m \u001b[0;31m# load classifier and label encoder\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 5\u001b[0m \u001b[0mtrained_dir\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m\"./temp/azureml/\"\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0mtrain_step_run\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mid\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0;34m\"/\"\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0moutput_dir\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mname\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;31mNameError\u001b[0m: name 'train_step_run' is not defined"
]
}
],
"outputs": [],
"source": [
"# download its output (a traind model & a label encoder)\n",
"train_step_run.get_output_data(output_dir.name).download(local_path=TEMP_DIR)\n",
@ -541,27 +431,9 @@
},
{
"cell_type": "code",
"execution_count": 14,
"execution_count": null,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating: 100%|██████████| 1/1 [00:01<00:00, 1.56s/it]\n"
]
},
{
"data": {
"text/plain": [
"array(['fiction'], dtype=object)"
]
},
"execution_count": 14,
"metadata": {},
"output_type": "execute_result"
}
],
"outputs": [],
"source": [
"# test\n",
"test_input = [\"Let's go to Orlando. I've heard it's a nice place\"]\n",