Updated notebook with latest results and refactored imports
This commit is contained in:
Родитель
418ef60a65
Коммит
078466259f
|
@ -159,13 +159,6 @@
|
|||
}
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"100%|██████████| 92.3k/92.3k [00:04<00:00, 21.8kKB/s]\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/html": [
|
||||
|
@ -573,7 +566,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"execution_count": 6,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
|
@ -601,7 +594,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"execution_count": 7,
|
||||
"metadata": {
|
||||
"nbpresent": {
|
||||
"id": "641a9c74-974c-4aac-8c16-3b44d686f0f3"
|
||||
|
@ -610,6 +603,9 @@
|
|||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%load_ext autoreload\n",
|
||||
"%autoreload 2\n",
|
||||
"\n",
|
||||
"config_filepath = 'gensen_config.json'\n",
|
||||
"clf = GenSenClassifier(config_file = config_filepath, \n",
|
||||
" pretrained_embedding_path = pretrained_embedding_path,\n",
|
||||
|
@ -630,7 +626,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
"execution_count": 8,
|
||||
"metadata": {
|
||||
"nbpresent": {
|
||||
"id": "6ea45671-c7a5-4fe8-a450-8b54161f26c5"
|
||||
|
@ -657,20 +653,7 @@
|
|||
"/data/anaconda/envs/nlp_gpu/lib/python3.6/site-packages/horovod/torch/__init__.py:163: UserWarning: optimizer.step(synchronize=True) called after optimizer.synchronize(). This can cause training slowdown. You may want to consider using optimizer.step(synchronize=False) if you use optimizer.synchronize() in your code.\n",
|
||||
" warnings.warn(\"optimizer.step(synchronize=True) called after \"\n",
|
||||
"../../scenarios/sentence_similarity/gensen_train.py:238: UserWarning: Implicit dimension choice for softmax has been deprecated. Change the call to include dim=X as an argument.\n",
|
||||
" f.softmax(class_logits).data.cpu().numpy().argmax(axis=-1)\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"NLI Dev Acc : 0.32869\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
" f.softmax(class_logits).data.cpu().numpy().argmax(axis=-1)\n",
|
||||
"../../scenarios/sentence_similarity/gensen_train.py:257: UserWarning: Implicit dimension choice for softmax has been deprecated. Change the call to include dim=X as an argument.\n",
|
||||
" f.softmax(class_logits).data.cpu().numpy().argmax(axis=-1)\n"
|
||||
]
|
||||
|
@ -679,107 +662,8 @@
|
|||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"NLI Test Acc : 0.32767\n",
|
||||
"NLI Dev Acc : 0.32869\n",
|
||||
"NLI Test Acc : 0.32767\n",
|
||||
"NLI Dev Acc : 0.34485\n",
|
||||
"NLI Test Acc : 0.34334\n",
|
||||
"NLI Dev Acc : 0.33306\n",
|
||||
"NLI Test Acc : 0.32950\n",
|
||||
"NLI Dev Acc : 0.33824\n",
|
||||
"NLI Test Acc : 0.34283\n",
|
||||
"NLI Dev Acc : 0.36507\n",
|
||||
"NLI Test Acc : 0.36482\n",
|
||||
"NLI Dev Acc : 0.34983\n",
|
||||
"NLI Test Acc : 0.34762\n",
|
||||
"NLI Dev Acc : 0.35216\n",
|
||||
"NLI Test Acc : 0.34792\n",
|
||||
"NLI Dev Acc : 0.33824\n",
|
||||
"NLI Test Acc : 0.34283\n",
|
||||
"NLI Dev Acc : 0.33835\n",
|
||||
"NLI Test Acc : 0.34294\n",
|
||||
"NLI Dev Acc : 0.33306\n",
|
||||
"NLI Test Acc : 0.32950\n",
|
||||
"NLI Dev Acc : 0.33306\n",
|
||||
"NLI Test Acc : 0.32950\n",
|
||||
"NLI Dev Acc : 0.34302\n",
|
||||
"NLI Test Acc : 0.34161\n",
|
||||
"NLI Dev Acc : 0.36385\n",
|
||||
"NLI Test Acc : 0.37154\n",
|
||||
"NLI Dev Acc : 0.38295\n",
|
||||
"NLI Test Acc : 0.38386\n",
|
||||
"NLI Dev Acc : 0.38793\n",
|
||||
"NLI Test Acc : 0.38742\n",
|
||||
"NLI Dev Acc : 0.39138\n",
|
||||
"NLI Test Acc : 0.38976\n",
|
||||
"NLI Dev Acc : 0.35135\n",
|
||||
"NLI Test Acc : 0.35393\n",
|
||||
"NLI Dev Acc : 0.34007\n",
|
||||
"NLI Test Acc : 0.33744\n",
|
||||
"NLI Dev Acc : 0.33306\n",
|
||||
"NLI Test Acc : 0.32950\n",
|
||||
"NLI Dev Acc : 0.33306\n",
|
||||
"NLI Test Acc : 0.32950\n",
|
||||
"NLI Dev Acc : 0.33804\n",
|
||||
"NLI Test Acc : 0.34263\n",
|
||||
"NLI Dev Acc : 0.34617\n",
|
||||
"NLI Test Acc : 0.35413\n",
|
||||
"NLI Dev Acc : 0.35034\n",
|
||||
"NLI Test Acc : 0.34772\n",
|
||||
"NLI Dev Acc : 0.33306\n",
|
||||
"NLI Test Acc : 0.32950\n",
|
||||
"NLI Dev Acc : 0.33560\n",
|
||||
"NLI Test Acc : 0.33184\n",
|
||||
"NLI Dev Acc : 0.35298\n",
|
||||
"NLI Test Acc : 0.35922\n",
|
||||
"NLI Dev Acc : 0.34363\n",
|
||||
"NLI Test Acc : 0.34009\n",
|
||||
"NLI Dev Acc : 0.36365\n",
|
||||
"NLI Test Acc : 0.36238\n",
|
||||
"NLI Dev Acc : 0.35145\n",
|
||||
"NLI Test Acc : 0.35077\n",
|
||||
"NLI Dev Acc : 0.33367\n",
|
||||
"NLI Test Acc : 0.33092\n",
|
||||
"NLI Dev Acc : 0.36141\n",
|
||||
"NLI Test Acc : 0.35882\n",
|
||||
"NLI Dev Acc : 0.35369\n",
|
||||
"NLI Test Acc : 0.35678\n",
|
||||
"NLI Dev Acc : 0.32869\n",
|
||||
"NLI Test Acc : 0.32767\n",
|
||||
"NLI Dev Acc : 0.32869\n",
|
||||
"NLI Test Acc : 0.32767\n",
|
||||
"NLI Dev Acc : 0.32869\n",
|
||||
"NLI Test Acc : 0.32767\n",
|
||||
"NLI Dev Acc : 0.32910\n",
|
||||
"NLI Test Acc : 0.32807\n",
|
||||
"NLI Dev Acc : 0.35470\n",
|
||||
"NLI Test Acc : 0.35230\n",
|
||||
"NLI Dev Acc : 0.40469\n",
|
||||
"NLI Test Acc : 0.40869\n",
|
||||
"NLI Dev Acc : 0.37106\n",
|
||||
"NLI Test Acc : 0.36594\n",
|
||||
"NLI Dev Acc : 0.37939\n",
|
||||
"NLI Test Acc : 0.37246\n",
|
||||
"NLI Dev Acc : 0.38254\n",
|
||||
"NLI Test Acc : 0.37724\n",
|
||||
"NLI Dev Acc : 0.37309\n",
|
||||
"NLI Test Acc : 0.37449\n",
|
||||
"NLI Dev Acc : 0.33936\n",
|
||||
"NLI Test Acc : 0.33876\n",
|
||||
"NLI Dev Acc : 0.34820\n",
|
||||
"NLI Test Acc : 0.34864\n",
|
||||
"NLI Dev Acc : 0.38387\n",
|
||||
"NLI Test Acc : 0.38060\n",
|
||||
"NLI Dev Acc : 0.40073\n",
|
||||
"NLI Test Acc : 0.40564\n",
|
||||
"NLI Dev Acc : 0.35867\n",
|
||||
"NLI Test Acc : 0.36553\n",
|
||||
"NLI Dev Acc : 0.35277\n",
|
||||
"NLI Test Acc : 0.36044\n",
|
||||
"NLI Dev Acc : 0.40205\n",
|
||||
"NLI Test Acc : 0.39963\n",
|
||||
"CPU times: user 1h 8min 52s, sys: 19min 2s, total: 1h 27min 55s\n",
|
||||
"Wall time: 1h 28min\n"
|
||||
"CPU times: user 29min 7s, sys: 7min 43s, total: 36min 51s\n",
|
||||
"Wall time: 36min 59s\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
|
@ -797,9 +681,19 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"execution_count": 9,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"******** Similarity Score for sentences **************\n",
|
||||
" 0 1\n",
|
||||
"0 1.00000 0.96147\n",
|
||||
"1 0.96147 1.00000\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"application/papermill.record+json": {
|
||||
|
@ -811,11 +705,11 @@
|
|||
"data": [
|
||||
[
|
||||
1,
|
||||
0.9761484548147126
|
||||
0.9614701535385097
|
||||
],
|
||||
[
|
||||
0.9761484548147126,
|
||||
1
|
||||
0.9614701535385097,
|
||||
0.9999999999999998
|
||||
]
|
||||
],
|
||||
"index": [
|
||||
|
@ -832,10 +726,12 @@
|
|||
"source": [
|
||||
"sentences = [\n",
|
||||
" 'the quick brown fox jumped over the lazy dog',\n",
|
||||
" 'it is going to be a bright sunshiny day tomorrow'\n",
|
||||
" 'bright sunshiny day tomorrow.'\n",
|
||||
" ]\n",
|
||||
"\n",
|
||||
"results = clf.predict(sentences)\n",
|
||||
"print(\"******** Similarity Score for sentences **************\")\n",
|
||||
"print(results)\n",
|
||||
"pm.record(\"results\", results.to_dict(orient='split'))"
|
||||
]
|
||||
},
|
||||
|
@ -848,13 +744,6 @@
|
|||
"1. Subramanian, Sandeep and Trischler, Adam and Bengio, Yoshua and Pal, Christopher J, [*Learning general purpose distributed sentence representations via large scale multi-task learning*](https://arxiv.org/abs/1804.00079), ICLR, 2018.\n",
|
||||
"3. Semantic textual similarity. url: http://nlpprogress.com/english/semantic_textual_similarity.html"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
|
|
|
@ -2,8 +2,9 @@
|
|||
# Licensed under the MIT License.
|
||||
import json
|
||||
import os
|
||||
import pandas as pd
|
||||
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
|
||||
from scenarios.sentence_similarity.gensen_train import train
|
||||
from utils_nlp.gensen.create_gensen_model import (
|
||||
|
|
|
@ -90,13 +90,13 @@ def _split_and_cleanup(split_map, data_path):
|
|||
"snli_1.0_{}.txt.s2.tok".format(file_split),
|
||||
)
|
||||
with open(s1_tok_path, "r") as fin, open(
|
||||
"{}.tmp".format(s1_tok_path), "w"
|
||||
"{}.tmp".format(s1_tok_path), "w"
|
||||
) as tmp:
|
||||
for line in fin:
|
||||
s = line.replace('"', "")
|
||||
tmp.write(s)
|
||||
with open(s2_tok_path, "r") as fin, open(
|
||||
"{}.tmp".format(s2_tok_path), "w"
|
||||
"{}.tmp".format(s2_tok_path), "w"
|
||||
) as tmp:
|
||||
for line in fin:
|
||||
s = line.replace('"', "")
|
||||
|
|
Загрузка…
Ссылка в новой задаче