Updated notebook with latest results and refactored imports

This commit is contained in:
Abhiram E 2019-06-25 16:41:54 -04:00
Родитель 418ef60a65
Коммит 078466259f
3 изменённых файлов: 30 добавлений и 140 удалений

Просмотреть файл

@@ -159,13 +159,6 @@
}
},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"100%|██████████| 92.3k/92.3k [00:04<00:00, 21.8kKB/s]\n"
]
},
{
"data": {
"text/html": [
@@ -573,7 +566,7 @@
},
{
"cell_type": "code",
"execution_count": 2,
"execution_count": 6,
"metadata": {},
"outputs": [
{
@@ -601,7 +594,7 @@
},
{
"cell_type": "code",
"execution_count": 3,
"execution_count": 7,
"metadata": {
"nbpresent": {
"id": "641a9c74-974c-4aac-8c16-3b44d686f0f3"
@@ -610,6 +603,9 @@
},
"outputs": [],
"source": [
"%load_ext autoreload\n",
"%autoreload 2\n",
"\n",
"config_filepath = 'gensen_config.json'\n",
"clf = GenSenClassifier(config_file = config_filepath, \n",
" pretrained_embedding_path = pretrained_embedding_path,\n",
@@ -630,7 +626,7 @@
},
{
"cell_type": "code",
"execution_count": 9,
"execution_count": 8,
"metadata": {
"nbpresent": {
"id": "6ea45671-c7a5-4fe8-a450-8b54161f26c5"
@@ -657,20 +653,7 @@
"/data/anaconda/envs/nlp_gpu/lib/python3.6/site-packages/horovod/torch/__init__.py:163: UserWarning: optimizer.step(synchronize=True) called after optimizer.synchronize(). This can cause training slowdown. You may want to consider using optimizer.step(synchronize=False) if you use optimizer.synchronize() in your code.\n",
" warnings.warn(\"optimizer.step(synchronize=True) called after \"\n",
"../../scenarios/sentence_similarity/gensen_train.py:238: UserWarning: Implicit dimension choice for softmax has been deprecated. Change the call to include dim=X as an argument.\n",
" f.softmax(class_logits).data.cpu().numpy().argmax(axis=-1)\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"NLI Dev Acc : 0.32869\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
" f.softmax(class_logits).data.cpu().numpy().argmax(axis=-1)\n",
"../../scenarios/sentence_similarity/gensen_train.py:257: UserWarning: Implicit dimension choice for softmax has been deprecated. Change the call to include dim=X as an argument.\n",
" f.softmax(class_logits).data.cpu().numpy().argmax(axis=-1)\n"
]
@@ -679,107 +662,8 @@
"name": "stdout",
"output_type": "stream",
"text": [
"NLI Test Acc : 0.32767\n",
"NLI Dev Acc : 0.32869\n",
"NLI Test Acc : 0.32767\n",
"NLI Dev Acc : 0.34485\n",
"NLI Test Acc : 0.34334\n",
"NLI Dev Acc : 0.33306\n",
"NLI Test Acc : 0.32950\n",
"NLI Dev Acc : 0.33824\n",
"NLI Test Acc : 0.34283\n",
"NLI Dev Acc : 0.36507\n",
"NLI Test Acc : 0.36482\n",
"NLI Dev Acc : 0.34983\n",
"NLI Test Acc : 0.34762\n",
"NLI Dev Acc : 0.35216\n",
"NLI Test Acc : 0.34792\n",
"NLI Dev Acc : 0.33824\n",
"NLI Test Acc : 0.34283\n",
"NLI Dev Acc : 0.33835\n",
"NLI Test Acc : 0.34294\n",
"NLI Dev Acc : 0.33306\n",
"NLI Test Acc : 0.32950\n",
"NLI Dev Acc : 0.33306\n",
"NLI Test Acc : 0.32950\n",
"NLI Dev Acc : 0.34302\n",
"NLI Test Acc : 0.34161\n",
"NLI Dev Acc : 0.36385\n",
"NLI Test Acc : 0.37154\n",
"NLI Dev Acc : 0.38295\n",
"NLI Test Acc : 0.38386\n",
"NLI Dev Acc : 0.38793\n",
"NLI Test Acc : 0.38742\n",
"NLI Dev Acc : 0.39138\n",
"NLI Test Acc : 0.38976\n",
"NLI Dev Acc : 0.35135\n",
"NLI Test Acc : 0.35393\n",
"NLI Dev Acc : 0.34007\n",
"NLI Test Acc : 0.33744\n",
"NLI Dev Acc : 0.33306\n",
"NLI Test Acc : 0.32950\n",
"NLI Dev Acc : 0.33306\n",
"NLI Test Acc : 0.32950\n",
"NLI Dev Acc : 0.33804\n",
"NLI Test Acc : 0.34263\n",
"NLI Dev Acc : 0.34617\n",
"NLI Test Acc : 0.35413\n",
"NLI Dev Acc : 0.35034\n",
"NLI Test Acc : 0.34772\n",
"NLI Dev Acc : 0.33306\n",
"NLI Test Acc : 0.32950\n",
"NLI Dev Acc : 0.33560\n",
"NLI Test Acc : 0.33184\n",
"NLI Dev Acc : 0.35298\n",
"NLI Test Acc : 0.35922\n",
"NLI Dev Acc : 0.34363\n",
"NLI Test Acc : 0.34009\n",
"NLI Dev Acc : 0.36365\n",
"NLI Test Acc : 0.36238\n",
"NLI Dev Acc : 0.35145\n",
"NLI Test Acc : 0.35077\n",
"NLI Dev Acc : 0.33367\n",
"NLI Test Acc : 0.33092\n",
"NLI Dev Acc : 0.36141\n",
"NLI Test Acc : 0.35882\n",
"NLI Dev Acc : 0.35369\n",
"NLI Test Acc : 0.35678\n",
"NLI Dev Acc : 0.32869\n",
"NLI Test Acc : 0.32767\n",
"NLI Dev Acc : 0.32869\n",
"NLI Test Acc : 0.32767\n",
"NLI Dev Acc : 0.32869\n",
"NLI Test Acc : 0.32767\n",
"NLI Dev Acc : 0.32910\n",
"NLI Test Acc : 0.32807\n",
"NLI Dev Acc : 0.35470\n",
"NLI Test Acc : 0.35230\n",
"NLI Dev Acc : 0.40469\n",
"NLI Test Acc : 0.40869\n",
"NLI Dev Acc : 0.37106\n",
"NLI Test Acc : 0.36594\n",
"NLI Dev Acc : 0.37939\n",
"NLI Test Acc : 0.37246\n",
"NLI Dev Acc : 0.38254\n",
"NLI Test Acc : 0.37724\n",
"NLI Dev Acc : 0.37309\n",
"NLI Test Acc : 0.37449\n",
"NLI Dev Acc : 0.33936\n",
"NLI Test Acc : 0.33876\n",
"NLI Dev Acc : 0.34820\n",
"NLI Test Acc : 0.34864\n",
"NLI Dev Acc : 0.38387\n",
"NLI Test Acc : 0.38060\n",
"NLI Dev Acc : 0.40073\n",
"NLI Test Acc : 0.40564\n",
"NLI Dev Acc : 0.35867\n",
"NLI Test Acc : 0.36553\n",
"NLI Dev Acc : 0.35277\n",
"NLI Test Acc : 0.36044\n",
"NLI Dev Acc : 0.40205\n",
"NLI Test Acc : 0.39963\n",
"CPU times: user 1h 8min 52s, sys: 19min 2s, total: 1h 27min 55s\n",
"Wall time: 1h 28min\n"
"CPU times: user 29min 7s, sys: 7min 43s, total: 36min 51s\n",
"Wall time: 36min 59s\n"
]
}
],
@@ -797,9 +681,19 @@
},
{
"cell_type": "code",
"execution_count": 4,
"execution_count": 9,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"******** Similarity Score for sentences **************\n",
" 0 1\n",
"0 1.00000 0.96147\n",
"1 0.96147 1.00000\n"
]
},
{
"data": {
"application/papermill.record+json": {
@@ -811,11 +705,11 @@
"data": [
[
1,
0.9761484548147126
0.9614701535385097
],
[
0.9761484548147126,
1
0.9614701535385097,
0.9999999999999998
]
],
"index": [
@@ -832,10 +726,12 @@
"source": [
"sentences = [\n",
" 'the quick brown fox jumped over the lazy dog',\n",
" 'it is going to be a bright sunshiny day tomorrow'\n",
" 'bright sunshiny day tomorrow.'\n",
" ]\n",
"\n",
"results = clf.predict(sentences)\n",
"print(\"******** Similarity Score for sentences **************\")\n",
"print(results)\n",
"pm.record(\"results\", results.to_dict(orient='split'))"
]
},
@@ -848,13 +744,6 @@
"1. Subramanian, Sandeep and Trischler, Adam and Bengio, Yoshua and Pal, Christopher J, [*Learning general purpose distributed sentence representations via large scale multi-task learning*](https://arxiv.org/abs/1804.00079), ICLR, 2018.\n",
"3. Semantic textual similarity. url: http://nlpprogress.com/english/semantic_textual_similarity.html"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {

Просмотреть файл

@@ -2,8 +2,9 @@
# Licensed under the MIT License.
import json
import os
import pandas as pd
import numpy as np
import pandas as pd
from scenarios.sentence_similarity.gensen_train import train
from utils_nlp.gensen.create_gensen_model import (

Просмотреть файл

@@ -90,13 +90,13 @@ def _split_and_cleanup(split_map, data_path):
"snli_1.0_{}.txt.s2.tok".format(file_split),
)
with open(s1_tok_path, "r") as fin, open(
"{}.tmp".format(s1_tok_path), "w"
"{}.tmp".format(s1_tok_path), "w"
) as tmp:
for line in fin:
s = line.replace('"', "")
tmp.write(s)
with open(s2_tok_path, "r") as fin, open(
"{}.tmp".format(s2_tok_path), "w"
"{}.tmp".format(s2_tok_path), "w"
) as tmp:
for line in fin:
s = line.replace('"', "")