feat(data_prep) SNLI notebook showcasing data prep, Corrected nltk util for column_name
This commit is contained in:
Родитель
3964c04a7c
Коммит
6e46eade15
|
@ -0,0 +1,924 @@
|
|||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# SNLI Data Preparation notebook\n",
|
||||
"\n",
|
||||
"This notebook shows how to load the SNLI dataset using the provided utility functions.\n",
|
||||
"\n",
|
||||
"## 0 Global Settings"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 10,
|
||||
"metadata": {
|
||||
"scrolled": true
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"System version: 3.6.8 |Anaconda, Inc.| (default, Feb 21 2019, 18:30:04) [MSC v.1916 64 bit (AMD64)]\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"import sys\n",
|
||||
"sys.path.append(\"../../\")\n",
|
||||
"\n",
|
||||
"from utils_nlp.dataset import snli\n",
|
||||
"\n",
|
||||
"print(\"System version: {}\".format(sys.version))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 11,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"DATA_DIR_PATH = '../../data'"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## 1 Load SNLI Dataset into pandas dataframe\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"The SNLI zip contains [snli_1.0_dev.txt, snli_1.0_train.txt, snli_1.0_test.txt, snli_1.0_dev.jsonl, snli_1.0_train.jsonl, snli_1.0_test.jsonl].\n",
|
||||
"\n",
|
||||
"Below are the steps when load_pandas_df() is called.<br>\n",
|
||||
"\n",
|
||||
"- Downloads the SNLI zip file to the specified directory location.\n",
|
||||
"- Extracts the file for the requested split.\n",
|
||||
"- Loads the split into a pandas dataframe. "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 30,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# defaults to txt\n",
|
||||
"df = snli.load_pandas_df(DATA_DIR_PATH, 'train')"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Glimpse of data"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 31,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/html": [
|
||||
"<div>\n",
|
||||
"<style scoped>\n",
|
||||
" .dataframe tbody tr th:only-of-type {\n",
|
||||
" vertical-align: middle;\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
" .dataframe tbody tr th {\n",
|
||||
" vertical-align: top;\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
" .dataframe thead th {\n",
|
||||
" text-align: right;\n",
|
||||
" }\n",
|
||||
"</style>\n",
|
||||
"<table border=\"1\" class=\"dataframe\">\n",
|
||||
" <thead>\n",
|
||||
" <tr style=\"text-align: right;\">\n",
|
||||
" <th></th>\n",
|
||||
" <th>gold_label</th>\n",
|
||||
" <th>sentence1_binary_parse</th>\n",
|
||||
" <th>sentence2_binary_parse</th>\n",
|
||||
" <th>sentence1_parse</th>\n",
|
||||
" <th>sentence2_parse</th>\n",
|
||||
" <th>sentence1</th>\n",
|
||||
" <th>sentence2</th>\n",
|
||||
" <th>captionID</th>\n",
|
||||
" <th>pairID</th>\n",
|
||||
" <th>label1</th>\n",
|
||||
" <th>label2</th>\n",
|
||||
" <th>label3</th>\n",
|
||||
" <th>label4</th>\n",
|
||||
" <th>label5</th>\n",
|
||||
" </tr>\n",
|
||||
" </thead>\n",
|
||||
" <tbody>\n",
|
||||
" <tr>\n",
|
||||
" <th>0</th>\n",
|
||||
" <td>neutral</td>\n",
|
||||
" <td>( ( ( A person ) ( on ( a horse ) ) ) ( ( jump...</td>\n",
|
||||
" <td>( ( A person ) ( ( is ( ( training ( his horse...</td>\n",
|
||||
" <td>(ROOT (S (NP (NP (DT A) (NN person)) (PP (IN o...</td>\n",
|
||||
" <td>(ROOT (S (NP (DT A) (NN person)) (VP (VBZ is) ...</td>\n",
|
||||
" <td>A person on a horse jumps over a broken down a...</td>\n",
|
||||
" <td>A person is training his horse for a competition.</td>\n",
|
||||
" <td>3416050480.jpg#4</td>\n",
|
||||
" <td>3416050480.jpg#4r1n</td>\n",
|
||||
" <td>neutral</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>1</th>\n",
|
||||
" <td>contradiction</td>\n",
|
||||
" <td>( ( ( A person ) ( on ( a horse ) ) ) ( ( jump...</td>\n",
|
||||
" <td>( ( A person ) ( ( ( ( is ( at ( a diner ) ) )...</td>\n",
|
||||
" <td>(ROOT (S (NP (NP (DT A) (NN person)) (PP (IN o...</td>\n",
|
||||
" <td>(ROOT (S (NP (DT A) (NN person)) (VP (VBZ is) ...</td>\n",
|
||||
" <td>A person on a horse jumps over a broken down a...</td>\n",
|
||||
" <td>A person is at a diner, ordering an omelette.</td>\n",
|
||||
" <td>3416050480.jpg#4</td>\n",
|
||||
" <td>3416050480.jpg#4r1c</td>\n",
|
||||
" <td>contradiction</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>2</th>\n",
|
||||
" <td>entailment</td>\n",
|
||||
" <td>( ( ( A person ) ( on ( a horse ) ) ) ( ( jump...</td>\n",
|
||||
" <td>( ( A person ) ( ( ( ( is outdoors ) , ) ( on ...</td>\n",
|
||||
" <td>(ROOT (S (NP (NP (DT A) (NN person)) (PP (IN o...</td>\n",
|
||||
" <td>(ROOT (S (NP (DT A) (NN person)) (VP (VBZ is) ...</td>\n",
|
||||
" <td>A person on a horse jumps over a broken down a...</td>\n",
|
||||
" <td>A person is outdoors, on a horse.</td>\n",
|
||||
" <td>3416050480.jpg#4</td>\n",
|
||||
" <td>3416050480.jpg#4r1e</td>\n",
|
||||
" <td>entailment</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>3</th>\n",
|
||||
" <td>neutral</td>\n",
|
||||
" <td>( Children ( ( ( smiling and ) waving ) ( at c...</td>\n",
|
||||
" <td>( They ( are ( smiling ( at ( their parents ) ...</td>\n",
|
||||
" <td>(ROOT (NP (S (NP (NNP Children)) (VP (VBG smil...</td>\n",
|
||||
" <td>(ROOT (S (NP (PRP They)) (VP (VBP are) (VP (VB...</td>\n",
|
||||
" <td>Children smiling and waving at camera</td>\n",
|
||||
" <td>They are smiling at their parents</td>\n",
|
||||
" <td>2267923837.jpg#2</td>\n",
|
||||
" <td>2267923837.jpg#2r1n</td>\n",
|
||||
" <td>neutral</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>4</th>\n",
|
||||
" <td>entailment</td>\n",
|
||||
" <td>( Children ( ( ( smiling and ) waving ) ( at c...</td>\n",
|
||||
" <td>( There ( ( are children ) present ) )</td>\n",
|
||||
" <td>(ROOT (NP (S (NP (NNP Children)) (VP (VBG smil...</td>\n",
|
||||
" <td>(ROOT (S (NP (EX There)) (VP (VBP are) (NP (NN...</td>\n",
|
||||
" <td>Children smiling and waving at camera</td>\n",
|
||||
" <td>There are children present</td>\n",
|
||||
" <td>2267923837.jpg#2</td>\n",
|
||||
" <td>2267923837.jpg#2r1e</td>\n",
|
||||
" <td>entailment</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" </tr>\n",
|
||||
" </tbody>\n",
|
||||
"</table>\n",
|
||||
"</div>"
|
||||
],
|
||||
"text/plain": [
|
||||
" gold_label sentence1_binary_parse \\\n",
|
||||
"0 neutral ( ( ( A person ) ( on ( a horse ) ) ) ( ( jump... \n",
|
||||
"1 contradiction ( ( ( A person ) ( on ( a horse ) ) ) ( ( jump... \n",
|
||||
"2 entailment ( ( ( A person ) ( on ( a horse ) ) ) ( ( jump... \n",
|
||||
"3 neutral ( Children ( ( ( smiling and ) waving ) ( at c... \n",
|
||||
"4 entailment ( Children ( ( ( smiling and ) waving ) ( at c... \n",
|
||||
"\n",
|
||||
" sentence2_binary_parse \\\n",
|
||||
"0 ( ( A person ) ( ( is ( ( training ( his horse... \n",
|
||||
"1 ( ( A person ) ( ( ( ( is ( at ( a diner ) ) )... \n",
|
||||
"2 ( ( A person ) ( ( ( ( is outdoors ) , ) ( on ... \n",
|
||||
"3 ( They ( are ( smiling ( at ( their parents ) ... \n",
|
||||
"4 ( There ( ( are children ) present ) ) \n",
|
||||
"\n",
|
||||
" sentence1_parse \\\n",
|
||||
"0 (ROOT (S (NP (NP (DT A) (NN person)) (PP (IN o... \n",
|
||||
"1 (ROOT (S (NP (NP (DT A) (NN person)) (PP (IN o... \n",
|
||||
"2 (ROOT (S (NP (NP (DT A) (NN person)) (PP (IN o... \n",
|
||||
"3 (ROOT (NP (S (NP (NNP Children)) (VP (VBG smil... \n",
|
||||
"4 (ROOT (NP (S (NP (NNP Children)) (VP (VBG smil... \n",
|
||||
"\n",
|
||||
" sentence2_parse \\\n",
|
||||
"0 (ROOT (S (NP (DT A) (NN person)) (VP (VBZ is) ... \n",
|
||||
"1 (ROOT (S (NP (DT A) (NN person)) (VP (VBZ is) ... \n",
|
||||
"2 (ROOT (S (NP (DT A) (NN person)) (VP (VBZ is) ... \n",
|
||||
"3 (ROOT (S (NP (PRP They)) (VP (VBP are) (VP (VB... \n",
|
||||
"4 (ROOT (S (NP (EX There)) (VP (VBP are) (NP (NN... \n",
|
||||
"\n",
|
||||
" sentence1 \\\n",
|
||||
"0 A person on a horse jumps over a broken down a... \n",
|
||||
"1 A person on a horse jumps over a broken down a... \n",
|
||||
"2 A person on a horse jumps over a broken down a... \n",
|
||||
"3 Children smiling and waving at camera \n",
|
||||
"4 Children smiling and waving at camera \n",
|
||||
"\n",
|
||||
" sentence2 captionID \\\n",
|
||||
"0 A person is training his horse for a competition. 3416050480.jpg#4 \n",
|
||||
"1 A person is at a diner, ordering an omelette. 3416050480.jpg#4 \n",
|
||||
"2 A person is outdoors, on a horse. 3416050480.jpg#4 \n",
|
||||
"3 They are smiling at their parents 2267923837.jpg#2 \n",
|
||||
"4 There are children present 2267923837.jpg#2 \n",
|
||||
"\n",
|
||||
" pairID label1 label2 label3 label4 label5 \n",
|
||||
"0 3416050480.jpg#4r1n neutral NaN NaN NaN NaN \n",
|
||||
"1 3416050480.jpg#4r1c contradiction NaN NaN NaN NaN \n",
|
||||
"2 3416050480.jpg#4r1e entailment NaN NaN NaN NaN \n",
|
||||
"3 2267923837.jpg#2r1n neutral NaN NaN NaN NaN \n",
|
||||
"4 2267923837.jpg#2r1e entailment NaN NaN NaN NaN "
|
||||
]
|
||||
},
|
||||
"execution_count": 31,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"df.head(5)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## load pandas df with snli_1.0_dev.txt "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 27,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"txt_df = snli.load_pandas_df(DATA_DIR_PATH, 'dev', 'txt')"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 26,
|
||||
"metadata": {
|
||||
"scrolled": true
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/html": [
|
||||
"<div>\n",
|
||||
"<style scoped>\n",
|
||||
" .dataframe tbody tr th:only-of-type {\n",
|
||||
" vertical-align: middle;\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
" .dataframe tbody tr th {\n",
|
||||
" vertical-align: top;\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
" .dataframe thead th {\n",
|
||||
" text-align: right;\n",
|
||||
" }\n",
|
||||
"</style>\n",
|
||||
"<table border=\"1\" class=\"dataframe\">\n",
|
||||
" <thead>\n",
|
||||
" <tr style=\"text-align: right;\">\n",
|
||||
" <th></th>\n",
|
||||
" <th>gold_label</th>\n",
|
||||
" <th>sentence1_binary_parse</th>\n",
|
||||
" <th>sentence2_binary_parse</th>\n",
|
||||
" <th>sentence1_parse</th>\n",
|
||||
" <th>sentence2_parse</th>\n",
|
||||
" <th>sentence1</th>\n",
|
||||
" <th>sentence2</th>\n",
|
||||
" <th>captionID</th>\n",
|
||||
" <th>pairID</th>\n",
|
||||
" <th>label1</th>\n",
|
||||
" <th>label2</th>\n",
|
||||
" <th>label3</th>\n",
|
||||
" <th>label4</th>\n",
|
||||
" <th>label5</th>\n",
|
||||
" </tr>\n",
|
||||
" </thead>\n",
|
||||
" <tbody>\n",
|
||||
" <tr>\n",
|
||||
" <th>0</th>\n",
|
||||
" <td>neutral</td>\n",
|
||||
" <td>( ( Two women ) ( ( are ( embracing ( while ( ...</td>\n",
|
||||
" <td>( ( The sisters ) ( ( are ( ( hugging goodbye ...</td>\n",
|
||||
" <td>(ROOT (S (NP (CD Two) (NNS women)) (VP (VBP ar...</td>\n",
|
||||
" <td>(ROOT (S (NP (DT The) (NNS sisters)) (VP (VBP ...</td>\n",
|
||||
" <td>Two women are embracing while holding to go pa...</td>\n",
|
||||
" <td>The sisters are hugging goodbye while holding ...</td>\n",
|
||||
" <td>4705552913.jpg#2</td>\n",
|
||||
" <td>4705552913.jpg#2r1n</td>\n",
|
||||
" <td>neutral</td>\n",
|
||||
" <td>entailment</td>\n",
|
||||
" <td>neutral</td>\n",
|
||||
" <td>neutral</td>\n",
|
||||
" <td>neutral</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>1</th>\n",
|
||||
" <td>entailment</td>\n",
|
||||
" <td>( ( Two women ) ( ( are ( embracing ( while ( ...</td>\n",
|
||||
" <td>( ( Two woman ) ( ( are ( holding packages ) )...</td>\n",
|
||||
" <td>(ROOT (S (NP (CD Two) (NNS women)) (VP (VBP ar...</td>\n",
|
||||
" <td>(ROOT (S (NP (CD Two) (NN woman)) (VP (VBP are...</td>\n",
|
||||
" <td>Two women are embracing while holding to go pa...</td>\n",
|
||||
" <td>Two woman are holding packages.</td>\n",
|
||||
" <td>4705552913.jpg#2</td>\n",
|
||||
" <td>4705552913.jpg#2r1e</td>\n",
|
||||
" <td>entailment</td>\n",
|
||||
" <td>entailment</td>\n",
|
||||
" <td>entailment</td>\n",
|
||||
" <td>entailment</td>\n",
|
||||
" <td>entailment</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>2</th>\n",
|
||||
" <td>contradiction</td>\n",
|
||||
" <td>( ( Two women ) ( ( are ( embracing ( while ( ...</td>\n",
|
||||
" <td>( ( The men ) ( ( are ( fighting ( outside ( a...</td>\n",
|
||||
" <td>(ROOT (S (NP (CD Two) (NNS women)) (VP (VBP ar...</td>\n",
|
||||
" <td>(ROOT (S (NP (DT The) (NNS men)) (VP (VBP are)...</td>\n",
|
||||
" <td>Two women are embracing while holding to go pa...</td>\n",
|
||||
" <td>The men are fighting outside a deli.</td>\n",
|
||||
" <td>4705552913.jpg#2</td>\n",
|
||||
" <td>4705552913.jpg#2r1c</td>\n",
|
||||
" <td>contradiction</td>\n",
|
||||
" <td>contradiction</td>\n",
|
||||
" <td>contradiction</td>\n",
|
||||
" <td>contradiction</td>\n",
|
||||
" <td>contradiction</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>3</th>\n",
|
||||
" <td>entailment</td>\n",
|
||||
" <td>( ( ( Two ( young children ) ) ( in ( ( ( ( ( ...</td>\n",
|
||||
" <td>( ( ( Two kids ) ( in ( numbered jerseys ) ) )...</td>\n",
|
||||
" <td>(ROOT (S (NP (NP (CD Two) (JJ young) (NNS chil...</td>\n",
|
||||
" <td>(ROOT (S (NP (NP (CD Two) (NNS kids)) (PP (IN ...</td>\n",
|
||||
" <td>Two young children in blue jerseys, one with t...</td>\n",
|
||||
" <td>Two kids in numbered jerseys wash their hands.</td>\n",
|
||||
" <td>2407214681.jpg#0</td>\n",
|
||||
" <td>2407214681.jpg#0r1e</td>\n",
|
||||
" <td>entailment</td>\n",
|
||||
" <td>entailment</td>\n",
|
||||
" <td>entailment</td>\n",
|
||||
" <td>entailment</td>\n",
|
||||
" <td>entailment</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>4</th>\n",
|
||||
" <td>neutral</td>\n",
|
||||
" <td>( ( ( Two ( young children ) ) ( in ( ( ( ( ( ...</td>\n",
|
||||
" <td>( ( ( Two kids ) ( at ( a ballgame ) ) ) ( ( w...</td>\n",
|
||||
" <td>(ROOT (S (NP (NP (CD Two) (JJ young) (NNS chil...</td>\n",
|
||||
" <td>(ROOT (S (NP (NP (CD Two) (NNS kids)) (PP (IN ...</td>\n",
|
||||
" <td>Two young children in blue jerseys, one with t...</td>\n",
|
||||
" <td>Two kids at a ballgame wash their hands.</td>\n",
|
||||
" <td>2407214681.jpg#0</td>\n",
|
||||
" <td>2407214681.jpg#0r1n</td>\n",
|
||||
" <td>neutral</td>\n",
|
||||
" <td>neutral</td>\n",
|
||||
" <td>neutral</td>\n",
|
||||
" <td>entailment</td>\n",
|
||||
" <td>entailment</td>\n",
|
||||
" </tr>\n",
|
||||
" </tbody>\n",
|
||||
"</table>\n",
|
||||
"</div>"
|
||||
],
|
||||
"text/plain": [
|
||||
" gold_label sentence1_binary_parse \\\n",
|
||||
"0 neutral ( ( Two women ) ( ( are ( embracing ( while ( ... \n",
|
||||
"1 entailment ( ( Two women ) ( ( are ( embracing ( while ( ... \n",
|
||||
"2 contradiction ( ( Two women ) ( ( are ( embracing ( while ( ... \n",
|
||||
"3 entailment ( ( ( Two ( young children ) ) ( in ( ( ( ( ( ... \n",
|
||||
"4 neutral ( ( ( Two ( young children ) ) ( in ( ( ( ( ( ... \n",
|
||||
"\n",
|
||||
" sentence2_binary_parse \\\n",
|
||||
"0 ( ( The sisters ) ( ( are ( ( hugging goodbye ... \n",
|
||||
"1 ( ( Two woman ) ( ( are ( holding packages ) )... \n",
|
||||
"2 ( ( The men ) ( ( are ( fighting ( outside ( a... \n",
|
||||
"3 ( ( ( Two kids ) ( in ( numbered jerseys ) ) )... \n",
|
||||
"4 ( ( ( Two kids ) ( at ( a ballgame ) ) ) ( ( w... \n",
|
||||
"\n",
|
||||
" sentence1_parse \\\n",
|
||||
"0 (ROOT (S (NP (CD Two) (NNS women)) (VP (VBP ar... \n",
|
||||
"1 (ROOT (S (NP (CD Two) (NNS women)) (VP (VBP ar... \n",
|
||||
"2 (ROOT (S (NP (CD Two) (NNS women)) (VP (VBP ar... \n",
|
||||
"3 (ROOT (S (NP (NP (CD Two) (JJ young) (NNS chil... \n",
|
||||
"4 (ROOT (S (NP (NP (CD Two) (JJ young) (NNS chil... \n",
|
||||
"\n",
|
||||
" sentence2_parse \\\n",
|
||||
"0 (ROOT (S (NP (DT The) (NNS sisters)) (VP (VBP ... \n",
|
||||
"1 (ROOT (S (NP (CD Two) (NN woman)) (VP (VBP are... \n",
|
||||
"2 (ROOT (S (NP (DT The) (NNS men)) (VP (VBP are)... \n",
|
||||
"3 (ROOT (S (NP (NP (CD Two) (NNS kids)) (PP (IN ... \n",
|
||||
"4 (ROOT (S (NP (NP (CD Two) (NNS kids)) (PP (IN ... \n",
|
||||
"\n",
|
||||
" sentence1 \\\n",
|
||||
"0 Two women are embracing while holding to go pa... \n",
|
||||
"1 Two women are embracing while holding to go pa... \n",
|
||||
"2 Two women are embracing while holding to go pa... \n",
|
||||
"3 Two young children in blue jerseys, one with t... \n",
|
||||
"4 Two young children in blue jerseys, one with t... \n",
|
||||
"\n",
|
||||
" sentence2 captionID \\\n",
|
||||
"0 The sisters are hugging goodbye while holding ... 4705552913.jpg#2 \n",
|
||||
"1 Two woman are holding packages. 4705552913.jpg#2 \n",
|
||||
"2 The men are fighting outside a deli. 4705552913.jpg#2 \n",
|
||||
"3 Two kids in numbered jerseys wash their hands. 2407214681.jpg#0 \n",
|
||||
"4 Two kids at a ballgame wash their hands. 2407214681.jpg#0 \n",
|
||||
"\n",
|
||||
" pairID label1 label2 label3 \\\n",
|
||||
"0 4705552913.jpg#2r1n neutral entailment neutral \n",
|
||||
"1 4705552913.jpg#2r1e entailment entailment entailment \n",
|
||||
"2 4705552913.jpg#2r1c contradiction contradiction contradiction \n",
|
||||
"3 2407214681.jpg#0r1e entailment entailment entailment \n",
|
||||
"4 2407214681.jpg#0r1n neutral neutral neutral \n",
|
||||
"\n",
|
||||
" label4 label5 \n",
|
||||
"0 neutral neutral \n",
|
||||
"1 entailment entailment \n",
|
||||
"2 contradiction contradiction \n",
|
||||
"3 entailment entailment \n",
|
||||
"4 entailment entailment "
|
||||
]
|
||||
},
|
||||
"execution_count": 26,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"txt_df.head(5)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## load pandas df with snli_1.0_dev.jsonl "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 11,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"jsonl_df = snli.load_pandas_df(DATA_DIR_PATH, 'dev', 'jsonl')"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 12,
|
||||
"metadata": {
|
||||
"scrolled": true
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/html": [
|
||||
"<div>\n",
|
||||
"<style scoped>\n",
|
||||
" .dataframe tbody tr th:only-of-type {\n",
|
||||
" vertical-align: middle;\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
" .dataframe tbody tr th {\n",
|
||||
" vertical-align: top;\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
" .dataframe thead th {\n",
|
||||
" text-align: right;\n",
|
||||
" }\n",
|
||||
"</style>\n",
|
||||
"<table border=\"1\" class=\"dataframe\">\n",
|
||||
" <thead>\n",
|
||||
" <tr style=\"text-align: right;\">\n",
|
||||
" <th></th>\n",
|
||||
" <th>annotator_labels</th>\n",
|
||||
" <th>captionID</th>\n",
|
||||
" <th>gold_label</th>\n",
|
||||
" <th>pairID</th>\n",
|
||||
" <th>sentence1</th>\n",
|
||||
" <th>sentence1_binary_parse</th>\n",
|
||||
" <th>sentence1_parse</th>\n",
|
||||
" <th>sentence2</th>\n",
|
||||
" <th>sentence2_binary_parse</th>\n",
|
||||
" <th>sentence2_parse</th>\n",
|
||||
" </tr>\n",
|
||||
" </thead>\n",
|
||||
" <tbody>\n",
|
||||
" <tr>\n",
|
||||
" <th>0</th>\n",
|
||||
" <td>[neutral, entailment, neutral, neutral, neutral]</td>\n",
|
||||
" <td>4705552913.jpg#2</td>\n",
|
||||
" <td>neutral</td>\n",
|
||||
" <td>4705552913.jpg#2r1n</td>\n",
|
||||
" <td>Two women are embracing while holding to go pa...</td>\n",
|
||||
" <td>( ( Two women ) ( ( are ( embracing ( while ( ...</td>\n",
|
||||
" <td>(ROOT (S (NP (CD Two) (NNS women)) (VP (VBP ar...</td>\n",
|
||||
" <td>The sisters are hugging goodbye while holding ...</td>\n",
|
||||
" <td>( ( The sisters ) ( ( are ( ( hugging goodbye ...</td>\n",
|
||||
" <td>(ROOT (S (NP (DT The) (NNS sisters)) (VP (VBP ...</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>1</th>\n",
|
||||
" <td>[entailment, entailment, entailment, entailmen...</td>\n",
|
||||
" <td>4705552913.jpg#2</td>\n",
|
||||
" <td>entailment</td>\n",
|
||||
" <td>4705552913.jpg#2r1e</td>\n",
|
||||
" <td>Two women are embracing while holding to go pa...</td>\n",
|
||||
" <td>( ( Two women ) ( ( are ( embracing ( while ( ...</td>\n",
|
||||
" <td>(ROOT (S (NP (CD Two) (NNS women)) (VP (VBP ar...</td>\n",
|
||||
" <td>Two woman are holding packages.</td>\n",
|
||||
" <td>( ( Two woman ) ( ( are ( holding packages ) )...</td>\n",
|
||||
" <td>(ROOT (S (NP (CD Two) (NN woman)) (VP (VBP are...</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>2</th>\n",
|
||||
" <td>[contradiction, contradiction, contradiction, ...</td>\n",
|
||||
" <td>4705552913.jpg#2</td>\n",
|
||||
" <td>contradiction</td>\n",
|
||||
" <td>4705552913.jpg#2r1c</td>\n",
|
||||
" <td>Two women are embracing while holding to go pa...</td>\n",
|
||||
" <td>( ( Two women ) ( ( are ( embracing ( while ( ...</td>\n",
|
||||
" <td>(ROOT (S (NP (CD Two) (NNS women)) (VP (VBP ar...</td>\n",
|
||||
" <td>The men are fighting outside a deli.</td>\n",
|
||||
" <td>( ( The men ) ( ( are ( fighting ( outside ( a...</td>\n",
|
||||
" <td>(ROOT (S (NP (DT The) (NNS men)) (VP (VBP are)...</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>3</th>\n",
|
||||
" <td>[entailment, entailment, entailment, entailmen...</td>\n",
|
||||
" <td>2407214681.jpg#0</td>\n",
|
||||
" <td>entailment</td>\n",
|
||||
" <td>2407214681.jpg#0r1e</td>\n",
|
||||
" <td>Two young children in blue jerseys, one with t...</td>\n",
|
||||
" <td>( ( ( Two ( young children ) ) ( in ( ( ( ( ( ...</td>\n",
|
||||
" <td>(ROOT (S (NP (NP (CD Two) (JJ young) (NNS chil...</td>\n",
|
||||
" <td>Two kids in numbered jerseys wash their hands.</td>\n",
|
||||
" <td>( ( ( Two kids ) ( in ( numbered jerseys ) ) )...</td>\n",
|
||||
" <td>(ROOT (S (NP (NP (CD Two) (NNS kids)) (PP (IN ...</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>4</th>\n",
|
||||
" <td>[neutral, neutral, neutral, entailment, entail...</td>\n",
|
||||
" <td>2407214681.jpg#0</td>\n",
|
||||
" <td>neutral</td>\n",
|
||||
" <td>2407214681.jpg#0r1n</td>\n",
|
||||
" <td>Two young children in blue jerseys, one with t...</td>\n",
|
||||
" <td>( ( ( Two ( young children ) ) ( in ( ( ( ( ( ...</td>\n",
|
||||
" <td>(ROOT (S (NP (NP (CD Two) (JJ young) (NNS chil...</td>\n",
|
||||
" <td>Two kids at a ballgame wash their hands.</td>\n",
|
||||
" <td>( ( ( Two kids ) ( at ( a ballgame ) ) ) ( ( w...</td>\n",
|
||||
" <td>(ROOT (S (NP (NP (CD Two) (NNS kids)) (PP (IN ...</td>\n",
|
||||
" </tr>\n",
|
||||
" </tbody>\n",
|
||||
"</table>\n",
|
||||
"</div>"
|
||||
],
|
||||
"text/plain": [
|
||||
" annotator_labels captionID \\\n",
|
||||
"0 [neutral, entailment, neutral, neutral, neutral] 4705552913.jpg#2 \n",
|
||||
"1 [entailment, entailment, entailment, entailmen... 4705552913.jpg#2 \n",
|
||||
"2 [contradiction, contradiction, contradiction, ... 4705552913.jpg#2 \n",
|
||||
"3 [entailment, entailment, entailment, entailmen... 2407214681.jpg#0 \n",
|
||||
"4 [neutral, neutral, neutral, entailment, entail... 2407214681.jpg#0 \n",
|
||||
"\n",
|
||||
" gold_label pairID \\\n",
|
||||
"0 neutral 4705552913.jpg#2r1n \n",
|
||||
"1 entailment 4705552913.jpg#2r1e \n",
|
||||
"2 contradiction 4705552913.jpg#2r1c \n",
|
||||
"3 entailment 2407214681.jpg#0r1e \n",
|
||||
"4 neutral 2407214681.jpg#0r1n \n",
|
||||
"\n",
|
||||
" sentence1 \\\n",
|
||||
"0 Two women are embracing while holding to go pa... \n",
|
||||
"1 Two women are embracing while holding to go pa... \n",
|
||||
"2 Two women are embracing while holding to go pa... \n",
|
||||
"3 Two young children in blue jerseys, one with t... \n",
|
||||
"4 Two young children in blue jerseys, one with t... \n",
|
||||
"\n",
|
||||
" sentence1_binary_parse \\\n",
|
||||
"0 ( ( Two women ) ( ( are ( embracing ( while ( ... \n",
|
||||
"1 ( ( Two women ) ( ( are ( embracing ( while ( ... \n",
|
||||
"2 ( ( Two women ) ( ( are ( embracing ( while ( ... \n",
|
||||
"3 ( ( ( Two ( young children ) ) ( in ( ( ( ( ( ... \n",
|
||||
"4 ( ( ( Two ( young children ) ) ( in ( ( ( ( ( ... \n",
|
||||
"\n",
|
||||
" sentence1_parse \\\n",
|
||||
"0 (ROOT (S (NP (CD Two) (NNS women)) (VP (VBP ar... \n",
|
||||
"1 (ROOT (S (NP (CD Two) (NNS women)) (VP (VBP ar... \n",
|
||||
"2 (ROOT (S (NP (CD Two) (NNS women)) (VP (VBP ar... \n",
|
||||
"3 (ROOT (S (NP (NP (CD Two) (JJ young) (NNS chil... \n",
|
||||
"4 (ROOT (S (NP (NP (CD Two) (JJ young) (NNS chil... \n",
|
||||
"\n",
|
||||
" sentence2 \\\n",
|
||||
"0 The sisters are hugging goodbye while holding ... \n",
|
||||
"1 Two woman are holding packages. \n",
|
||||
"2 The men are fighting outside a deli. \n",
|
||||
"3 Two kids in numbered jerseys wash their hands. \n",
|
||||
"4 Two kids at a ballgame wash their hands. \n",
|
||||
"\n",
|
||||
" sentence2_binary_parse \\\n",
|
||||
"0 ( ( The sisters ) ( ( are ( ( hugging goodbye ... \n",
|
||||
"1 ( ( Two woman ) ( ( are ( holding packages ) )... \n",
|
||||
"2 ( ( The men ) ( ( are ( fighting ( outside ( a... \n",
|
||||
"3 ( ( ( Two kids ) ( in ( numbered jerseys ) ) )... \n",
|
||||
"4 ( ( ( Two kids ) ( at ( a ballgame ) ) ) ( ( w... \n",
|
||||
"\n",
|
||||
" sentence2_parse \n",
|
||||
"0 (ROOT (S (NP (DT The) (NNS sisters)) (VP (VBP ... \n",
|
||||
"1 (ROOT (S (NP (CD Two) (NN woman)) (VP (VBP are... \n",
|
||||
"2 (ROOT (S (NP (DT The) (NNS men)) (VP (VBP are)... \n",
|
||||
"3 (ROOT (S (NP (NP (CD Two) (NNS kids)) (PP (IN ... \n",
|
||||
"4 (ROOT (S (NP (NP (CD Two) (NNS kids)) (PP (IN ... "
|
||||
]
|
||||
},
|
||||
"execution_count": 12,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"jsonl_df.head(5)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Load azure_ml dataflow object with snli_1.0_train.txt (the split loaded when none is passed)\n",
|
||||
"\n",
|
||||
" P.S.: A dataflow object is not created as expected from the jsonl file; the file cannot be read when the temp dir is created."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 12,
|
||||
"metadata": {
|
||||
"scrolled": true
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"\n",
|
||||
"azureml_dataflow = snli.load_azureml_df(DATA_DIR_PATH) "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 13,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/html": [
|
||||
"<div>\n",
|
||||
"<style scoped>\n",
|
||||
" .dataframe tbody tr th:only-of-type {\n",
|
||||
" vertical-align: middle;\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
" .dataframe tbody tr th {\n",
|
||||
" vertical-align: top;\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
" .dataframe thead th {\n",
|
||||
" text-align: right;\n",
|
||||
" }\n",
|
||||
"</style>\n",
|
||||
"<table border=\"1\" class=\"dataframe\">\n",
|
||||
" <thead>\n",
|
||||
" <tr style=\"text-align: right;\">\n",
|
||||
" <th></th>\n",
|
||||
" <th>gold_label</th>\n",
|
||||
" <th>sentence1_binary_parse</th>\n",
|
||||
" <th>sentence2_binary_parse</th>\n",
|
||||
" <th>sentence1_parse</th>\n",
|
||||
" <th>sentence2_parse</th>\n",
|
||||
" <th>sentence1</th>\n",
|
||||
" <th>sentence2</th>\n",
|
||||
" <th>captionID</th>\n",
|
||||
" <th>pairID</th>\n",
|
||||
" <th>label1</th>\n",
|
||||
" <th>label2</th>\n",
|
||||
" <th>label3</th>\n",
|
||||
" <th>label4</th>\n",
|
||||
" <th>label5</th>\n",
|
||||
" </tr>\n",
|
||||
" </thead>\n",
|
||||
" <tbody>\n",
|
||||
" <tr>\n",
|
||||
" <th>0</th>\n",
|
||||
" <td>neutral</td>\n",
|
||||
" <td>( ( ( A person ) ( on ( a horse ) ) ) ( ( jump...</td>\n",
|
||||
" <td>( ( A person ) ( ( is ( ( training ( his horse...</td>\n",
|
||||
" <td>(ROOT (S (NP (NP (DT A) (NN person)) (PP (IN o...</td>\n",
|
||||
" <td>(ROOT (S (NP (DT A) (NN person)) (VP (VBZ is) ...</td>\n",
|
||||
" <td>A person on a horse jumps over a broken down a...</td>\n",
|
||||
" <td>A person is training his horse for a competition.</td>\n",
|
||||
" <td>3416050480.jpg#4</td>\n",
|
||||
" <td>3416050480.jpg#4r1n</td>\n",
|
||||
" <td>neutral</td>\n",
|
||||
" <td></td>\n",
|
||||
" <td></td>\n",
|
||||
" <td></td>\n",
|
||||
" <td></td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>1</th>\n",
|
||||
" <td>contradiction</td>\n",
|
||||
" <td>( ( ( A person ) ( on ( a horse ) ) ) ( ( jump...</td>\n",
|
||||
" <td>( ( A person ) ( ( ( ( is ( at ( a diner ) ) )...</td>\n",
|
||||
" <td>(ROOT (S (NP (NP (DT A) (NN person)) (PP (IN o...</td>\n",
|
||||
" <td>(ROOT (S (NP (DT A) (NN person)) (VP (VBZ is) ...</td>\n",
|
||||
" <td>A person on a horse jumps over a broken down a...</td>\n",
|
||||
" <td>A person is at a diner, ordering an omelette.</td>\n",
|
||||
" <td>3416050480.jpg#4</td>\n",
|
||||
" <td>3416050480.jpg#4r1c</td>\n",
|
||||
" <td>contradiction</td>\n",
|
||||
" <td></td>\n",
|
||||
" <td></td>\n",
|
||||
" <td></td>\n",
|
||||
" <td></td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>2</th>\n",
|
||||
" <td>entailment</td>\n",
|
||||
" <td>( ( ( A person ) ( on ( a horse ) ) ) ( ( jump...</td>\n",
|
||||
" <td>( ( A person ) ( ( ( ( is outdoors ) , ) ( on ...</td>\n",
|
||||
" <td>(ROOT (S (NP (NP (DT A) (NN person)) (PP (IN o...</td>\n",
|
||||
" <td>(ROOT (S (NP (DT A) (NN person)) (VP (VBZ is) ...</td>\n",
|
||||
" <td>A person on a horse jumps over a broken down a...</td>\n",
|
||||
" <td>A person is outdoors, on a horse.</td>\n",
|
||||
" <td>3416050480.jpg#4</td>\n",
|
||||
" <td>3416050480.jpg#4r1e</td>\n",
|
||||
" <td>entailment</td>\n",
|
||||
" <td></td>\n",
|
||||
" <td></td>\n",
|
||||
" <td></td>\n",
|
||||
" <td></td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>3</th>\n",
|
||||
" <td>neutral</td>\n",
|
||||
" <td>( Children ( ( ( smiling and ) waving ) ( at c...</td>\n",
|
||||
" <td>( They ( are ( smiling ( at ( their parents ) ...</td>\n",
|
||||
" <td>(ROOT (NP (S (NP (NNP Children)) (VP (VBG smil...</td>\n",
|
||||
" <td>(ROOT (S (NP (PRP They)) (VP (VBP are) (VP (VB...</td>\n",
|
||||
" <td>Children smiling and waving at camera</td>\n",
|
||||
" <td>They are smiling at their parents</td>\n",
|
||||
" <td>2267923837.jpg#2</td>\n",
|
||||
" <td>2267923837.jpg#2r1n</td>\n",
|
||||
" <td>neutral</td>\n",
|
||||
" <td></td>\n",
|
||||
" <td></td>\n",
|
||||
" <td></td>\n",
|
||||
" <td></td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>4</th>\n",
|
||||
" <td>entailment</td>\n",
|
||||
" <td>( Children ( ( ( smiling and ) waving ) ( at c...</td>\n",
|
||||
" <td>( There ( ( are children ) present ) )</td>\n",
|
||||
" <td>(ROOT (NP (S (NP (NNP Children)) (VP (VBG smil...</td>\n",
|
||||
" <td>(ROOT (S (NP (EX There)) (VP (VBP are) (NP (NN...</td>\n",
|
||||
" <td>Children smiling and waving at camera</td>\n",
|
||||
" <td>There are children present</td>\n",
|
||||
" <td>2267923837.jpg#2</td>\n",
|
||||
" <td>2267923837.jpg#2r1e</td>\n",
|
||||
" <td>entailment</td>\n",
|
||||
" <td></td>\n",
|
||||
" <td></td>\n",
|
||||
" <td></td>\n",
|
||||
" <td></td>\n",
|
||||
" </tr>\n",
|
||||
" </tbody>\n",
|
||||
"</table>\n",
|
||||
"</div>"
|
||||
],
|
||||
"text/plain": [
|
||||
" gold_label sentence1_binary_parse \\\n",
|
||||
"0 neutral ( ( ( A person ) ( on ( a horse ) ) ) ( ( jump... \n",
|
||||
"1 contradiction ( ( ( A person ) ( on ( a horse ) ) ) ( ( jump... \n",
|
||||
"2 entailment ( ( ( A person ) ( on ( a horse ) ) ) ( ( jump... \n",
|
||||
"3 neutral ( Children ( ( ( smiling and ) waving ) ( at c... \n",
|
||||
"4 entailment ( Children ( ( ( smiling and ) waving ) ( at c... \n",
|
||||
"\n",
|
||||
" sentence2_binary_parse \\\n",
|
||||
"0 ( ( A person ) ( ( is ( ( training ( his horse... \n",
|
||||
"1 ( ( A person ) ( ( ( ( is ( at ( a diner ) ) )... \n",
|
||||
"2 ( ( A person ) ( ( ( ( is outdoors ) , ) ( on ... \n",
|
||||
"3 ( They ( are ( smiling ( at ( their parents ) ... \n",
|
||||
"4 ( There ( ( are children ) present ) ) \n",
|
||||
"\n",
|
||||
" sentence1_parse \\\n",
|
||||
"0 (ROOT (S (NP (NP (DT A) (NN person)) (PP (IN o... \n",
|
||||
"1 (ROOT (S (NP (NP (DT A) (NN person)) (PP (IN o... \n",
|
||||
"2 (ROOT (S (NP (NP (DT A) (NN person)) (PP (IN o... \n",
|
||||
"3 (ROOT (NP (S (NP (NNP Children)) (VP (VBG smil... \n",
|
||||
"4 (ROOT (NP (S (NP (NNP Children)) (VP (VBG smil... \n",
|
||||
"\n",
|
||||
" sentence2_parse \\\n",
|
||||
"0 (ROOT (S (NP (DT A) (NN person)) (VP (VBZ is) ... \n",
|
||||
"1 (ROOT (S (NP (DT A) (NN person)) (VP (VBZ is) ... \n",
|
||||
"2 (ROOT (S (NP (DT A) (NN person)) (VP (VBZ is) ... \n",
|
||||
"3 (ROOT (S (NP (PRP They)) (VP (VBP are) (VP (VB... \n",
|
||||
"4 (ROOT (S (NP (EX There)) (VP (VBP are) (NP (NN... \n",
|
||||
"\n",
|
||||
" sentence1 \\\n",
|
||||
"0 A person on a horse jumps over a broken down a... \n",
|
||||
"1 A person on a horse jumps over a broken down a... \n",
|
||||
"2 A person on a horse jumps over a broken down a... \n",
|
||||
"3 Children smiling and waving at camera \n",
|
||||
"4 Children smiling and waving at camera \n",
|
||||
"\n",
|
||||
" sentence2 captionID \\\n",
|
||||
"0 A person is training his horse for a competition. 3416050480.jpg#4 \n",
|
||||
"1 A person is at a diner, ordering an omelette. 3416050480.jpg#4 \n",
|
||||
"2 A person is outdoors, on a horse. 3416050480.jpg#4 \n",
|
||||
"3 They are smiling at their parents 2267923837.jpg#2 \n",
|
||||
"4 There are children present 2267923837.jpg#2 \n",
|
||||
"\n",
|
||||
" pairID label1 label2 label3 label4 label5 \n",
|
||||
"0 3416050480.jpg#4r1n neutral \n",
|
||||
"1 3416050480.jpg#4r1c contradiction \n",
|
||||
"2 3416050480.jpg#4r1e entailment \n",
|
||||
"3 2267923837.jpg#2r1n neutral \n",
|
||||
"4 2267923837.jpg#2r1e entailment "
|
||||
]
|
||||
},
|
||||
"execution_count": 13,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"azureml_dataflow.head(5)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.6.8"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
|
|
@ -1,8 +1,8 @@
|
|||
import nltk
|
||||
|
||||
def nltk_tokenizer(snli_df):
    """Add NLTK word-token columns for both sentences of an SNLI dataframe.

    Args:
        snli_df (pandas.DataFrame): Frame holding the sentence text in the
            columns 'sentence_1' and 'sentence_2'.

    Returns:
        pandas.DataFrame: The same object that was passed in (it is modified
        in place), with two added columns, 'sentence1_tokens' and
        'sentence2_tokens', each holding the result of ``nltk.word_tokenize``
        for the corresponding sentence.

    Raises:
        KeyError: If 'sentence_1' or 'sentence_2' is not a column of snli_df.
    """
    # (source column, output column) pairs. The source names carry an
    # underscore while the output names do not; both spellings are kept
    # exactly as callers expect them.
    column_pairs = (
        ("sentence_1", "sentence1_tokens"),
        ("sentence_2", "sentence2_tokens"),
    )
    for source_col, token_col in column_pairs:
        # Column-wise Series.apply instead of row-wise DataFrame.apply(axis=1):
        # it avoids building a Series per row and still yields a Series when
        # the frame is empty, so the column assignment always succeeds.
        snli_df[token_col] = snli_df[source_col].apply(nltk.word_tokenize)

    return snli_df
|
||||
|
||||
|
|
Загрузка…
Ссылка в новой задаче