Edits to embedding trainer and baselines
Parent
1f52f797b4
Commit
5d8583601c
The diff for one file is not shown because it is too large.
@@ -33,7 +33,8 @@
"outputs": [],
"source": [
"import gensim\n",
"import sys"
"import sys\n",
"import os"
]
},
{
@@ -50,15 +51,18 @@
"outputs": [],
"source": [
"sys.path.append(\"../../../\") ## set the environment path\n",
"\n",
"BASE_DATA_PATH = \"../../../data\"\n",
"SAVE_FILES_PATH = BASE_DATA_PATH + \"/trained_word_embeddings/\"\n",
"\n",
"from utils_nlp.dataset.stsbenchmark import STSBenchmark\n",
"if not os.path.exists(SAVE_FILES_PATH):\n",
" os.makedirs(SAVE_FILES_PATH)\n",
" \n",
"from utils_nlp.dataset.preprocess import (\n",
" to_lowercase,\n",
" to_spacy_tokens,\n",
" rm_spacy_stopwords,\n",
")"
")\n",
"from utils_nlp.dataset import stsbenchmark"
]
},
{
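The directory check added in this cell can also be collapsed into a single standard-library call via the exist_ok flag. A minimal equivalent sketch, reusing the SAVE_FILES_PATH value defined above:

    import os

    SAVE_FILES_PATH = "../../../data/trained_word_embeddings/"
    # Same effect as the "if not os.path.exists(...): os.makedirs(...)" pattern in the diff:
    # create the directory if it is missing, do nothing if it already exists.
    os.makedirs(SAVE_FILES_PATH, exist_ok=True)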
@@ -67,8 +71,8 @@
"metadata": {},
"outputs": [],
"source": [
"# Initializing this instance runs the downloader and extractor behind the scenes, then convert to dataframe\n",
"stsTrain = STSBenchmark(\"train\", base_data_path=BASE_DATA_PATH).as_dataframe()"
"# Produce a pandas dataframe for the training set\n",
"stsTrain = stsbenchmark.load_pandas_df(BASE_DATA_PATH, file_split=\"train\")"
]
},
{
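The new loader takes the split name through the file_split argument. A hypothetical sketch of loading the remaining STS Benchmark splits the same way; only the "train" call appears in this diff, so the "dev" and "test" split names are assumptions:

    from utils_nlp.dataset import stsbenchmark

    BASE_DATA_PATH = "../../../data"
    # "train" is the call shown in the diff; "dev" and "test" are assumed split names.
    sts_train = stsbenchmark.load_pandas_df(BASE_DATA_PATH, file_split="train")
    sts_dev = stsbenchmark.load_pandas_df(BASE_DATA_PATH, file_split="dev")
    sts_test = stsbenchmark.load_pandas_df(BASE_DATA_PATH, file_split="test")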
@@ -180,31 +184,23 @@
"name": "stdout",
"output_type": "stream",
"text": [
"Embedding for apple: [-1.30064473e-01 1.84295833e-01 -1.53965428e-01 -9.69498605e-02\n",
" 4.99420874e-02 -1.23197936e-01 7.28140250e-02 -4.12699208e-02\n",
" 2.47626036e-01 -2.69805547e-04 -7.65557750e-04 2.08947986e-01\n",
" 7.81186996e-03 5.42742060e-03 5.25087006e-02 2.47807354e-01\n",
" -2.48165410e-02 9.91394650e-03 3.54040265e-02 -2.14830145e-01\n",
" 2.24868301e-02 1.52286962e-01 1.85761824e-01 2.33249858e-01\n",
" -1.46878466e-01 -7.60829672e-02 4.50950442e-03 1.15145534e-01\n",
" -9.11297649e-02 6.20169528e-02 -5.24968617e-02 -8.68254527e-02\n",
" -1.77496113e-04 8.58828798e-02 1.19839951e-01 2.51445977e-04\n",
" -3.06774918e-02 2.70280894e-03 -9.14655998e-02 5.54770082e-02\n",
" 6.70319721e-02 -1.10063367e-01 -9.94274616e-02 -1.62537303e-02\n",
" 1.07709818e-01 -1.17890313e-01 -1.68436840e-02 2.67276943e-01\n",
" 1.66485235e-02 -1.05556019e-01 8.72049183e-02 -2.79379219e-01\n",
" -7.61673898e-02 -1.26047105e-01 -2.10570037e-01 1.06335968e-01\n",
" -1.13933079e-01 8.91806409e-02 2.40348503e-02 1.27991261e-02\n",
" -9.80987865e-03 -8.29416886e-02 -1.05351470e-01 9.63128060e-02\n",
" -1.32907405e-01 -5.90794981e-02 -1.05936542e-01 5.24872467e-02\n",
" -1.62810262e-04 1.90204114e-03 -1.07438803e-01 -1.86693370e-02\n",
" -1.74428806e-01 -2.69948710e-02 -4.38663997e-02 -4.28975448e-02\n",
" 9.05705541e-02 -2.10348725e-01 -1.16732195e-01 3.60293575e-02\n",
" -2.08853818e-02 2.63118356e-01 1.76015347e-01 1.23300500e-01\n",
" -3.50267850e-02 -4.52703685e-02 -1.70624122e-01 -3.28516886e-02\n",
" 5.28835841e-02 8.53991881e-02 -8.47622007e-02 2.25594401e-01\n",
" -1.77075803e-01 -5.37518365e-03 9.42931976e-03 1.78159177e-02\n",
" -7.26433992e-02 -3.52309011e-02 -1.68363556e-01 2.79879309e-02]\n",
"Embedding for apple: [-0.09213913 -0.02462959 -0.11255068 0.11652157 -0.18142793 -0.17555593\n",
" 0.07121698 0.086779 -0.03097944 -0.01890221 -0.04537104 -0.10696206\n",
" 0.02276987 0.08645772 0.09701958 -0.22489007 0.03993007 -0.0748188\n",
" 0.0185363 -0.257262 0.06551826 0.01579769 -0.18179104 -0.22390445\n",
" -0.06907904 -0.08859113 0.00603421 -0.01953833 -0.0306666 -0.20717207\n",
" -0.07466035 -0.10690664 -0.06131361 -0.0747569 -0.03541371 -0.02307771\n",
" -0.04890924 0.09401437 0.14955166 0.03299814 -0.20348735 0.1091179\n",
" -0.05915498 0.07897269 -0.0392515 -0.1337506 0.16920352 0.00084969\n",
" 0.09151786 -0.07067705 -0.00130636 -0.00040609 -0.09070218 -0.05848758\n",
" 0.01417456 0.12759478 0.06773403 -0.03618362 0.05180905 -0.03987553\n",
" 0.15119544 0.1374909 -0.2100861 -0.12180148 -0.01784294 0.09922534\n",
" -0.01852375 0.2757332 -0.07551172 0.06188574 -0.0189024 0.08390908\n",
" 0.06324708 -0.02126443 0.07884526 -0.06014811 -0.1291807 0.03968196\n",
" -0.00395843 -0.05398612 0.25687164 0.06331551 -0.07450255 -0.12246329\n",
" -0.1481028 0.11168568 -0.24994832 -0.05962377 0.04101507 0.06981998\n",
" 0.02528387 0.1725297 0.10974599 0.12216322 -0.16961183 0.0819602\n",
" 0.15518941 0.12973912 0.09754901 -0.0033999 ]\n",
"\n",
"First 30 vocabulary words: ['a', 'plane', 'is', 'taking', 'off', '.', 'man', 'playing', 'large', 'flute', 'spreading', 'cheese', 'on', 'pizza', 'three', 'men', 'are', 'the', 'some', 'fighting']\n"
]
@@ -218,8 +214,8 @@
"print(\"\\nFirst 30 vocabulary words:\", list(word2vec_model.wv.vocab)[:20])\n",
"\n",
"# 3. Save the word embeddings. We can save as binary format (to save space) or ASCII format\n",
"word2vec_model.wv.save_word2vec_format(\"word2vec_model\", binary=True) # binary format\n",
"word2vec_model.wv.save_word2vec_format(\"word2vec_model\", binary=False) # ASCII format"
"word2vec_model.wv.save_word2vec_format(SAVE_FILES_PATH+\"word2vec_model\", binary=True) # binary format\n",
"word2vec_model.wv.save_word2vec_format(SAVE_FILES_PATH+\"word2vec_model\", binary=False) # ASCII format"
]
},
{
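Because the vectors are now written under SAVE_FILES_PATH, they can be read back with gensim's standard word2vec loader. A minimal sketch, assuming the binary file produced by the cell above and the gensim 3.x KeyedVectors API used elsewhere in this notebook:

    from gensim.models import KeyedVectors

    # Path assembled from the diff: SAVE_FILES_PATH + "word2vec_model", written with binary=True.
    w2v = KeyedVectors.load_word2vec_format(
        "../../../data/trained_word_embeddings/word2vec_model", binary=True
    )
    print(w2v["apple"][:5])   # first five dimensions of the "apple" vector
    print(len(w2v.vocab))     # vocabulary size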
@@ -276,23 +272,31 @@
"name": "stdout",
"output_type": "stream",
"text": [
"Embedding for apple: [-0.19466035 0.02329457 0.11905755 0.43202105 0.29234868 -0.4173747\n",
" -0.42871934 -0.587514 -0.24620762 -0.30886024 -0.04068367 0.20132142\n",
" -0.1593995 -0.34693947 -0.05454068 0.21118519 0.20061074 0.33920124\n",
" 0.13465068 -0.16492505 -0.01792471 0.3517471 -0.42507643 -0.14185262\n",
" 0.6766511 -0.35682997 0.38852996 0.08338872 -0.16927068 0.00101932\n",
" 0.01033709 -0.00513317 -0.15251048 -0.07668231 0.02508747 -0.16725563\n",
" 0.13578647 0.5188022 0.4219404 -0.29186445 -0.35036987 0.04769979\n",
" -0.23967543 -0.03550959 -0.4072291 0.4920213 0.30146047 -0.569966\n",
" 0.12033249 -0.24960376 -0.20398642 -0.37427858 0.04139522 0.28986236\n",
" -0.31172943 0.7363574 -0.43040937 0.24302956 -0.2891899 -0.12707426\n",
" -0.26763597 -0.3471016 0.08912586 -0.20722611 0.1529707 0.39230242\n",
" -0.23503402 -0.00332095 -0.04347242 -0.00989339 0.08801552 -0.36916256\n",
" -0.13720557 0.40390077 -0.21936806 -0.10426865 -0.18858872 0.15547332\n",
" -0.3519439 0.00505178 0.1029634 -0.00991125 0.41537017 -0.10500967\n",
" 0.43521944 0.26955605 -0.23591378 0.14193945 0.08484828 0.57761383\n",
" -0.31014645 0.63834554 -0.15213463 -0.46310434 0.10502262 -0.03921723\n",
" 0.21358919 -0.17636251 0.14675795 0.15879233]\n",
"Embedding for apple: [-2.1927688e-01 2.9813698e-02 6.7616858e-02 3.6836052e-01\n",
" 2.9166859e-01 -4.3027815e-01 -4.3850473e-01 -5.5472869e-01\n",
" -2.4860071e-01 -2.8481758e-01 -8.5550338e-02 2.0373566e-01\n",
" -8.8941768e-02 -3.5824496e-01 -7.3820040e-02 1.9162497e-01\n",
" 1.9164029e-01 3.2222369e-01 1.7169371e-01 -1.8063694e-01\n",
" -2.5478544e-02 3.8527763e-01 -4.4661409e-01 -1.9077049e-01\n",
" 6.3831955e-01 -3.4981030e-01 3.6546609e-01 7.3591776e-02\n",
" -1.7809562e-01 -3.0694399e-02 -6.5486156e-04 2.8458415e-02\n",
" -1.4853548e-01 -1.1247496e-01 2.6613681e-02 -1.5886196e-01\n",
" 1.0738261e-01 5.2269661e-01 4.1452998e-01 -2.4978566e-01\n",
" -3.6866227e-01 4.5613028e-02 -2.5554851e-01 -2.9870963e-02\n",
" -3.4256181e-01 4.1204464e-01 3.3703518e-01 -5.3163689e-01\n",
" 2.7413066e-02 -3.2481736e-01 -2.1018679e-01 -3.5171476e-01\n",
" 5.6522321e-02 3.2140371e-01 -3.0404109e-01 7.3594677e-01\n",
" -4.7126335e-01 2.5894231e-01 -2.6430738e-01 -1.1617108e-01\n",
" -2.7015641e-01 -3.2107431e-01 8.0991395e-02 -1.8977067e-01\n",
" 1.6966967e-01 3.6855596e-01 -2.0167376e-01 -1.6917199e-02\n",
" -4.0029153e-02 8.3818562e-02 8.8887364e-02 -3.4052727e-01\n",
" -1.5159512e-01 4.2969501e-01 -1.8632193e-01 -4.8835874e-02\n",
" -1.9202119e-01 1.5949497e-01 -3.4046504e-01 4.6990579e-03\n",
" 9.2628546e-02 1.6060786e-02 3.8600260e-01 -8.4986687e-02\n",
" 4.4739038e-01 2.1059968e-01 -1.9877617e-01 1.8113001e-01\n",
" 9.4012588e-02 5.5849826e-01 -3.2842401e-01 6.3832772e-01\n",
" -1.1614193e-01 -4.4778910e-01 1.4173931e-01 -2.4079295e-02\n",
" 1.8156306e-01 -1.9836307e-01 1.4190227e-01 1.5471222e-01]\n",
"\n",
"First 30 vocabulary words: ['a', 'plane', 'is', 'taking', 'off', '.', 'man', 'playing', 'large', 'flute', 'spreading', 'cheese', 'on', 'pizza', 'three', 'men', 'are', 'the', 'some', 'fighting']\n"
]
@@ -306,8 +310,8 @@
"print(\"\\nFirst 30 vocabulary words:\", list(fastText_model.wv.vocab)[:20])\n",
"\n",
"# 3. Save the word embeddings. We can save as binary format (to save space) or ASCII format\n",
"fastText_model.wv.save_word2vec_format(\"fastText_model\", binary=True) # binary format\n",
"fastText_model.wv.save_word2vec_format(\"fastText_model\", binary=False) # ASCII format"
"fastText_model.wv.save_word2vec_format(SAVE_FILES_PATH+\"fastText_model\", binary=True) # binary format\n",
"fastText_model.wv.save_word2vec_format(SAVE_FILES_PATH+\"fastText_model\", binary=False) # ASCII format"
]
},
{
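The fastText vectors are exported in the same word2vec format, which stores whole-word vectors only; the subword (character n-gram) information that lets fastText embed out-of-vocabulary words is not preserved by this export. A short sketch of reading the exported file back, with the path mirroring the diff:

    from gensim.models import KeyedVectors

    ft_wv = KeyedVectors.load_word2vec_format(
        "../../../data/trained_word_embeddings/fastText_model", binary=True
    )
    print(ft_wv["apple"].shape)  # whole-word vector only; unseen words raise a KeyError here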
@@ -359,7 +363,7 @@
"outputs": [],
"source": [
"#save our corpus as tokens delimited by spaces with new line characters in between sentences\n",
"with open('sentences.txt', 'w', encoding='utf8') as file:\n",
"with open(BASE_DATA_PATH+'/clean/stsbenchmark/training-corpus-cleaned.txt', 'w', encoding='utf8') as file:\n",
" for sent in sentences:\n",
" file.write(\" \".join(sent) + \"\\n\")"
]
@@ -375,7 +379,7 @@
"2. max-vocab: upper bound on the number of vocabulary words to keep\n",
"3. verbose: 0, 1, or 2 (default)\n",
"\n",
"Then provide the path to the text file we created in Step 0 (<\"sentences.txt\">) followed by a file path that we'll save the vocabulary to (\"glove/build/vocab.txt\")"
"Then provide the path to the text file we created in Step 0 followed by a file path that we'll save the vocabulary to "
]
},
{
@@ -397,7 +401,7 @@
}
],
"source": [
"!\"glove/build/vocab_count\" -min-count 5 -verbose 2 <\"sentences.txt\"> \"glove/build/vocab.txt\""
"!\"glove/build/vocab_count\" -min-count 5 -verbose 2 <\"../../../data/clean/stsbenchmark/training-corpus-cleaned.txt\"> \"../../../data/trained_word_embeddings/vocab.txt\""
]
},
{
@@ -414,7 +418,7 @@
"5. memory: soft limit for memory consumption, default 4\n",
"6. max-product: limit the size of dense co-occurrence array by specifying the max product (integer) of the frequency counts of the two co-occurring words\n",
"\n",
"Then provide the path to the text file we created in Step 0 (<\"sentences.txt\">) followed by a file path that we'll save the co-occurrences to (\"glove/build/cooccurrence.bin\")"
"Then provide the path to the text file we created in Step 0 followed by a file path that we'll save the co-occurrences to"
]
},
{
@@ -431,7 +435,7 @@
"context: symmetric\n",
"max product: 13752509\n",
"overflow length: 38028356\n",
"Reading vocab from file \"glove/build/vocab.txt\"...loaded 3166 words.\n",
"Reading vocab from file \"../../../data/trained_word_embeddings/vocab.txt\"...loaded 3166 words.\n",
"Building lookup table...table contains 10023557 elements.\n",
"Processing token: 0100000Processed 129989 tokens.\n",
"Writing cooccurrences to disk.......2 files in total.\n",
@@ -441,7 +445,7 @@
}
],
"source": [
"!\"glove/build/cooccur\" -memory 4 -vocab-file \"glove/build/vocab.txt\" -verbose 2 -window-size 15 <\"sentences.txt\"> \"glove/build/cooccurrence.bin\""
"!\"glove/build/cooccur\" -memory 4 -vocab-file \"../../../data/trained_word_embeddings/vocab.txt\" -verbose 2 -window-size 15 <\"../../../data/clean/stsbenchmark/training-corpus-cleaned.txt\"> \"../../../data/trained_word_embeddings/cooccurrence.bin\""
]
},
{
@@ -455,7 +459,7 @@
"2. memory: soft limit for memory consumption, default 4\n",
"3. array-size: limit to the length of the buffer which stores chunks of data to shuffle before writing to disk\n",
"\n",
"Then provide the path to the co-occurrence file we created in Step 2 (<\"glove/build/cooccurrence.bin\">) followed by a file path that we'll save the shuffled co-occurrences to (\"glove/build/cooccurrence.shuf.bin\")"
"Then provide the path to the co-occurrence file we created in Step 2 followed by a file path that we'll save the shuffled co-occurrences to"
]
},
{
@@ -477,7 +481,7 @@
}
],
"source": [
"!\"glove/build/shuffle\" -memory 4 -verbose 2 <\"glove/build/cooccurrence.bin\"> \"glove/build/cooccurrence.shuf.bin\""
"!\"glove/build/shuffle\" -memory 4 -verbose 2 <\"../../../data/trained_word_embeddings/cooccurrence.bin\"> \"../../../data/trained_word_embeddings/cooccurrence.shuf.bin\""
]
},
{
@@ -515,28 +519,28 @@
"vocab size: 3166\n",
"x_max: 10.000000\n",
"alpha: 0.750000\n",
"04/29/19 - 01:26.33PM, iter: 001, cost: 0.098453\n",
"04/29/19 - 01:26.33PM, iter: 002, cost: 0.084751\n",
"04/29/19 - 01:26.33PM, iter: 003, cost: 0.074604\n",
"04/29/19 - 01:26.33PM, iter: 004, cost: 0.071038\n",
"04/29/19 - 01:26.33PM, iter: 005, cost: 0.067709\n",
"04/29/19 - 01:26.33PM, iter: 006, cost: 0.064181\n",
"04/29/19 - 01:26.33PM, iter: 007, cost: 0.059996\n",
"04/29/19 - 01:26.33PM, iter: 008, cost: 0.055268\n",
"04/29/19 - 01:26.33PM, iter: 009, cost: 0.050708\n",
"04/29/19 - 01:26.33PM, iter: 010, cost: 0.046754\n",
"04/29/19 - 01:26.33PM, iter: 011, cost: 0.043402\n",
"04/29/19 - 01:26.33PM, iter: 012, cost: 0.040575\n",
"04/29/19 - 01:26.33PM, iter: 013, cost: 0.038056\n",
"04/29/19 - 01:26.33PM, iter: 014, cost: 0.035843\n",
"04/29/19 - 01:26.33PM, iter: 015, cost: 0.033807\n"
"04/30/19 - 10:33.02AM, iter: 001, cost: 0.098433\n",
"04/30/19 - 10:33.02AM, iter: 002, cost: 0.084675\n",
"04/30/19 - 10:33.02AM, iter: 003, cost: 0.074585\n",
"04/30/19 - 10:33.02AM, iter: 004, cost: 0.071048\n",
"04/30/19 - 10:33.02AM, iter: 005, cost: 0.067768\n",
"04/30/19 - 10:33.02AM, iter: 006, cost: 0.064212\n",
"04/30/19 - 10:33.02AM, iter: 007, cost: 0.060040\n",
"04/30/19 - 10:33.02AM, iter: 008, cost: 0.055310\n",
"04/30/19 - 10:33.02AM, iter: 009, cost: 0.050727\n",
"04/30/19 - 10:33.02AM, iter: 010, cost: 0.046803\n",
"04/30/19 - 10:33.02AM, iter: 011, cost: 0.043456\n",
"04/30/19 - 10:33.02AM, iter: 012, cost: 0.040570\n",
"04/30/19 - 10:33.02AM, iter: 013, cost: 0.038074\n",
"04/30/19 - 10:33.02AM, iter: 014, cost: 0.035818\n",
"04/30/19 - 10:33.02AM, iter: 015, cost: 0.033807\n"
]
}
],
"source": [
"!\"glove/build/glove\" -save-file \"glove/build/GloVe_vectors\" -threads 8 -input-file \\\n",
"\"glove/build/cooccurrence.shuf.bin\" -x-max 10 -iter 15 -vector-size 50 -binary 2 \\\n",
"-vocab-file \"glove/build/vocab.txt\" -verbose 2"
"!\"glove/build/glove\" -save-file \"../../../data/trained_word_embeddings/GloVe_vectors\" -threads 8 -input-file \\\n",
"\"../../../data/trained_word_embeddings/cooccurrence.shuf.bin\" -x-max 10 -iter 15 -vector-size 50 -binary 2 \\\n",
"-vocab-file \"../../../data/trained_word_embeddings/vocab.txt\" -verbose 2"
]
},
{
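With -binary 2 the GloVe tool writes both binary and text vector files, so GloVe_vectors.txt appears next to the .bin output. As an alternative to the manual parsing in the next cell, the text file can be converted to word2vec format and loaded through gensim; a sketch assuming gensim's bundled glove2word2vec script and the output path used above (the converted filename is arbitrary):

    from gensim.models import KeyedVectors
    from gensim.scripts.glove2word2vec import glove2word2vec

    glove_txt = "../../../data/trained_word_embeddings/GloVe_vectors.txt"
    w2v_txt = "../../../data/trained_word_embeddings/GloVe_vectors.w2v.txt"

    # Prepends the "<vocab size> <dimensions>" header that the word2vec text format expects.
    glove2word2vec(glove_txt, w2v_txt)
    glove_wv = KeyedVectors.load_word2vec_format(w2v_txt, binary=False)
    print(glove_wv["apple"][:5])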
@@ -561,7 +565,7 @@
"source": [
"#load in the saved word vectors\n",
"glove_wv = {}\n",
"with open(\"glove/build/GloVe_vectors.txt\", encoding='utf-8') as f:\n",
"with open(\"../../../data/trained_word_embeddings/GloVe_vectors.txt\", encoding='utf-8') as f:\n",
" for line in f:\n",
" split_line = line.split(\" \")\n",
" glove_wv[split_line[0]] = [float(i) for i in split_line[1:]]"
@@ -576,7 +580,7 @@
"name": "stdout",
"output_type": "stream",
"text": [
"Embedding for apple: [-0.015303, -0.0512, -0.011988, 0.429914, 0.246523, 0.009762, 0.153154, -0.178636, 0.061723, 0.108515, -0.166807, -0.033258, -0.046394, 0.081953, -0.209458, 0.194758, 0.179153, 0.23262, -0.118717, -0.053151, -0.018892, -0.037714, -0.067396, 0.057499, 0.179459, 0.004552, -0.203058, 0.243629, -0.294976, 0.123971, 0.368613, 0.190665, -0.16738, -0.0599, 0.119195, -0.030108, -0.254778, -0.007862, -0.036998, 0.060919, -0.210459, 0.293917, 0.045603, -0.01104, 0.075651, -0.120635, -0.133497, -0.372606, -0.152981, 0.009014]\n",
"Embedding for apple: [0.007199, -0.055337, -0.048813, 0.463647, 0.233898, -0.020051, 0.18876, -0.19439, 0.014477, 0.122465, -0.145506, -0.056616, -0.076315, 0.051205, -0.197457, 0.197818, 0.191692, 0.259758, -0.088431, -0.101713, -0.024687, -0.083431, -0.056415, 0.08024, 0.150831, 0.030778, -0.176252, 0.291561, -0.298596, 0.111546, 0.385694, 0.184508, -0.133928, 0.007924, 0.088849, 0.016869, -0.195535, 0.002015, -0.053591, 0.043867, -0.195157, 0.270429, -0.003891, -0.033436, 0.077898, -0.083324, -0.135095, -0.419319, -0.140611, 0.000322]\n",
"\n",
"First 30 vocabulary words: ['.', 'a', 'the', 'in', ',', 'is', 'to', 'of', 'and', 'on', 'man', '-', \"'s\", 'with', 'for', 'at', 'woman', 'are', 'that', 'two']\n"
]