update links to the public blob

This commit is contained in:
NORTHAMERICA\mez 2017-07-13 11:27:01 -04:00
Родитель 717942aa6c
Коммит 313ed51478
2 изменённых файлов: 31 добавлений и 27 удалений

Просмотреть файл

@ -115,9 +115,9 @@
"outputs": [],
"source": [
"# URLs to Original questions, Duplications, Answers and Function Words.\n",
"questions_url = 'https://mezsa.blob.core.windows.net/stackoverflow/orig-q.tsv.gz'\n",
"dupes_url = 'https://mezsa.blob.core.windows.net/stackoverflow/dup-q.tsv.gz'\n",
"answers_url = 'https://mezsa.blob.core.windows.net/stackoverflow/ans.tsv.gz'"
"questions_url = 'https://bostondata.blob.core.windows.net/stackoverflow/orig-q.tsv.gz'\n",
"dupes_url = 'https://bostondata.blob.core.windows.net/stackoverflow/dup-q.tsv.gz'\n",
"answers_url = 'https://bostondata.blob.core.windows.net/stackoverflow/ans.tsv.gz'"
]
},
{
@ -454,8 +454,8 @@
"outputs": [],
"source": [
"# Configure Blob Storage\n",
"storage_account_name = 'mezsa'\n",
"storage_account_key = 'X1Xwyn5ROxyQa4tmvjSza/Lv5bXLu7cZ1jWyfFhCEBCKFr78onDgFUH05F5iG2aq1IsU+DIooYDbPzKa821FSA=='\n",
"storage_account_name = '*****'\n",
"storage_account_key = '*********'\n",
"account = CloudStorageAccount(account_name=storage_account_name, account_key=storage_account_key)\n",
"blob_service = account.create_blob_service()\n",
"\n",

Просмотреть файл

@ -129,15 +129,13 @@
},
"outputs": [],
"source": [
"trainQ_url = 'https://mezsa.blob.core.windows.net/stackoverflownew/trainQ_tutorial.tsv'\n",
"testQ_url = 'https://mezsa.blob.core.windows.net/stackoverflownew/testQ_tutorial.tsv'\n",
"# answersC_url = 'https://mezsa.blob.core.windows.net/stackoverflownew/answersC_tutorial.tsv'\n",
"function_words_url = 'https://mezsa.blob.core.windows.net/stackoverflow/function_words.txt'\n",
"trainQ_url = 'https://bostondata.blob.core.windows.net/stackoverflow/trainQ_tutorial.tsv'\n",
"testQ_url = 'https://bostondata.blob.core.windows.net/stackoverflow/testQ_tutorial.tsv'\n",
"function_words_url = 'https://bostondata.blob.core.windows.net/stackoverflow/function_words.txt'\n",
"\n",
"# load datasets.\n",
"trainQ = pd.read_csv(trainQ_url, sep='\\t', index_col='Id', encoding='latin1')\n",
"testQ = pd.read_csv(testQ_url, sep='\\t', index_col='Id', encoding='latin1')\n",
"# answersC = pd.read_csv(answersC_url, sep='\\t', index_col='Id', encoding='latin1')\n",
"# Load the list of non-content bearing function words\n",
"functionwordHash = LoadListAsHash(function_words_url)"
]
@ -370,8 +368,6 @@
" # Split the text line into an array of words\n",
" wordArray = textData[i].split()\n",
" numWords = len(wordArray)\n",
"# if numWords == 0:\n",
"# print(textData[i])\n",
" \n",
" # Create an array marking each word as valid or invalid\n",
" validArray = [];\n",
@ -748,22 +744,22 @@
"output_type": "stream",
"text": [
"Start phrase learning with 0 phrases of 200 phrases learned\n",
"Iteration 1: Added 42 new phrases in 1.14 seconds (Learned 42 of max 200)\n",
"Iteration 2: Added 35 new phrases in 1.08 seconds (Learned 77 of max 200)\n",
"Iteration 3: Added 32 new phrases in 1.07 seconds (Learned 109 of max 200)\n",
"Iteration 1: Added 42 new phrases in 1.18 seconds (Learned 42 of max 200)\n",
"Iteration 2: Added 35 new phrases in 1.18 seconds (Learned 77 of max 200)\n",
"Iteration 3: Added 32 new phrases in 1.11 seconds (Learned 109 of max 200)\n",
"Iteration 4: Added 34 new phrases in 1.08 seconds (Learned 143 of max 200)\n",
"Iteration 5: Added 31 new phrases in 1.06 seconds (Learned 174 of max 200)\n",
"Iteration 6: Added 11 new phrases in 1.00 seconds (Learned 185 of max 200)\n",
"Iteration 7: Added 3 new phrases in 0.98 seconds (Learned 188 of max 200)\n",
"Iteration 8: Added 4 new phrases in 0.97 seconds (Learned 192 of max 200)\n",
"Iteration 9: Added 1 new phrases in 0.97 seconds (Learned 193 of max 200)\n",
"Iteration 5: Added 31 new phrases in 1.24 seconds (Learned 174 of max 200)\n",
"Iteration 6: Added 11 new phrases in 1.23 seconds (Learned 185 of max 200)\n",
"Iteration 7: Added 3 new phrases in 1.17 seconds (Learned 188 of max 200)\n",
"Iteration 8: Added 4 new phrases in 1.06 seconds (Learned 192 of max 200)\n",
"Iteration 9: Added 1 new phrases in 0.98 seconds (Learned 193 of max 200)\n",
"Iteration 10: Added 1 new phrases in 0.96 seconds (Learned 194 of max 200)\n",
"Iteration 11: Added 1 new phrases in 0.96 seconds (Learned 195 of max 200)\n",
"Iteration 12: Added 1 new phrases in 0.96 seconds (Learned 196 of max 200)\n",
"Iteration 11: Added 1 new phrases in 1.13 seconds (Learned 195 of max 200)\n",
"Iteration 12: Added 1 new phrases in 1.24 seconds (Learned 196 of max 200)\n",
"Iteration 13: Added 1 new phrases in 1.04 seconds (Learned 197 of max 200)\n",
"Iteration 14: Added 1 new phrases in 1.01 seconds (Learned 198 of max 200)\n",
"Iteration 15: Added 1 new phrases in 0.96 seconds (Learned 199 of max 200)\n",
"Iteration 16: Added 1 new phrases in 1.00 seconds (Learned 200 of max 200)\n",
"Iteration 14: Added 1 new phrases in 1.00 seconds (Learned 198 of max 200)\n",
"Iteration 15: Added 1 new phrases in 0.95 seconds (Learned 199 of max 200)\n",
"Iteration 16: Added 1 new phrases in 0.94 seconds (Learned 200 of max 200)\n",
"*** Phrase learning completed in 0.00 hours ***\n"
]
}
@ -1547,7 +1543,15 @@
"metadata": {
"collapsed": false
},
"outputs": [],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Wall time: 48 ms\n"
]
}
],
"source": [
"X_train, Y_train = tfidfTrain.T, np.array(trainQ['AnswerId'])\n",
"X_test = tfidfTest.T\n",
@ -1655,7 +1659,7 @@
},
{
"cell_type": "code",
"execution_count": 37,
"execution_count": 34,
"metadata": {
"collapsed": false
},