Get datasets from web instead of blob

Signed-off-by: Keith Battocchi <kebatt@microsoft.com>
This commit is contained in:
Keith Battocchi 2023-06-12 17:13:51 -04:00 коммит произвёл Keith Battocchi
Родитель b0936acc51
Коммит 5c6d87b71f
11 изменённых файлов: 16 добавлений и 16 удалений

Просмотреть файл

@@ -20,7 +20,7 @@ def test_dominicks():
if not os.path.isfile(file_name):
print("Downloading file (this might take a few seconds)...")
urllib.request.urlretrieve(
-"https://msalicedatapublic.blob.core.windows.net/datasets/OrangeJuice/oj_large.csv", file_name)
+"https://msalicedatapublic.z5.web.core.windows.net/datasets/OrangeJuice/oj_large.csv", file_name)
oj_data = pd.read_csv(file_name)
brands = sorted(set(oj_data["brand"]))

Просмотреть файл

@@ -1510,7 +1510,7 @@
"\n",
"if not os.path.isfile(file_name):\n",
" print(\"Downloading file (this might take a few seconds)...\")\n",
-" urllib.request.urlretrieve(\"https://msalicedatapublic.blob.core.windows.net/datasets/OrangeJuice/oj_large.csv\", file_name)\n",
+" urllib.request.urlretrieve(\"https://msalicedatapublic.z5.web.core.windows.net/datasets/OrangeJuice/oj_large.csv\", file_name)\n",
"oj_data = pd.read_csv(file_name)\n",
"oj_data.head()"
]

Просмотреть файл

@@ -137,7 +137,7 @@
"outputs": [],
"source": [
"# Import the sample pricing data\n",
-"file_url = \"https://msalicedatapublic.blob.core.windows.net/datasets/Pricing/pricing_sample.csv\"\n",
+"file_url = \"https://msalicedatapublic.z5.web.core.windows.net/datasets/Pricing/pricing_sample.csv\"\n",
"train_data = pd.read_csv(file_url)"
]
},

Просмотреть файл

@@ -122,7 +122,7 @@
"execution_count": 2,
"source": [
"# Import the sample pricing data\n",
-"file_url = \"https://msalicedatapublic.blob.core.windows.net/datasets/Pricing/pricing_sample.csv\"\n",
+"file_url = \"https://msalicedatapublic.z5.web.core.windows.net/datasets/Pricing/pricing_sample.csv\"\n",
"train_data = pd.read_csv(file_url)"
],
"outputs": [],

Просмотреть файл

@@ -128,7 +128,7 @@
"outputs": [],
"source": [
"# Import the sample multi-attribution data\n",
-"file_url = \"https://msalicedatapublic.blob.core.windows.net/datasets/ROI/multi_attribution_sample.csv\"\n",
+"file_url = \"https://msalicedatapublic.z5.web.core.windows.net/datasets/ROI/multi_attribution_sample.csv\"\n",
"multi_data = pd.read_csv(file_url)"
]
},

Просмотреть файл

@@ -107,7 +107,7 @@
"execution_count": 2,
"source": [
"# Import the sample multi-attribution data\n",
-"file_url = \"https://msalicedatapublic.blob.core.windows.net/datasets/ROI/multi_attribution_sample.csv\"\n",
+"file_url = \"https://msalicedatapublic.z5.web.core.windows.net/datasets/ROI/multi_attribution_sample.csv\"\n",
"multi_data = pd.read_csv(file_url)"
],
"outputs": [],

Просмотреть файл

@@ -150,7 +150,7 @@
"outputs": [],
"source": [
"# Import the sample AB data\n",
-"file_url = \"https://msalicedatapublic.blob.core.windows.net/datasets/RecommendationAB/ab_sample.csv\" \n",
+"file_url = \"https://msalicedatapublic.z5.web.core.windows.net/datasets/RecommendationAB/ab_sample.csv\" \n",
"ab_data = pd.read_csv(file_url)"
]
},

Просмотреть файл

@@ -129,7 +129,7 @@
"execution_count": 2,
"source": [
"# Import the sample AB data\n",
-"file_url = \"https://msalicedatapublic.blob.core.windows.net/datasets/RecommendationAB/ab_sample.csv\" \n",
+"file_url = \"https://msalicedatapublic.z5.web.core.windows.net/datasets/RecommendationAB/ab_sample.csv\" \n",
"ab_data = pd.read_csv(file_url)"
],
"outputs": [],

Просмотреть файл

@@ -336,7 +336,7 @@
"# Data\n",
"## female\n",
"### read in and slice data\n",
-"female_data = pd.read_csv('https://msalicedatapublic.blob.core.windows.net/datasets/Lalonde/calonico_smith_all.csv')\n",
+"female_data = pd.read_csv('https://msalicedatapublic.z5.web.core.windows.net/datasets/Lalonde/calonico_smith_all.csv')\n",
"female_data[\"haschild\"]=(female_data[\"nchildren75\"]>0)*1\n",
"female_data = female_data[pd.notnull(female_data.re75) & pd.notnull(female_data.re79)]\n",
"female_treatment = female_data[female_data.treated==1.].copy()\n",
@@ -349,13 +349,13 @@
"\n",
"## male\n",
"### read in and slice data\n",
-"male_data = pd.read_csv('https://msalicedatapublic.blob.core.windows.net/datasets/Lalonde/smith_todd.csv')\n",
+"male_data = pd.read_csv('https://msalicedatapublic.z5.web.core.windows.net/datasets/Lalonde/smith_todd.csv')\n",
"male_treatment = male_data[male_data.treated==1.].copy()\n",
"male_control = male_data[male_data.treated==0.].copy()\n",
-"male_cps1 = pd.read_csv('https://msalicedatapublic.blob.core.windows.net/datasets/Lalonde/cps_controls.csv')\n",
-"male_psid1 = pd.read_csv('https://msalicedatapublic.blob.core.windows.net/datasets/Lalonde/psid_controls.csv')\n",
-"male_cps3 = pd.read_csv('https://msalicedatapublic.blob.core.windows.net/datasets/Lalonde/cps_controls3.csv')\n",
-"male_psid3 = pd.read_csv('https://msalicedatapublic.blob.core.windows.net/datasets/Lalonde/psid_controls3.csv')\n",
+"male_cps1 = pd.read_csv('https://msalicedatapublic.z5.web.core.windows.net/datasets/Lalonde/cps_controls.csv')\n",
+"male_psid1 = pd.read_csv('https://msalicedatapublic.z5.web.core.windows.net/datasets/Lalonde/psid_controls.csv')\n",
+"male_cps3 = pd.read_csv('https://msalicedatapublic.z5.web.core.windows.net/datasets/Lalonde/cps_controls3.csv')\n",
+"male_psid3 = pd.read_csv('https://msalicedatapublic.z5.web.core.windows.net/datasets/Lalonde/psid_controls3.csv')\n",
"### some preprocessing\n",
"for df in [male_psid1,male_psid3,male_cps1,male_cps3]:\n",
" df.rename(columns={'treat':'treated', 'education':'educ', 'hispanic':'hisp'}, inplace=True)"

Просмотреть файл

@@ -1925,7 +1925,7 @@
"\n",
"if not os.path.isfile(file_name):\n",
" print(\"Downloading file (this might take a few seconds)...\")\n",
-" urllib.request.urlretrieve(\"https://msalicedatapublic.blob.core.windows.net/datasets/OrangeJuice/oj_large.csv\", file_name)\n",
+" urllib.request.urlretrieve(\"https://msalicedatapublic.z5.web.core.windows.net/datasets/OrangeJuice/oj_large.csv\", file_name)\n",
"oj_data = pd.read_csv(file_name)"
]
},

Просмотреть файл

@@ -375,7 +375,7 @@
}
],
"source": [
-"file_url = \"https://msalicedatapublic.blob.core.windows.net/datasets/EmployeeAttrition/Employee-Attrition.csv\"\n",
+"file_url = \"https://msalicedatapublic.z5.web.core.windows.net/datasets/EmployeeAttrition/Employee-Attrition.csv\"\n",
"attritionData = pd.read_csv(file_url)\n",
"attritionData.head(5)"
]