This commit is contained in:
miguelgfierro 2017-07-10 12:32:48 +00:00
Родитель a00d377484
Коммит 2bb77a0aaf
1 изменённых файлов: 66 добавлений и 209 удалений

Просмотреть файл

@ -16,25 +16,18 @@
},
{
"cell_type": "code",
"execution_count": 1,
"execution_count": 3,
"metadata": {
"collapsed": false,
"deletable": true,
"editable": true
},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"Using TensorFlow backend.\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"System version: 3.6.1 |Anaconda custom (64-bit)| (default, May 11 2017, 13:09:58) \n",
"System version: 3.5.2 |Anaconda custom (64-bit)| (default, Jul 2 2016, 17:53:06) \n",
"[GCC 4.4.7 20120313 (Red Hat 4.4.7-1)]\n",
"XGBoost version: 0.6\n",
"LightGBM version: 0.2\n"
@ -53,6 +46,8 @@
"from sklearn.model_selection import train_test_split\n",
"from xgboost import XGBClassifier\n",
"from lightgbm import LGBMClassifier\n",
"import warnings\n",
"warnings.filterwarnings('ignore')\n",
"\n",
"print(\"System version: {}\".format(sys.version))\n",
"print(\"XGBoost version: {}\".format(pkg_resources.get_distribution('xgboost').version))\n",
@ -61,7 +56,7 @@
},
{
"cell_type": "code",
"execution_count": 2,
"execution_count": 5,
"metadata": {
"collapsed": false,
"deletable": true,
@ -72,30 +67,6 @@
"name": "stderr",
"output_type": "stream",
"text": [
"/home/hoaphumanoid/anaconda3/envs/strata2/lib/python3.6/site-packages/jupyter_client/jsonutil.py:67: DeprecationWarning: Interpreting naive datetime as local 2017-06-30 09:20:30.066793. Please add timezone info to timestamps.\n",
" new_obj[k] = extract_dates(v)\n"
]
}
],
"source": [
"random_seed = 42"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"collapsed": false,
"deletable": true,
"editable": true
},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/home/hoaphumanoid/anaconda3/envs/strata2/lib/python3.6/site-packages/jupyter_client/jsonutil.py:67: DeprecationWarning: Interpreting naive datetime as local 2017-06-30 09:20:30.074798. Please add timezone info to timestamps.\n",
" new_obj[k] = extract_dates(v)\n",
"INFO:libs.loaders:MOUNT_POINT not found in environment. Defaulting to /fileshare\n"
]
},
@ -104,8 +75,8 @@
"output_type": "stream",
"text": [
"(11000000, 29)\n",
"CPU times: user 1min 35s, sys: 9.88 s, total: 1min 45s\n",
"Wall time: 7min 54s\n"
"CPU times: user 1min 14s, sys: 5.65 s, total: 1min 20s\n",
"Wall time: 5min 53s\n"
]
}
],
@ -117,38 +88,17 @@
},
{
"cell_type": "code",
"execution_count": 4,
"execution_count": 6,
"metadata": {
"collapsed": false,
"deletable": true,
"editable": true
},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/home/hoaphumanoid/anaconda3/envs/strata2/lib/python3.6/site-packages/jupyter_client/jsonutil.py:67: DeprecationWarning: Interpreting naive datetime as local 2017-06-30 09:20:30.088046. Please add timezone info to timestamps.\n",
" new_obj[k] = extract_dates(v)\n"
]
},
{
"data": {
"text/html": [
"<div>\n",
"<style>\n",
" .dataframe thead tr:only-child th {\n",
" text-align: right;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: left;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
@ -334,7 +284,7 @@
"[5 rows x 29 columns]"
]
},
"execution_count": 4,
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
@ -345,7 +295,7 @@
},
{
"cell_type": "code",
"execution_count": 5,
"execution_count": 7,
"metadata": {
"collapsed": false,
"deletable": true,
@ -356,15 +306,7 @@
"name": "stdout",
"output_type": "stream",
"text": [
"20\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/home/hoaphumanoid/anaconda3/envs/strata2/lib/python3.6/site-packages/jupyter_client/jsonutil.py:67: DeprecationWarning: Interpreting naive datetime as local 2017-06-30 09:20:30.113447. Please add timezone info to timestamps.\n",
" new_obj[k] = extract_dates(v)\n"
"24\n"
]
}
],
@ -376,24 +318,13 @@
},
{
"cell_type": "code",
"execution_count": 6,
"execution_count": 8,
"metadata": {
"collapsed": false,
"deletable": true,
"editable": true
},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/home/hoaphumanoid/anaconda3/envs/strata2/lib/python3.6/site-packages/jupyter_client/jsonutil.py:67: DeprecationWarning: Interpreting naive datetime as local 2017-06-30 09:20:30.129374. Please add timezone info to timestamps.\n",
" new_obj[k] = extract_dates(v)\n",
"/home/hoaphumanoid/anaconda3/envs/strata2/lib/python3.6/site-packages/xgboost-0.6-py3.6.egg/xgboost/sklearn.py:171: DeprecationWarning: The nthread parameter is deprecated as of version .6.Please use n_jobs instead.nthread is deprecated.\n",
" 'nthread is deprecated.', DeprecationWarning)\n"
]
}
],
"outputs": [],
"source": [
"xgb_clf_pipeline = XGBClassifier(max_depth=5, \n",
" learning_rate=0.1, \n",
@ -409,24 +340,13 @@
},
{
"cell_type": "code",
"execution_count": 7,
"execution_count": 9,
"metadata": {
"collapsed": false,
"deletable": true,
"editable": true
},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/home/hoaphumanoid/anaconda3/envs/strata2/lib/python3.6/site-packages/jupyter_client/jsonutil.py:67: DeprecationWarning: Interpreting naive datetime as local 2017-06-30 09:20:30.146323. Please add timezone info to timestamps.\n",
" new_obj[k] = extract_dates(v)\n",
"/home/hoaphumanoid/anaconda3/envs/strata2/lib/python3.6/site-packages/xgboost-0.6-py3.6.egg/xgboost/sklearn.py:171: DeprecationWarning: The nthread parameter is deprecated as of version .6.Please use n_jobs instead.nthread is deprecated.\n",
" 'nthread is deprecated.', DeprecationWarning)\n"
]
}
],
"outputs": [],
"source": [
"xgb_hist_clf_pipeline = XGBClassifier(max_depth=0, \n",
" learning_rate=0.1, \n",
@ -445,22 +365,13 @@
},
{
"cell_type": "code",
"execution_count": 8,
"execution_count": 10,
"metadata": {
"collapsed": false,
"deletable": true,
"editable": true
},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/home/hoaphumanoid/anaconda3/envs/strata2/lib/python3.6/site-packages/jupyter_client/jsonutil.py:67: DeprecationWarning: Interpreting naive datetime as local 2017-06-30 09:20:30.160326. Please add timezone info to timestamps.\n",
" new_obj[k] = extract_dates(v)\n"
]
}
],
"outputs": [],
"source": [
"lgbm_clf_pipeline = LGBMClassifier(num_leaves=2**5, \n",
" learning_rate=0.1, \n",
@ -476,22 +387,13 @@
},
{
"cell_type": "code",
"execution_count": 9,
"execution_count": 11,
"metadata": {
"collapsed": false,
"deletable": true,
"editable": true
},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/home/hoaphumanoid/anaconda3/envs/strata2/lib/python3.6/site-packages/jupyter_client/jsonutil.py:67: DeprecationWarning: Interpreting naive datetime as local 2017-06-30 09:20:30.168690. Please add timezone info to timestamps.\n",
" new_obj[k] = extract_dates(v)\n"
]
}
],
"outputs": [],
"source": [
"metrics_dict = {\n",
" 'Accuracy': accuracy_score,\n",
@ -505,53 +407,6 @@
" return {metric_name:metric(y_true, y_pred) for metric_name, metric in metrics.items()}"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {
"collapsed": false,
"deletable": true,
"editable": true
},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/home/hoaphumanoid/anaconda3/envs/strata2/lib/python3.6/site-packages/jupyter_client/jsonutil.py:67: DeprecationWarning: Interpreting naive datetime as local 2017-06-30 09:20:30.175556. Please add timezone info to timestamps.\n",
" new_obj[k] = extract_dates(v)\n"
]
}
],
"source": [
"def generate_feables(df):\n",
" X = df[df.columns.difference(['boson'])]\n",
" y = df['boson']\n",
" return X,y"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {
"collapsed": false,
"deletable": true,
"editable": true
},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/home/hoaphumanoid/anaconda3/envs/strata2/lib/python3.6/site-packages/jupyter_client/jsonutil.py:67: DeprecationWarning: Interpreting naive datetime as local 2017-06-30 09:20:30.180996. Please add timezone info to timestamps.\n",
" new_obj[k] = extract_dates(v)\n"
]
}
],
"source": [
"X, y = generate_feables(df)"
]
},
{
"cell_type": "code",
"execution_count": 12,
@ -560,18 +415,12 @@
"deletable": true,
"editable": true
},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/home/hoaphumanoid/anaconda3/envs/strata2/lib/python3.6/site-packages/jupyter_client/jsonutil.py:67: DeprecationWarning: Interpreting naive datetime as local 2017-06-30 09:20:30.187577. Please add timezone info to timestamps.\n",
" new_obj[k] = extract_dates(v)\n"
]
}
],
"outputs": [],
"source": [
"X_train, X_test, y_train, y_test = train_test_split(X, y, stratify=y, random_state=random_seed, test_size=500000)"
"def generate_feables(df):\n",
" X = df[df.columns.difference(['boson'])]\n",
" y = df['boson']\n",
" return X,y"
]
},
{
@ -582,16 +431,33 @@
"deletable": true,
"editable": true
},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/home/hoaphumanoid/anaconda3/envs/strata2/lib/python3.6/site-packages/jupyter_client/jsonutil.py:67: DeprecationWarning: Interpreting naive datetime as local 2017-06-30 09:20:30.193310. Please add timezone info to timestamps.\n",
" new_obj[k] = extract_dates(v)\n"
]
}
],
"outputs": [],
"source": [
"X, y = generate_feables(df)"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {
"collapsed": false,
"deletable": true,
"editable": true
},
"outputs": [],
"source": [
"X_train, X_test, y_train, y_test = train_test_split(X, y, stratify=y, random_state=77, test_size=500000)"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {
"collapsed": false,
"deletable": true,
"editable": true
},
"outputs": [],
"source": [
"results_dict = dict()"
]
@ -608,22 +474,13 @@
},
{
"cell_type": "code",
"execution_count": 14,
"execution_count": 16,
"metadata": {
"collapsed": false,
"deletable": true,
"editable": true
},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/home/hoaphumanoid/anaconda3/envs/strata2/lib/python3.6/site-packages/jupyter_client/jsonutil.py:67: DeprecationWarning: Interpreting naive datetime as local 2017-06-30 09:20:30.199845. Please add timezone info to timestamps.\n",
" new_obj[k] = extract_dates(v)\n"
]
}
],
"outputs": [],
"source": [
"with Timer() as train_t:\n",
" xgb_clf_pipeline.fit(X_train,y_train)\n",
@ -634,7 +491,7 @@
},
{
"cell_type": "code",
"execution_count": 15,
"execution_count": 17,
"metadata": {
"collapsed": true,
"deletable": true,
@ -653,7 +510,7 @@
},
{
"cell_type": "code",
"execution_count": 16,
"execution_count": 18,
"metadata": {
"collapsed": true,
"deletable": true,
@ -667,7 +524,7 @@
},
{
"cell_type": "code",
"execution_count": 17,
"execution_count": 19,
"metadata": {
"collapsed": true,
"deletable": true,
@ -681,7 +538,7 @@
},
{
"cell_type": "code",
"execution_count": 18,
"execution_count": 20,
"metadata": {
"collapsed": true,
"deletable": true,
@ -710,7 +567,7 @@
},
{
"cell_type": "code",
"execution_count": 19,
"execution_count": 21,
"metadata": {
"collapsed": true,
"deletable": true,
@ -727,7 +584,7 @@
},
{
"cell_type": "code",
"execution_count": 20,
"execution_count": 22,
"metadata": {
"collapsed": true,
"deletable": true,
@ -746,7 +603,7 @@
},
{
"cell_type": "code",
"execution_count": 21,
"execution_count": 23,
"metadata": {
"collapsed": false,
"deletable": true,
@ -766,8 +623,8 @@
" \"Precision\": 0.6514447704512661,\n",
" \"Recall\": 0.9167383756038647\n",
" },\n",
" \"test_time\": 0.5048187420034083,\n",
" \"train_time\": 77.36947308199888\n",
" \"test_time\": 0.4961782629998197,\n",
" \"train_time\": 71.38830216599854\n",
" },\n",
" \"xgb\": {\n",
" \"performance\": {\n",
@ -777,8 +634,8 @@
" \"Precision\": 0.6418233549373553,\n",
" \"Recall\": 0.922037288647343\n",
" },\n",
" \"test_time\": 0.4216979429911589,\n",
" \"train_time\": 336.8079112419946\n",
" \"test_time\": 0.417350155999884,\n",
" \"train_time\": 1474.1256882889993\n",
" },\n",
" \"xgb_hist\": {\n",
" \"performance\": {\n",
@ -788,8 +645,8 @@
" \"Precision\": 0.6513028739393029,\n",
" \"Recall\": 0.9165534420289855\n",
" },\n",
" \"test_time\": 0.4771806640055729,\n",
" \"train_time\": 102.01795201399364\n",
" \"test_time\": 0.4240018679993227,\n",
" \"train_time\": 79.5958247769995\n",
" }\n",
"}\n"
]