HIGGS cpu
This commit is contained in:
Родитель
a00d377484
Коммит
2bb77a0aaf
|
@ -16,25 +16,18 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"execution_count": 3,
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"deletable": true,
|
||||
"editable": true
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Using TensorFlow backend.\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"System version: 3.6.1 |Anaconda custom (64-bit)| (default, May 11 2017, 13:09:58) \n",
|
||||
"System version: 3.5.2 |Anaconda custom (64-bit)| (default, Jul 2 2016, 17:53:06) \n",
|
||||
"[GCC 4.4.7 20120313 (Red Hat 4.4.7-1)]\n",
|
||||
"XGBoost version: 0.6\n",
|
||||
"LightGBM version: 0.2\n"
|
||||
|
@ -53,6 +46,8 @@
|
|||
"from sklearn.model_selection import train_test_split\n",
|
||||
"from xgboost import XGBClassifier\n",
|
||||
"from lightgbm import LGBMClassifier\n",
|
||||
"import warnings\n",
|
||||
"warnings.filterwarnings('ignore')\n",
|
||||
"\n",
|
||||
"print(\"System version: {}\".format(sys.version))\n",
|
||||
"print(\"XGBoost version: {}\".format(pkg_resources.get_distribution('xgboost').version))\n",
|
||||
|
@ -61,7 +56,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"execution_count": 5,
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"deletable": true,
|
||||
|
@ -72,30 +67,6 @@
|
|||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"/home/hoaphumanoid/anaconda3/envs/strata2/lib/python3.6/site-packages/jupyter_client/jsonutil.py:67: DeprecationWarning: Interpreting naive datetime as local 2017-06-30 09:20:30.066793. Please add timezone info to timestamps.\n",
|
||||
" new_obj[k] = extract_dates(v)\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"random_seed = 42"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"deletable": true,
|
||||
"editable": true
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"/home/hoaphumanoid/anaconda3/envs/strata2/lib/python3.6/site-packages/jupyter_client/jsonutil.py:67: DeprecationWarning: Interpreting naive datetime as local 2017-06-30 09:20:30.074798. Please add timezone info to timestamps.\n",
|
||||
" new_obj[k] = extract_dates(v)\n",
|
||||
"INFO:libs.loaders:MOUNT_POINT not found in environment. Defaulting to /fileshare\n"
|
||||
]
|
||||
},
|
||||
|
@ -104,8 +75,8 @@
|
|||
"output_type": "stream",
|
||||
"text": [
|
||||
"(11000000, 29)\n",
|
||||
"CPU times: user 1min 35s, sys: 9.88 s, total: 1min 45s\n",
|
||||
"Wall time: 7min 54s\n"
|
||||
"CPU times: user 1min 14s, sys: 5.65 s, total: 1min 20s\n",
|
||||
"Wall time: 5min 53s\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
|
@ -117,38 +88,17 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"execution_count": 6,
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"deletable": true,
|
||||
"editable": true
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"/home/hoaphumanoid/anaconda3/envs/strata2/lib/python3.6/site-packages/jupyter_client/jsonutil.py:67: DeprecationWarning: Interpreting naive datetime as local 2017-06-30 09:20:30.088046. Please add timezone info to timestamps.\n",
|
||||
" new_obj[k] = extract_dates(v)\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/html": [
|
||||
"<div>\n",
|
||||
"<style>\n",
|
||||
" .dataframe thead tr:only-child th {\n",
|
||||
" text-align: right;\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
" .dataframe thead th {\n",
|
||||
" text-align: left;\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
" .dataframe tbody tr th {\n",
|
||||
" vertical-align: top;\n",
|
||||
" }\n",
|
||||
"</style>\n",
|
||||
"<table border=\"1\" class=\"dataframe\">\n",
|
||||
" <thead>\n",
|
||||
" <tr style=\"text-align: right;\">\n",
|
||||
|
@ -334,7 +284,7 @@
|
|||
"[5 rows x 29 columns]"
|
||||
]
|
||||
},
|
||||
"execution_count": 4,
|
||||
"execution_count": 6,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
|
@ -345,7 +295,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"execution_count": 7,
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"deletable": true,
|
||||
|
@ -356,15 +306,7 @@
|
|||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"20\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"/home/hoaphumanoid/anaconda3/envs/strata2/lib/python3.6/site-packages/jupyter_client/jsonutil.py:67: DeprecationWarning: Interpreting naive datetime as local 2017-06-30 09:20:30.113447. Please add timezone info to timestamps.\n",
|
||||
" new_obj[k] = extract_dates(v)\n"
|
||||
"24\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
|
@ -376,24 +318,13 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"execution_count": 8,
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"deletable": true,
|
||||
"editable": true
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"/home/hoaphumanoid/anaconda3/envs/strata2/lib/python3.6/site-packages/jupyter_client/jsonutil.py:67: DeprecationWarning: Interpreting naive datetime as local 2017-06-30 09:20:30.129374. Please add timezone info to timestamps.\n",
|
||||
" new_obj[k] = extract_dates(v)\n",
|
||||
"/home/hoaphumanoid/anaconda3/envs/strata2/lib/python3.6/site-packages/xgboost-0.6-py3.6.egg/xgboost/sklearn.py:171: DeprecationWarning: The nthread parameter is deprecated as of version .6.Please use n_jobs instead.nthread is deprecated.\n",
|
||||
" 'nthread is deprecated.', DeprecationWarning)\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"xgb_clf_pipeline = XGBClassifier(max_depth=5, \n",
|
||||
" learning_rate=0.1, \n",
|
||||
|
@ -409,24 +340,13 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"execution_count": 9,
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"deletable": true,
|
||||
"editable": true
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"/home/hoaphumanoid/anaconda3/envs/strata2/lib/python3.6/site-packages/jupyter_client/jsonutil.py:67: DeprecationWarning: Interpreting naive datetime as local 2017-06-30 09:20:30.146323. Please add timezone info to timestamps.\n",
|
||||
" new_obj[k] = extract_dates(v)\n",
|
||||
"/home/hoaphumanoid/anaconda3/envs/strata2/lib/python3.6/site-packages/xgboost-0.6-py3.6.egg/xgboost/sklearn.py:171: DeprecationWarning: The nthread parameter is deprecated as of version .6.Please use n_jobs instead.nthread is deprecated.\n",
|
||||
" 'nthread is deprecated.', DeprecationWarning)\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"xgb_hist_clf_pipeline = XGBClassifier(max_depth=0, \n",
|
||||
" learning_rate=0.1, \n",
|
||||
|
@ -445,22 +365,13 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"execution_count": 10,
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"deletable": true,
|
||||
"editable": true
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"/home/hoaphumanoid/anaconda3/envs/strata2/lib/python3.6/site-packages/jupyter_client/jsonutil.py:67: DeprecationWarning: Interpreting naive datetime as local 2017-06-30 09:20:30.160326. Please add timezone info to timestamps.\n",
|
||||
" new_obj[k] = extract_dates(v)\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"lgbm_clf_pipeline = LGBMClassifier(num_leaves=2**5, \n",
|
||||
" learning_rate=0.1, \n",
|
||||
|
@ -476,22 +387,13 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
"execution_count": 11,
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"deletable": true,
|
||||
"editable": true
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"/home/hoaphumanoid/anaconda3/envs/strata2/lib/python3.6/site-packages/jupyter_client/jsonutil.py:67: DeprecationWarning: Interpreting naive datetime as local 2017-06-30 09:20:30.168690. Please add timezone info to timestamps.\n",
|
||||
" new_obj[k] = extract_dates(v)\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"metrics_dict = {\n",
|
||||
" 'Accuracy': accuracy_score,\n",
|
||||
|
@ -505,53 +407,6 @@
|
|||
" return {metric_name:metric(y_true, y_pred) for metric_name, metric in metrics.items()}"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 10,
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"deletable": true,
|
||||
"editable": true
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"/home/hoaphumanoid/anaconda3/envs/strata2/lib/python3.6/site-packages/jupyter_client/jsonutil.py:67: DeprecationWarning: Interpreting naive datetime as local 2017-06-30 09:20:30.175556. Please add timezone info to timestamps.\n",
|
||||
" new_obj[k] = extract_dates(v)\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"def generate_feables(df):\n",
|
||||
" X = df[df.columns.difference(['boson'])]\n",
|
||||
" y = df['boson']\n",
|
||||
" return X,y"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 11,
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"deletable": true,
|
||||
"editable": true
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"/home/hoaphumanoid/anaconda3/envs/strata2/lib/python3.6/site-packages/jupyter_client/jsonutil.py:67: DeprecationWarning: Interpreting naive datetime as local 2017-06-30 09:20:30.180996. Please add timezone info to timestamps.\n",
|
||||
" new_obj[k] = extract_dates(v)\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"X, y = generate_feables(df)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 12,
|
||||
|
@ -560,18 +415,12 @@
|
|||
"deletable": true,
|
||||
"editable": true
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"/home/hoaphumanoid/anaconda3/envs/strata2/lib/python3.6/site-packages/jupyter_client/jsonutil.py:67: DeprecationWarning: Interpreting naive datetime as local 2017-06-30 09:20:30.187577. Please add timezone info to timestamps.\n",
|
||||
" new_obj[k] = extract_dates(v)\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"X_train, X_test, y_train, y_test = train_test_split(X, y, stratify=y, random_state=random_seed, test_size=500000)"
|
||||
"def generate_feables(df):\n",
|
||||
" X = df[df.columns.difference(['boson'])]\n",
|
||||
" y = df['boson']\n",
|
||||
" return X,y"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
@ -582,16 +431,33 @@
|
|||
"deletable": true,
|
||||
"editable": true
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"/home/hoaphumanoid/anaconda3/envs/strata2/lib/python3.6/site-packages/jupyter_client/jsonutil.py:67: DeprecationWarning: Interpreting naive datetime as local 2017-06-30 09:20:30.193310. Please add timezone info to timestamps.\n",
|
||||
" new_obj[k] = extract_dates(v)\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"X, y = generate_feables(df)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 14,
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"deletable": true,
|
||||
"editable": true
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"X_train, X_test, y_train, y_test = train_test_split(X, y, stratify=y, random_state=77, test_size=500000)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 15,
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"deletable": true,
|
||||
"editable": true
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"results_dict = dict()"
|
||||
]
|
||||
|
@ -608,22 +474,13 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 14,
|
||||
"execution_count": 16,
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"deletable": true,
|
||||
"editable": true
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"/home/hoaphumanoid/anaconda3/envs/strata2/lib/python3.6/site-packages/jupyter_client/jsonutil.py:67: DeprecationWarning: Interpreting naive datetime as local 2017-06-30 09:20:30.199845. Please add timezone info to timestamps.\n",
|
||||
" new_obj[k] = extract_dates(v)\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"with Timer() as train_t:\n",
|
||||
" xgb_clf_pipeline.fit(X_train,y_train)\n",
|
||||
|
@ -634,7 +491,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 15,
|
||||
"execution_count": 17,
|
||||
"metadata": {
|
||||
"collapsed": true,
|
||||
"deletable": true,
|
||||
|
@ -653,7 +510,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 16,
|
||||
"execution_count": 18,
|
||||
"metadata": {
|
||||
"collapsed": true,
|
||||
"deletable": true,
|
||||
|
@ -667,7 +524,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 17,
|
||||
"execution_count": 19,
|
||||
"metadata": {
|
||||
"collapsed": true,
|
||||
"deletable": true,
|
||||
|
@ -681,7 +538,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 18,
|
||||
"execution_count": 20,
|
||||
"metadata": {
|
||||
"collapsed": true,
|
||||
"deletable": true,
|
||||
|
@ -710,7 +567,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 19,
|
||||
"execution_count": 21,
|
||||
"metadata": {
|
||||
"collapsed": true,
|
||||
"deletable": true,
|
||||
|
@ -727,7 +584,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 20,
|
||||
"execution_count": 22,
|
||||
"metadata": {
|
||||
"collapsed": true,
|
||||
"deletable": true,
|
||||
|
@ -746,7 +603,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 21,
|
||||
"execution_count": 23,
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"deletable": true,
|
||||
|
@ -766,8 +623,8 @@
|
|||
" \"Precision\": 0.6514447704512661,\n",
|
||||
" \"Recall\": 0.9167383756038647\n",
|
||||
" },\n",
|
||||
" \"test_time\": 0.5048187420034083,\n",
|
||||
" \"train_time\": 77.36947308199888\n",
|
||||
" \"test_time\": 0.4961782629998197,\n",
|
||||
" \"train_time\": 71.38830216599854\n",
|
||||
" },\n",
|
||||
" \"xgb\": {\n",
|
||||
" \"performance\": {\n",
|
||||
|
@ -777,8 +634,8 @@
|
|||
" \"Precision\": 0.6418233549373553,\n",
|
||||
" \"Recall\": 0.922037288647343\n",
|
||||
" },\n",
|
||||
" \"test_time\": 0.4216979429911589,\n",
|
||||
" \"train_time\": 336.8079112419946\n",
|
||||
" \"test_time\": 0.417350155999884,\n",
|
||||
" \"train_time\": 1474.1256882889993\n",
|
||||
" },\n",
|
||||
" \"xgb_hist\": {\n",
|
||||
" \"performance\": {\n",
|
||||
|
@ -788,8 +645,8 @@
|
|||
" \"Precision\": 0.6513028739393029,\n",
|
||||
" \"Recall\": 0.9165534420289855\n",
|
||||
" },\n",
|
||||
" \"test_time\": 0.4771806640055729,\n",
|
||||
" \"train_time\": 102.01795201399364\n",
|
||||
" \"test_time\": 0.4240018679993227,\n",
|
||||
" \"train_time\": 79.5958247769995\n",
|
||||
" }\n",
|
||||
"}\n"
|
||||
]
|
||||
|
|
Загрузка…
Ссылка в новой задаче