From 2bb77a0aaf59be39f13182ebb264e5bd0f2fa5c0 Mon Sep 17 00:00:00 2001 From: miguelgfierro Date: Mon, 10 Jul 2017 12:32:48 +0000 Subject: [PATCH] HIGGS cpu --- experiments/06_HIGGS.ipynb | 275 +++++++++---------------------------- 1 file changed, 66 insertions(+), 209 deletions(-) diff --git a/experiments/06_HIGGS.ipynb b/experiments/06_HIGGS.ipynb index 2cb9d0b..5ca05ba 100644 --- a/experiments/06_HIGGS.ipynb +++ b/experiments/06_HIGGS.ipynb @@ -16,25 +16,18 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 3, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Using TensorFlow backend.\n" - ] - }, { "name": "stdout", "output_type": "stream", "text": [ - "System version: 3.6.1 |Anaconda custom (64-bit)| (default, May 11 2017, 13:09:58) \n", + "System version: 3.5.2 |Anaconda custom (64-bit)| (default, Jul 2 2016, 17:53:06) \n", "[GCC 4.4.7 20120313 (Red Hat 4.4.7-1)]\n", "XGBoost version: 0.6\n", "LightGBM version: 0.2\n" @@ -53,6 +46,8 @@ "from sklearn.model_selection import train_test_split\n", "from xgboost import XGBClassifier\n", "from lightgbm import LGBMClassifier\n", + "import warnings\n", + "warnings.filterwarnings('ignore')\n", "\n", "print(\"System version: {}\".format(sys.version))\n", "print(\"XGBoost version: {}\".format(pkg_resources.get_distribution('xgboost').version))\n", @@ -61,7 +56,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 5, "metadata": { "collapsed": false, "deletable": true, @@ -72,30 +67,6 @@ "name": "stderr", "output_type": "stream", "text": [ - "/home/hoaphumanoid/anaconda3/envs/strata2/lib/python3.6/site-packages/jupyter_client/jsonutil.py:67: DeprecationWarning: Interpreting naive datetime as local 2017-06-30 09:20:30.066793. Please add timezone info to timestamps.\n", - " new_obj[k] = extract_dates(v)\n" - ] - } - ], - "source": [ - "random_seed = 42" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": { - "collapsed": false, - "deletable": true, - "editable": true - }, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/home/hoaphumanoid/anaconda3/envs/strata2/lib/python3.6/site-packages/jupyter_client/jsonutil.py:67: DeprecationWarning: Interpreting naive datetime as local 2017-06-30 09:20:30.074798. Please add timezone info to timestamps.\n", - " new_obj[k] = extract_dates(v)\n", "INFO:libs.loaders:MOUNT_POINT not found in environment. Defaulting to /fileshare\n" ] }, @@ -104,8 +75,8 @@ "output_type": "stream", "text": [ "(11000000, 29)\n", - "CPU times: user 1min 35s, sys: 9.88 s, total: 1min 45s\n", - "Wall time: 7min 54s\n" + "CPU times: user 1min 14s, sys: 5.65 s, total: 1min 20s\n", + "Wall time: 5min 53s\n" ] } ], @@ -117,38 +88,17 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 6, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/home/hoaphumanoid/anaconda3/envs/strata2/lib/python3.6/site-packages/jupyter_client/jsonutil.py:67: DeprecationWarning: Interpreting naive datetime as local 2017-06-30 09:20:30.088046. Please add timezone info to timestamps.\n", - " new_obj[k] = extract_dates(v)\n" - ] - }, { "data": { "text/html": [ "
\n", - "\n", "\n", " \n", " \n", @@ -334,7 +284,7 @@ "[5 rows x 29 columns]" ] }, - "execution_count": 4, + "execution_count": 6, "metadata": {}, "output_type": "execute_result" } @@ -345,7 +295,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 7, "metadata": { "collapsed": false, "deletable": true, @@ -356,15 +306,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "20\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/home/hoaphumanoid/anaconda3/envs/strata2/lib/python3.6/site-packages/jupyter_client/jsonutil.py:67: DeprecationWarning: Interpreting naive datetime as local 2017-06-30 09:20:30.113447. Please add timezone info to timestamps.\n", - " new_obj[k] = extract_dates(v)\n" + "24\n" ] } ], @@ -376,24 +318,13 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 8, "metadata": { "collapsed": false, "deletable": true, "editable": true }, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/home/hoaphumanoid/anaconda3/envs/strata2/lib/python3.6/site-packages/jupyter_client/jsonutil.py:67: DeprecationWarning: Interpreting naive datetime as local 2017-06-30 09:20:30.129374. Please add timezone info to timestamps.\n", - " new_obj[k] = extract_dates(v)\n", - "/home/hoaphumanoid/anaconda3/envs/strata2/lib/python3.6/site-packages/xgboost-0.6-py3.6.egg/xgboost/sklearn.py:171: DeprecationWarning: The nthread parameter is deprecated as of version .6.Please use n_jobs instead.nthread is deprecated.\n", - " 'nthread is deprecated.', DeprecationWarning)\n" - ] - } - ], + "outputs": [], "source": [ "xgb_clf_pipeline = XGBClassifier(max_depth=5, \n", " learning_rate=0.1, \n", @@ -409,24 +340,13 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 9, "metadata": { "collapsed": false, "deletable": true, "editable": true }, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/home/hoaphumanoid/anaconda3/envs/strata2/lib/python3.6/site-packages/jupyter_client/jsonutil.py:67: DeprecationWarning: Interpreting naive datetime as local 2017-06-30 09:20:30.146323. Please add timezone info to timestamps.\n", - " new_obj[k] = extract_dates(v)\n", - "/home/hoaphumanoid/anaconda3/envs/strata2/lib/python3.6/site-packages/xgboost-0.6-py3.6.egg/xgboost/sklearn.py:171: DeprecationWarning: The nthread parameter is deprecated as of version .6.Please use n_jobs instead.nthread is deprecated.\n", - " 'nthread is deprecated.', DeprecationWarning)\n" - ] - } - ], + "outputs": [], "source": [ "xgb_hist_clf_pipeline = XGBClassifier(max_depth=0, \n", " learning_rate=0.1, \n", @@ -445,22 +365,13 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 10, "metadata": { "collapsed": false, "deletable": true, "editable": true }, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/home/hoaphumanoid/anaconda3/envs/strata2/lib/python3.6/site-packages/jupyter_client/jsonutil.py:67: DeprecationWarning: Interpreting naive datetime as local 2017-06-30 09:20:30.160326. Please add timezone info to timestamps.\n", - " new_obj[k] = extract_dates(v)\n" - ] - } - ], + "outputs": [], "source": [ "lgbm_clf_pipeline = LGBMClassifier(num_leaves=2**5, \n", " learning_rate=0.1, \n", @@ -476,22 +387,13 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 11, "metadata": { "collapsed": false, "deletable": true, "editable": true }, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/home/hoaphumanoid/anaconda3/envs/strata2/lib/python3.6/site-packages/jupyter_client/jsonutil.py:67: DeprecationWarning: Interpreting naive datetime as local 2017-06-30 09:20:30.168690. Please add timezone info to timestamps.\n", - " new_obj[k] = extract_dates(v)\n" - ] - } - ], + "outputs": [], "source": [ "metrics_dict = {\n", " 'Accuracy': accuracy_score,\n", @@ -505,53 +407,6 @@ " return {metric_name:metric(y_true, y_pred) for metric_name, metric in metrics.items()}" ] }, - { - "cell_type": "code", - "execution_count": 10, - "metadata": { - "collapsed": false, - "deletable": true, - "editable": true - }, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/home/hoaphumanoid/anaconda3/envs/strata2/lib/python3.6/site-packages/jupyter_client/jsonutil.py:67: DeprecationWarning: Interpreting naive datetime as local 2017-06-30 09:20:30.175556. Please add timezone info to timestamps.\n", - " new_obj[k] = extract_dates(v)\n" - ] - } - ], - "source": [ - "def generate_feables(df):\n", - " X = df[df.columns.difference(['boson'])]\n", - " y = df['boson']\n", - " return X,y" - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "metadata": { - "collapsed": false, - "deletable": true, - "editable": true - }, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/home/hoaphumanoid/anaconda3/envs/strata2/lib/python3.6/site-packages/jupyter_client/jsonutil.py:67: DeprecationWarning: Interpreting naive datetime as local 2017-06-30 09:20:30.180996. Please add timezone info to timestamps.\n", - " new_obj[k] = extract_dates(v)\n" - ] - } - ], - "source": [ - "X, y = generate_feables(df)" - ] - }, { "cell_type": "code", "execution_count": 12, @@ -560,18 +415,12 @@ "deletable": true, "editable": true }, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/home/hoaphumanoid/anaconda3/envs/strata2/lib/python3.6/site-packages/jupyter_client/jsonutil.py:67: DeprecationWarning: Interpreting naive datetime as local 2017-06-30 09:20:30.187577. Please add timezone info to timestamps.\n", - " new_obj[k] = extract_dates(v)\n" - ] - } - ], + "outputs": [], "source": [ - "X_train, X_test, y_train, y_test = train_test_split(X, y, stratify=y, random_state=random_seed, test_size=500000)" + "def generate_feables(df):\n", + " X = df[df.columns.difference(['boson'])]\n", + " y = df['boson']\n", + " return X,y" ] }, { @@ -582,16 +431,33 @@ "deletable": true, "editable": true }, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/home/hoaphumanoid/anaconda3/envs/strata2/lib/python3.6/site-packages/jupyter_client/jsonutil.py:67: DeprecationWarning: Interpreting naive datetime as local 2017-06-30 09:20:30.193310. Please add timezone info to timestamps.\n", - " new_obj[k] = extract_dates(v)\n" - ] - } - ], + "outputs": [], + "source": [ + "X, y = generate_feables(df)" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": { + "collapsed": false, + "deletable": true, + "editable": true + }, + "outputs": [], + "source": [ + "X_train, X_test, y_train, y_test = train_test_split(X, y, stratify=y, random_state=77, test_size=500000)" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": { + "collapsed": false, + "deletable": true, + "editable": true + }, + "outputs": [], "source": [ "results_dict = dict()" ] @@ -608,22 +474,13 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 16, "metadata": { "collapsed": false, "deletable": true, "editable": true }, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/home/hoaphumanoid/anaconda3/envs/strata2/lib/python3.6/site-packages/jupyter_client/jsonutil.py:67: DeprecationWarning: Interpreting naive datetime as local 2017-06-30 09:20:30.199845. Please add timezone info to timestamps.\n", - " new_obj[k] = extract_dates(v)\n" - ] - } - ], + "outputs": [], "source": [ "with Timer() as train_t:\n", " xgb_clf_pipeline.fit(X_train,y_train)\n", @@ -634,7 +491,7 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 17, "metadata": { "collapsed": true, "deletable": true, @@ -653,7 +510,7 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 18, "metadata": { "collapsed": true, "deletable": true, @@ -667,7 +524,7 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 19, "metadata": { "collapsed": true, "deletable": true, @@ -681,7 +538,7 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 20, "metadata": { "collapsed": true, "deletable": true, @@ -710,7 +567,7 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 21, "metadata": { "collapsed": true, "deletable": true, @@ -727,7 +584,7 @@ }, { "cell_type": "code", - "execution_count": 20, + "execution_count": 22, "metadata": { "collapsed": true, "deletable": true, @@ -746,7 +603,7 @@ }, { "cell_type": "code", - "execution_count": 21, + "execution_count": 23, "metadata": { "collapsed": false, "deletable": true, @@ -766,8 +623,8 @@ " \"Precision\": 0.6514447704512661,\n", " \"Recall\": 0.9167383756038647\n", " },\n", - " \"test_time\": 0.5048187420034083,\n", - " \"train_time\": 77.36947308199888\n", + " \"test_time\": 0.4961782629998197,\n", + " \"train_time\": 71.38830216599854\n", " },\n", " \"xgb\": {\n", " \"performance\": {\n", @@ -777,8 +634,8 @@ " \"Precision\": 0.6418233549373553,\n", " \"Recall\": 0.922037288647343\n", " },\n", - " \"test_time\": 0.4216979429911589,\n", - " \"train_time\": 336.8079112419946\n", + " \"test_time\": 0.417350155999884,\n", + " \"train_time\": 1474.1256882889993\n", " },\n", " \"xgb_hist\": {\n", " \"performance\": {\n", @@ -788,8 +645,8 @@ " \"Precision\": 0.6513028739393029,\n", " \"Recall\": 0.9165534420289855\n", " },\n", - " \"test_time\": 0.4771806640055729,\n", - " \"train_time\": 102.01795201399364\n", + " \"test_time\": 0.4240018679993227,\n", + " \"train_time\": 79.5958247769995\n", " }\n", "}\n" ]