updated papermill to 2.2 and changed to sb.glue

This commit is contained in:
Tao Wu 2020-12-29 22:06:19 +00:00
Родитель 9604bb412a
Коммит 1a7c29156e
30 изменённых файлов: 157 добавлений и 133 удалений

Просмотреть файл

@ -480,16 +480,17 @@
"if is_jupyter():\n",
" # Record results with papermill for tests\n",
" import papermill as pm\n",
" pm.record(\"map\", rank_eval.map_at_k())\n",
" pm.record(\"ndcg\", rank_eval.ndcg_at_k())\n",
" pm.record(\"precision\", rank_eval.precision_at_k())\n",
" pm.record(\"recall\", rank_eval.recall_at_k())\n",
" pm.record(\"rmse\", rating_eval.rmse())\n",
" pm.record(\"mae\", rating_eval.mae())\n",
" pm.record(\"exp_var\", rating_eval.exp_var())\n",
" pm.record(\"rsquared\", rating_eval.rsquared())\n",
" pm.record(\"train_time\", train_time.interval)\n",
" pm.record(\"test_time\", test_time.interval)"
" import scrapbook as sb\n",
" sb.glue(\"map\", rank_eval.map_at_k())\n",
" sb.glue(\"ndcg\", rank_eval.ndcg_at_k())\n",
" sb.glue(\"precision\", rank_eval.precision_at_k())\n",
" sb.glue(\"recall\", rank_eval.recall_at_k())\n",
" sb.glue(\"rmse\", rating_eval.rmse())\n",
" sb.glue(\"mae\", rating_eval.mae())\n",
" sb.glue(\"exp_var\", rating_eval.exp_var())\n",
" sb.glue(\"rsquared\", rating_eval.rsquared())\n",
" sb.glue(\"train_time\", train_time.interval)\n",
" sb.glue(\"test_time\", test_time.interval)"
]
},
{
@ -524,4 +525,4 @@
},
"nbformat": 4,
"nbformat_minor": 1
}
}

Просмотреть файл

@ -86,6 +86,7 @@
"import os\n",
"from tempfile import TemporaryDirectory\n",
"import papermill as pm\n",
"import scrapbook as sb\n",
"import tensorflow as tf\n",
"\n",
"from reco_utils.recommender.deeprec.deeprec_utils import download_deeprec_resources, prepare_hparams\n",
@ -376,7 +377,7 @@
"metadata": {},
"outputs": [],
"source": [
"pm.record(\"res\", res)"
"sb.glue(\"res\", res)"
]
},
{
@ -430,4 +431,4 @@
},
"nbformat": 4,
"nbformat_minor": 2
}
}

Просмотреть файл

@ -47,6 +47,7 @@
"import pandas as pd\n",
"import numpy as np\n",
"import papermill as pm\n",
"import scrapbook as sb\n",
"import torch, fastai\n",
"from fastai.collab import EmbeddingDotBias, collab_learner, CollabDataBunch, load_learner\n",
"\n",
@ -884,16 +885,16 @@
],
"source": [
"# Record results with papermill for tests\n",
"pm.record(\"map\", eval_map)\n",
"pm.record(\"ndcg\", eval_ndcg)\n",
"pm.record(\"precision\", eval_precision)\n",
"pm.record(\"recall\", eval_recall)\n",
"pm.record(\"rmse\", eval_rmse)\n",
"pm.record(\"mae\", eval_mae)\n",
"pm.record(\"exp_var\", eval_exp_var)\n",
"pm.record(\"rsquared\", eval_r2)\n",
"pm.record(\"train_time\", train_time)\n",
"pm.record(\"test_time\", test_time)"
"sb.glue(\"map\", eval_map)\n",
"sb.glue(\"ndcg\", eval_ndcg)\n",
"sb.glue(\"precision\", eval_precision)\n",
"sb.glue(\"recall\", eval_recall)\n",
"sb.glue(\"rmse\", eval_rmse)\n",
"sb.glue(\"mae\", eval_mae)\n",
"sb.glue(\"exp_var\", eval_exp_var)\n",
"sb.glue(\"rsquared\", eval_r2)\n",
"sb.glue(\"train_time\", train_time)\n",
"sb.glue(\"test_time\", test_time)"
]
},
{
@ -924,4 +925,4 @@
},
"nbformat": 4,
"nbformat_minor": 2
}
}

Просмотреть файл

@ -26,6 +26,7 @@
"import pandas as pd\n",
"import numpy as np\n",
"import papermill as pm\n",
"import scrapbook as sb\n",
"sys.path.append(\"../../\")\n",
"sys.path.append(\"../../reco_utils/recommender/geoimc/\")\n",
"\n",
@ -289,8 +290,8 @@
"metadata": {},
"outputs": [],
"source": [
"pm.record(\"rmse\", RMSE)\n",
"pm.record(\"mae\", MAE)"
"sb.glue(\"rmse\", RMSE)\n",
"sb.glue(\"mae\", MAE)"
]
},
{
@ -328,4 +329,4 @@
},
"nbformat": 4,
"nbformat_minor": 4
}
}

Просмотреть файл

@ -52,6 +52,7 @@
"import numpy as np\n",
"import lightgbm as lgb\n",
"import papermill as pm\n",
"import scrapbook as sb\n",
"import pandas as pd\n",
"import category_encoders as ce\n",
"from tempfile import TemporaryDirectory\n",
@ -727,7 +728,7 @@
"logloss = log_loss(np.asarray(test_y.reshape(-1)), np.asarray(test_preds), eps=1e-12)\n",
"res_basic = {\"auc\": auc, \"logloss\": logloss}\n",
"print(res_basic)\n",
"pm.record(\"res_basic\", res_basic)"
"sb.glue(\"res_basic\", res_basic)"
]
},
{
@ -945,7 +946,7 @@
"logloss = log_loss(np.asarray(test_y.reshape(-1)), np.asarray(test_preds), eps=1e-12)\n",
"res_optim = {\"auc\": auc, \"logloss\": logloss}\n",
"print(res_optim)\n",
"pm.record(\"res_optim\", res_optim)"
"sb.glue(\"res_optim\", res_optim)"
]
},
{
@ -1018,4 +1019,4 @@
},
"nbformat": 4,
"nbformat_minor": 2
}
}

Просмотреть файл

@ -93,6 +93,7 @@
"import zipfile\n",
"from tqdm import tqdm\n",
"import papermill as pm\n",
"import scrapbook as sb\n",
"from tempfile import TemporaryDirectory\n",
"import tensorflow as tf\n",
"\n",
@ -474,7 +475,7 @@
}
],
"source": [
"pm.record(\"res_syn\", res_syn)"
"sb.glue(\"res_syn\", res_syn)"
]
},
{
@ -591,4 +592,4 @@
},
"nbformat": 4,
"nbformat_minor": 4
}
}

Просмотреть файл

@ -93,6 +93,7 @@
"import zipfile\n",
"from tqdm import tqdm\n",
"import papermill as pm\n",
"import scrapbook as sb\n",
"from tempfile import TemporaryDirectory\n",
"import tensorflow as tf\n",
"\n",
@ -475,7 +476,7 @@
}
],
"source": [
"pm.record(\"res_syn\", res_syn)"
"sb.glue(\"res_syn\", res_syn)"
]
},
{
@ -592,4 +593,4 @@
},
"nbformat": 4,
"nbformat_minor": 4
}
}

Просмотреть файл

@ -298,12 +298,13 @@
"if is_jupyter():\n",
" # Record results with papermill for tests\n",
" import papermill as pm\n",
" pm.record(\"map\", eval_map)\n",
" pm.record(\"ndcg\", eval_ndcg)\n",
" pm.record(\"precision\", eval_precision)\n",
" pm.record(\"recall\", eval_recall)\n",
" pm.record(\"train_time\", train_time)\n",
" pm.record(\"test_time\", test_time)"
" import scrapbook as sb\n",
" sb.glue(\"map\", eval_map)\n",
" sb.glue(\"ndcg\", eval_ndcg)\n",
" sb.glue(\"precision\", eval_precision)\n",
" sb.glue(\"recall\", eval_recall)\n",
" sb.glue(\"train_time\", train_time)\n",
" sb.glue(\"test_time\", test_time)"
]
},
{
@ -336,4 +337,4 @@
},
"nbformat": 4,
"nbformat_minor": 2
}
}

Просмотреть файл

@ -93,6 +93,7 @@
"import zipfile\n",
"from tqdm import tqdm\n",
"import papermill as pm\n",
"import scrapbook as sb\n",
"from tempfile import TemporaryDirectory\n",
"import tensorflow as tf\n",
"\n",
@ -455,7 +456,7 @@
}
],
"source": [
"pm.record(\"res_syn\", res_syn)"
"sb.glue(\"res_syn\", res_syn)"
]
},
{
@ -570,4 +571,4 @@
},
"nbformat": 4,
"nbformat_minor": 4
}
}

Просмотреть файл

@ -93,6 +93,7 @@
"import zipfile\n",
"from tqdm import tqdm\n",
"import papermill as pm\n",
"import scrapbook as sb\n",
"from tempfile import TemporaryDirectory\n",
"import tensorflow as tf\n",
"\n",
@ -473,7 +474,7 @@
}
],
"source": [
"pm.record(\"res_syn\", res_syn)"
"sb.glue(\"res_syn\", res_syn)"
]
},
{
@ -590,4 +591,4 @@
},
"nbformat": 4,
"nbformat_minor": 4
}
}

Просмотреть файл

@ -73,6 +73,7 @@
"%matplotlib inline\n",
"\n",
"import papermill as pm\n",
"import scrapbook as sb\n",
"\n",
"from reco_utils.recommender.rbm.rbm import RBM\n",
"from reco_utils.dataset.python_splitters import numpy_stratified_split\n",
@ -776,12 +777,12 @@
],
"source": [
"# Record results with papermill for tests\n",
"pm.record(\"map\", eval_100k['MAP'][0])\n",
"pm.record(\"ndcg\", eval_100k['nDCG@k'][0])\n",
"pm.record(\"precision\", eval_100k['Precision@k'][0])\n",
"pm.record(\"recall\", eval_100k['Recall@k'][0])\n",
"pm.record(\"train_time\", train_time)\n",
"pm.record(\"test_time\", test_time)"
"sb.glue(\"map\", eval_100k['MAP'][0])\n",
"sb.glue(\"ndcg\", eval_100k['nDCG@k'][0])\n",
"sb.glue(\"precision\", eval_100k['Precision@k'][0])\n",
"sb.glue(\"recall\", eval_100k['Recall@k'][0])\n",
"sb.glue(\"train_time\", train_time)\n",
"sb.glue(\"test_time\", test_time)"
]
},
{
@ -820,4 +821,4 @@
},
"nbformat": 4,
"nbformat_minor": 4
}
}

Просмотреть файл

@ -48,6 +48,7 @@
"import os\n",
"import logging\n",
"import papermill as pm\n",
"import scrapbook as sb\n",
"from tempfile import TemporaryDirectory\n",
"\n",
"import tensorflow as tf\n",
@ -456,7 +457,7 @@
"source": [
"res_syn = model.run_eval(test_file, num_ngs=test_num_ngs)\n",
"print(res_syn)\n",
"pm.record(\"res_syn\", res_syn)"
"sb.glue(\"res_syn\", res_syn)"
]
},
{
@ -792,4 +793,4 @@
},
"nbformat": 4,
"nbformat_minor": 2
}
}

Просмотреть файл

@ -62,6 +62,7 @@
"\n",
"import numpy as np\n",
"import papermill as pm\n",
"import scrapbook as sb\n",
"import pandas as pd\n",
"import sklearn.preprocessing\n",
"import tensorflow as tf\n",
@ -816,7 +817,7 @@
"if EVALUATE_WHILE_TRAINING:\n",
" logs = evaluation_logger.get_log()\n",
" for i, (m, v) in enumerate(logs.items(), 1):\n",
" pm.record(\"eval_{}\".format(m), v)\n",
" sb.glue(\"eval_{}\".format(m), v)\n",
" x = [save_checkpoints_steps*i for i in range(1, len(v)+1)]\n",
" plot.line_graph(\n",
" values=list(zip(v, x)),\n",
@ -912,7 +913,7 @@
" rating_results = {}\n",
" for m in RATING_METRICS:\n",
" result = evaluator.metrics[m](test, prediction_df, **cols)\n",
" pm.record(m, result)\n",
" sb.glue(m, result)\n",
" rating_results[m] = result\n",
" print(rating_results)"
]
@ -985,7 +986,7 @@
" ranking_results = {}\n",
" for m in RANKING_METRICS:\n",
" result = evaluator.metrics[m](test, prediction_df, **{**cols, 'k': TOP_K})\n",
" pm.record(m, result)\n",
" sb.glue(m, result)\n",
" ranking_results[m] = result\n",
" print(ranking_results)"
]
@ -1050,7 +1051,7 @@
" tf_feat_cols=wide_columns+deep_columns,\n",
" base_dir=EXPORT_DIR_BASE\n",
")\n",
"pm.record('saved_model_dir', str(exported_path))\n",
"sb.glue('saved_model_dir', str(exported_path))\n",
"print(\"Model exported to\", str(exported_path))"
]
},
@ -1098,4 +1099,4 @@
},
"nbformat": 4,
"nbformat_minor": 2
}
}

Просмотреть файл

@ -50,6 +50,7 @@
"sys.path.append(\"../../\")\n",
"import os\n",
"import papermill as pm\n",
"import scrapbook as sb\n",
"from tempfile import TemporaryDirectory\n",
"\n",
"import tensorflow as tf\n",
@ -352,7 +353,7 @@
"source": [
"res_syn = model.run_eval(test_file)\n",
"print(res_syn)\n",
"pm.record(\"res_syn\", res_syn)"
"sb.glue(\"res_syn\", res_syn)"
]
},
{
@ -533,7 +534,7 @@
"# check the predictive performance after the model is trained\n",
"res_real = model.run_eval(test_file)\n",
"print(res_real)\n",
"pm.record(\"res_real\", res_real)"
"sb.glue(\"res_real\", res_real)"
]
},
{
@ -584,4 +585,4 @@
},
"nbformat": 4,
"nbformat_minor": 2
}
}

Просмотреть файл

@ -36,6 +36,7 @@
"print(\"System version: {}\".format(sys.version))\n",
"\n",
"import papermill as pm\n",
"import scrapbook as sb\n",
"import pandas as pd\n",
"import networkx as nx\n",
"import matplotlib.pyplot as plt\n",
@ -548,7 +549,7 @@
],
"source": [
"# Record results with papermill for unit-tests\n",
"pm.record(\"length_result\", number_movies)"
"sb.glue(\"length_result\", number_movies)"
]
}
],
@ -574,4 +575,4 @@
},
"nbformat": 4,
"nbformat_minor": 2
}
}

Просмотреть файл

@ -780,14 +780,15 @@
"if is_jupyter():\n",
" # Record results with papermill for unit-tests\n",
" import papermill as pm\n",
" pm.record(\"map\", eval_map)\n",
" pm.record(\"ndcg\", eval_ndcg)\n",
" pm.record(\"precision\", eval_precision)\n",
" pm.record(\"recall\", eval_recall)\n",
" pm.record(\"rmse\", eval_rmse)\n",
" pm.record(\"mae\", eval_mae)\n",
" pm.record(\"exp_var\", eval_exp_var)\n",
" pm.record(\"rsquared\", eval_rsquared)"
" import scrapbook as sb\n",
" sb.glue(\"map\", eval_map)\n",
" sb.glue(\"ndcg\", eval_ndcg)\n",
" sb.glue(\"precision\", eval_precision)\n",
" sb.glue(\"recall\", eval_recall)\n",
" sb.glue(\"rmse\", eval_rmse)\n",
" sb.glue(\"mae\", eval_mae)\n",
" sb.glue(\"exp_var\", eval_exp_var)\n",
" sb.glue(\"rsquared\", eval_rsquared)"
]
},
{
@ -829,4 +830,4 @@
},
"nbformat": 4,
"nbformat_minor": 2
}
}

Просмотреть файл

@ -47,6 +47,7 @@
"import os\n",
"import cornac\n",
"import papermill as pm\n",
"import scrapbook as sb\n",
"import pandas as pd\n",
"from reco_utils.dataset import movielens\n",
"from reco_utils.dataset.python_splitters import python_random_split\n",
@ -552,10 +553,10 @@
"outputs": [],
"source": [
"# Record results with papermill for tests\n",
"pm.record(\"map\", eval_map)\n",
"pm.record(\"ndcg\", eval_ndcg)\n",
"pm.record(\"precision\", eval_precision)\n",
"pm.record(\"recall\", eval_recall)"
"sb.glue(\"map\", eval_map)\n",
"sb.glue(\"ndcg\", eval_ndcg)\n",
"sb.glue(\"precision\", eval_precision)\n",
"sb.glue(\"recall\", eval_recall)"
]
},
{

Просмотреть файл

@ -46,6 +46,7 @@
"sys.path.append(\"../../\")\n",
"import os\n",
"import papermill as pm\n",
"import scrapbook as sb\n",
"import pandas as pd\n",
"import numpy as np\n",
"import tensorflow as tf\n",
@ -737,10 +738,10 @@
],
"source": [
"# Record results with papermill for tests\n",
"pm.record(\"map\", eval_map)\n",
"pm.record(\"ndcg\", eval_ndcg)\n",
"pm.record(\"precision\", eval_precision)\n",
"pm.record(\"recall\", eval_recall)"
"sb.glue(\"map\", eval_map)\n",
"sb.glue(\"ndcg\", eval_ndcg)\n",
"sb.glue(\"precision\", eval_precision)\n",
"sb.glue(\"recall\", eval_recall)"
]
},
{
@ -829,4 +830,4 @@
},
"nbformat": 4,
"nbformat_minor": 2
}
}

Просмотреть файл

@ -102,6 +102,7 @@
"import os\n",
"import surprise\n",
"import papermill as pm\n",
"import scrapbook as sb\n",
"import pandas as pd\n",
"\n",
"from reco_utils.common.timer import Timer\n",
@ -671,16 +672,16 @@
],
"source": [
"# Record results with papermill for tests\n",
"pm.record(\"rmse\", eval_rmse)\n",
"pm.record(\"mae\", eval_mae)\n",
"pm.record(\"rsquared\", eval_rsquared)\n",
"pm.record(\"exp_var\", eval_exp_var)\n",
"pm.record(\"map\", eval_map)\n",
"pm.record(\"ndcg\", eval_ndcg)\n",
"pm.record(\"precision\", eval_precision)\n",
"pm.record(\"recall\", eval_recall)\n",
"pm.record(\"train_time\", train_time.interval)\n",
"pm.record(\"test_time\", test_time.interval)"
"sb.glue(\"rmse\", eval_rmse)\n",
"sb.glue(\"mae\", eval_mae)\n",
"sb.glue(\"rsquared\", eval_rsquared)\n",
"sb.glue(\"exp_var\", eval_exp_var)\n",
"sb.glue(\"map\", eval_map)\n",
"sb.glue(\"ndcg\", eval_ndcg)\n",
"sb.glue(\"precision\", eval_precision)\n",
"sb.glue(\"recall\", eval_recall)\n",
"sb.glue(\"train_time\", train_time.interval)\n",
"sb.glue(\"test_time\", test_time.interval)"
]
},
{

Просмотреть файл

@ -135,6 +135,7 @@
"from tempfile import TemporaryDirectory\n",
"import logging\n",
"import papermill as pm\n",
"import scrapbook as sb\n",
"import tensorflow as tf\n",
"\n",
"from reco_utils.dataset.download_utils import maybe_download\n",
@ -559,7 +560,7 @@
"metadata": {},
"outputs": [],
"source": [
"pm.record(\"res\", res)"
"sb.glue(\"res\", res)"
]
},
{
@ -638,4 +639,4 @@
},
"nbformat": 4,
"nbformat_minor": 4
}
}

Просмотреть файл

@ -69,6 +69,7 @@
"from pyspark.ml import PipelineModel\n",
"from pyspark.ml.feature import FeatureHasher\n",
"import papermill as pm\n",
"import scrapbook as sb\n",
"\n",
"from reco_utils.common.spark_utils import start_or_get_spark\n",
"from reco_utils.common.notebook_utils import is_databricks\n",
@ -415,7 +416,7 @@
],
"source": [
"# Record results with papermill for tests\n",
"pm.record(\"auc\", auc)"
"sb.glue(\"auc\", auc)"
]
},
{
@ -480,4 +481,4 @@
},
"nbformat": 4,
"nbformat_minor": 2
}
}

Просмотреть файл

@ -85,6 +85,7 @@
"\n",
"import pandas as pd\n",
"import papermill as pm\n",
"import scrapbook as sb\n",
"\n",
"from reco_utils.common.notebook_utils import is_jupyter\n",
"from reco_utils.dataset.movielens import load_pandas_df\n",
@ -1318,16 +1319,16 @@
"source": [
"# record results for testing\n",
"if is_jupyter():\n",
" pm.record('rmse', saved_result['RMSE'])\n",
" pm.record('mae', saved_result['MAE'])\n",
" pm.record('rsquared', saved_result['R2'])\n",
" pm.record('exp_var', saved_result['Explained Variance'])\n",
" pm.record(\"train_time\", saved_result['Train Time (ms)'])\n",
" pm.record(\"test_time\", test_time)\n",
" pm.record('map', rank_metrics['MAP'])\n",
" pm.record('ndcg', rank_metrics['NDCG'])\n",
" pm.record('precision', rank_metrics['Precision'])\n",
" pm.record('recall', rank_metrics['Recall'])"
" sb.glue('rmse', saved_result['RMSE'])\n",
" sb.glue('mae', saved_result['MAE'])\n",
" sb.glue('rsquared', saved_result['R2'])\n",
" sb.glue('exp_var', saved_result['Explained Variance'])\n",
" sb.glue(\"train_time\", saved_result['Train Time (ms)'])\n",
" sb.glue(\"test_time\", test_time)\n",
" sb.glue('map', rank_metrics['MAP'])\n",
" sb.glue('ndcg', rank_metrics['NDCG'])\n",
" sb.glue('precision', rank_metrics['Precision'])\n",
" sb.glue('recall', rank_metrics['Recall'])"
]
},
{
@ -1374,4 +1375,4 @@
},
"nbformat": 4,
"nbformat_minor": 2
}
}

Просмотреть файл

@ -242,6 +242,7 @@
"sys.path.append(\"../../\")\n",
"import os\n",
"import papermill as pm\n",
"import scrapbook as sb\n",
"from tempfile import TemporaryDirectory\n",
"import xlearn as xl\n",
"from sklearn.metrics import roc_auc_score\n",
@ -582,7 +583,7 @@
}
],
"source": [
"pm.record('auc_score', auc_score)"
"sb.glue('auc_score', auc_score)"
]
},
{

Просмотреть файл

@ -47,6 +47,7 @@
"import os\n",
"import shutil\n",
"import papermill as pm\n",
"import scrapbook as sb\n",
"import pandas as pd\n",
"import numpy as np\n",
"import tensorflow as tf\n",
@ -865,14 +866,14 @@
"outputs": [],
"source": [
"# Record results with papermill for tests\n",
"pm.record(\"map\", eval_map)\n",
"pm.record(\"ndcg\", eval_ndcg)\n",
"pm.record(\"precision\", eval_precision)\n",
"pm.record(\"recall\", eval_recall)\n",
"pm.record(\"map2\", eval_map2)\n",
"pm.record(\"ndcg2\", eval_ndcg2)\n",
"pm.record(\"precision2\", eval_precision2)\n",
"pm.record(\"recall2\", eval_recall2)"
"sb.glue(\"map\", eval_map)\n",
"sb.glue(\"ndcg\", eval_ndcg)\n",
"sb.glue(\"precision\", eval_precision)\n",
"sb.glue(\"recall\", eval_recall)\n",
"sb.glue(\"map2\", eval_map2)\n",
"sb.glue(\"ndcg2\", eval_ndcg2)\n",
"sb.glue(\"precision2\", eval_precision2)\n",
"sb.glue(\"recall2\", eval_recall2)"
]
},
{
@ -938,4 +939,4 @@
},
"nbformat": 4,
"nbformat_minor": 2
}
}

Просмотреть файл

@ -69,4 +69,4 @@ cfg = NotebookRunConfig(source_directory='../',
run_config=run_config)
```
All metrics and parameters logged with `pm.record` will be stored on the run as tracked metrics. The initial notebook that was submitted, will be stored as an output notebook ```out.ipynb``` in the outputs tab of the Azure Portal.
All metrics and parameters logged with `sb.glue` will be stored on the run as tracked metrics. The initial notebook that was submitted, will be stored as an output notebook ```out.ipynb``` in the outputs tab of the Azure Portal.

Просмотреть файл

@ -83,7 +83,8 @@
}
],
"source": [
"import papermill as pm"
"import papermill as pm\n",
"import scrapbook as sb"
]
},
{
@ -204,7 +205,7 @@
}
],
"source": [
"pm.record(\"checked_version\", checked_version)"
"sb.glue(\"checked_version\", checked_version)"
]
},
{
@ -277,4 +278,4 @@
},
"nbformat": 4,
"nbformat_minor": 2
}
}

Просмотреть файл

@ -187,7 +187,7 @@ The first step is to tag the parameters that we are going to inject. For it we n
The way papermill works to inject parameters is very simple, it generates a copy of the notebook (in our code we call it `OUTPUT_NOTEBOOK`), and creates a new cell with the injected variables.
The second modification that we need to do to the notebook is to record the metrics we want to test using `pm.record("output_variable", python_variable_name)`. We normally use the last cell of the notebook to record all the metrics. These are the metrics that we are going to control to in the smoke and integration tests.
The second modification that we need to do to the notebook is to record the metrics we want to test using `sb.glue("output_variable", python_variable_name)`. We normally use the last cell of the notebook to record all the metrics. These are the metrics that we are going to check in the smoke and integration tests.
This is an example on how we do a smoke test. The complete code can be found in [tests/smoke/test_notebooks_python.py](smoke/test_notebooks_python.py):

Просмотреть файл

@ -21,17 +21,14 @@ def test_is_jupyter():
path, OUTPUT_NOTEBOOK, kernel_name=KERNEL_NAME,
)
nb = sb.read_notebook(OUTPUT_NOTEBOOK)
df = nb.scrap_dataframe
print(df)
a = df.loc[df["name"] == "is_jupyter", "data"]
result_is_jupyter = df.loc[df["name"] == "is_jupyter", "data"].values[0]
df = nb.papermill_dataframe
result_is_jupyter = df.loc[df["name"] == "is_jupyter", "value"].values[0]
assert result_is_jupyter
assert result_is_jupyter is True
result_is_databricks = df.loc[df["name"] == "is_databricks", "data"].values[0]
result_is_databricks = df.loc[df["name"] == "is_databricks", "value"].values[0]
assert result_is_databricks is False
# @pytest.mark.notebooks
# def test_is_databricks():
# TODO Currently, we cannot pytest modules on Databricks
test_is_jupyter()

Просмотреть файл

@ -3,6 +3,7 @@
import pytest
import papermill as pm
import scrapbook as sb
from tests.notebooks_common import OUTPUT_NOTEBOOK, KERNEL_NAME
@ -16,8 +17,8 @@ def test_template_runs(notebooks):
parameters=dict(PM_VERSION=pm.__version__),
kernel_name=KERNEL_NAME,
)
nb = pm.read_notebook(OUTPUT_NOTEBOOK)
df = nb.dataframe
nb = sb.read_notebook(OUTPUT_NOTEBOOK)
df = nb.papermill_dataframe
assert df.shape[0] == 2
check_version = df.loc[df["name"] == "checked_version", "value"].values[0]
assert check_version is True

Просмотреть файл

@ -41,7 +41,7 @@ CONDA_BASE = {
"fastparquet": "fastparquet>=0.1.6",
"ipykernel": "ipykernel>=4.6.1",
"jupyter": "jupyter>=1.0.0",
"lightfm": "lightfm>=1.15",
"lightfm": "lightfm==1.15",
"matplotlib": "matplotlib>=2.2.2",
"mock": "mock==2.0.0",
"nltk": "nltk>=3.4",
@ -57,7 +57,8 @@ CONDA_BASE = {
"swig": "swig==3.0.12",
"lightgbm": "lightgbm==2.2.1",
"cornac": "cornac>=1.1.2",
"papermill": "papermill==0.19.1",
"papermill": "papermill>=2.2.0",
"nteract-scrapbook": "nteract-scrapbook>=0.4.0",
"tqdm": "tqdm>=4.31.1",
}
@ -78,7 +79,7 @@ PIP_BASE = {
"black": "black>=18.6b4",
"category_encoders": "category_encoders>=1.3.0",
"dataclasses": "dataclasses>=0.6",
"hyperopt": "hyperopt==0.1.1",
"hyperopt": "hyperopt==0.1.2",
"idna": "idna==2.7",
"locustio": "locustio==0.11.0",
"memory-profiler": "memory-profiler>=0.54.0",