Mirror of https://github.com/mozilla/PRESC.git

Add initial report template

Parent: 1d7f7aecb1
Commit: bca724cd5d
@@ -0,0 +1,20 @@
# Report output

The report generated by PRESC is built using [Jupyter Book](https://jupyterbook.org/intro.html).
Report source is maintained under [`presc/report`](../presc/report/).

To build the report, run:

```bash
# Hide all the code cells.
# Currently this requires adding tags to each notebook cell; this script applies them.
python presc/report/add_tags.py

# Build the report.
jupyter-book build presc/report
```

The rendered report can be viewed at
[`presc/report/_build/html/index.html`](../presc/report/_build/html/index.html).
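As a quick check that the tagging step worked, a minimal sketch using `nbformat` (the same library `add_tags.py` below uses); the notebook path here is only an example:

```python
import nbformat as nbf

# Read a report notebook without converting its format version.
ntbk = nbf.read("presc/report/landing.ipynb", nbf.NO_CONVERT)

for cell in ntbk.cells:
    if cell.cell_type == "code":
        # After add_tags.py has run, each code cell should carry "remove-input",
        # which Jupyter Book uses to hide the cell's input in the rendered page.
        print(cell.metadata.get("tags", []))
```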
@@ -0,0 +1,21 @@
title: PRESC Report
author: ""

# Force re-execution of notebooks on each build.
execute:
  execute_notebooks: force

# Information about where the book exists on the web
repository:
  url: https://github.com/mozilla/PRESC
  path_to_book: presc/report
  branch: master

# Add GitHub buttons
html:
  use_issues_button: false
  use_repository_button: true

# Additional launch buttons
launch_buttons:
  binderhub_url: ""
@@ -0,0 +1,5 @@
- file: landing

- part: Misclassifications
  chapters:
    - file: misclass_rate
@@ -0,0 +1,25 @@
"""Add tags to all code cells in the notebooks."""

import nbformat as nbf
from pathlib import Path

TAGS_ALL_CELLS = ["remove-input"]
REPORT_DIR = Path(__file__).parent

notebooks = REPORT_DIR.glob("*.ipynb")

for ipath in notebooks:
    print(f"Adding tags in {ipath}")
    ntbk = nbf.read(ipath, nbf.NO_CONVERT)
    for cell in ntbk.cells:
        if cell.cell_type != "code":
            continue

        cell_tags = cell.metadata.get("tags", [])
        for tag_to_add in TAGS_ALL_CELLS:
            if tag_to_add not in cell_tags:
                cell_tags.append(tag_to_add)
        if len(cell_tags) > 0:
            cell.metadata["tags"] = cell_tags

    nbf.write(ntbk, ipath)
@@ -0,0 +1,171 @@
{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# PRESC Report"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "tags": [
     "remove-input"
    ]
   },
   "outputs": [],
   "source": [
    "from sklearn.metrics import classification_report, plot_confusion_matrix"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "tags": [
     "remove-input"
    ]
   },
   "outputs": [],
   "source": [
    "# Load the inputs to the report.\n",
    "#\n",
    "# This creates:\n",
    "# - the Dataset instance `dataset`\n",
    "# - the ClassificationModel instance `cm`\n",
    "# - a dict of config options `config`.\n",
    "#\n",
    "# Imports are also all managed from here.\n",
    "\n",
    "%run setup_report.py"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Dataset"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "tags": [
     "remove-input"
    ]
   },
   "outputs": [],
   "source": [
    "dataset.raw_dataset.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "tags": [
     "remove-input"
    ]
   },
   "outputs": [],
   "source": [
    "print(f\"{len(dataset.features.columns)} features:\\n\")\n",
    "print(dataset.features.dtypes)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "tags": [
     "remove-input"
    ]
   },
   "outputs": [],
   "source": [
    "print(f\"Training set size: {dataset.train_labels.size:,}\")\n",
    "print(\n",
    "    f\"Test set size: {dataset.test_labels.size:,}\" +\n",
    "    f\" ({dataset.test_labels.size / dataset.labels.size:.1%})\"\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Classifier"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "tags": [
     "remove-input"
    ]
   },
   "outputs": [],
   "source": [
    "cm.classifier"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Top level metrics"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "tags": [
     "remove-input"
    ]
   },
   "outputs": [],
   "source": [
    "print(classification_report(cm.dataset.test_labels, cm.test_predictions))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "tags": [
     "remove-input"
    ]
   },
   "outputs": [],
   "source": [
    "_ = plot_confusion_matrix(cm.classifier, cm.dataset.test_features, cm.dataset.test_labels)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.4"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
@@ -0,0 +1,88 @@
{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Conditional misclassification rate\n",
    "\n",
    "This shows how the misclassification rate varies across the values of each feature."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "tags": [
     "remove-input"
    ]
   },
   "outputs": [],
   "source": [
    "from presc.evaluations._example_misclass_rate import MisclassRateEvaluation"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "tags": [
     "remove-input"
    ]
   },
   "outputs": [],
   "source": [
    "# TODO: how to share state between notebooks\n",
    "\n",
    "%run setup_report.py"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "tags": [
     "remove-input"
    ]
   },
   "outputs": [],
   "source": [
    "mre = MisclassRateEvaluation(cm, config=config.get(\"misclass_rate\"))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "tags": [
     "remove-input"
    ]
   },
   "outputs": [],
   "source": [
    "mre.display()"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.4"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
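The notebook above delegates to `MisclassRateEvaluation` from `presc.evaluations._example_misclass_rate`, whose implementation is not included in this commit. For intuition only, a rough pandas sketch of the underlying idea (not the PRESC implementation): bin a numeric feature and compute the fraction of misclassified points in each bin.

```python
import numpy as np
import pandas as pd


def misclass_rate_by_bin(feature, y_true, y_pred, num_bins=20):
    """Fraction of misclassified points within each bin of a numeric feature.

    Illustrative sketch only; the actual evaluation used in the report lives in
    presc.evaluations._example_misclass_rate.
    """
    # Boolean mask of misclassified rows, aligned positionally with the feature.
    misclassified = pd.Series(np.asarray(y_true) != np.asarray(y_pred), index=feature.index)
    # Cut the feature into equal-width bins and average the mask per bin.
    bins = pd.cut(feature, bins=num_bins)
    return misclassified.groupby(bins).mean()


# Example usage after `%run setup_report.py` ("alcohol" is a hypothetical column name):
# rates = misclass_rate_by_bin(
#     cm.dataset.test_features["alcohol"],
#     cm.dataset.test_labels,
#     cm.test_predictions,
#     num_bins=config["misclass_rate"]["num_bins"],
# )
```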
@@ -0,0 +1,31 @@
"""Prepare the inputs to the PRESC report."""

import pandas as pd

from presc.dataset import Dataset
from presc.model import ClassificationModel

from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC

# Better quality plots
from IPython.display import set_matplotlib_formats

set_matplotlib_formats("svg")

# Load the dataset.

df = pd.read_csv("../../datasets/winequality.csv")
df = df.drop(columns=["quality"])

dataset = Dataset(df, label="recommend")
dataset.split_test_train(0.3)

# Set up the model

model = Pipeline([("scaler", StandardScaler()), ("clf", SVC(class_weight="balanced"))])
cm = ClassificationModel(model, dataset, should_train=True)

# Config options (TODO: read from file)
config = {"misclass_rate": {"num_bins": 20}}
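As a quick sanity check that the setup script produces a trained model, something like the following could be run after it (a sketch only; it assumes `cm.test_predictions` is populated once `should_train=True` has fitted the pipeline, as the report notebooks above suggest):

```python
from sklearn.metrics import accuracy_score

print(cm.classifier)  # the fitted Pipeline
print(f"Test rows: {dataset.test_labels.size:,}")
print(f"Test accuracy: {accuracy_score(cm.dataset.test_labels, cm.test_predictions):.3f}")
```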