landcover-orinoquia/evaluation/evaluate.ipynb

{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "from IPython.core.interactiveshell import InteractiveShell\n",
    "InteractiveShell.ast_node_interactivity = 'all'  # default is ‘last_expr’\n",
    "\n",
    "%load_ext autoreload\n",
    "%autoreload 2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "import sys\n",
    "sys.path.append('/Source/repos/GitHub_MSFT/landcover-orinoquiaa')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "import json\n",
    "import os\n",
    "import pickle\n",
    "from collections import defaultdict\n",
    "\n",
    "import rasterio\n",
    "from tqdm import tqdm\n",
    "import numpy as np\n",
    "from sklearn.metrics import confusion_matrix, accuracy_score\n",
    "from sklearn.calibration import calibration_curve\n",
    "import matplotlib.pyplot as plt\n",
    "from PIL import Image\n",
    "\n",
    "from geospatial.visualization.raster_label_visualizer import RasterLabelVisualizer\n",
    "\n",
    "plt.rcParams['figure.figsize'] = (10.0, 10.0)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "# from data/tile_and_mask.py - which needs to be run in the Solaris env\n",
    "\n",
    "def get_lon_lat_from_tile_name(tile_name):\n",
    "    \"\"\"Returns _lon_lat\"\"\"\n",
    "    parts = tile_name.split('_')\n",
    "    lon_lat = f'_{parts[-2]}_{parts[-1].split(\".tif\")[0]}'\n",
    "    return lon_lat"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Evaluate a tiles of model predictions"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "viz_util = RasterLabelVisualizer('../constants/class_lists/wcs_coarse_label_map.json')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['Empty of data',\n",
       " 'Urban and infrastructure',\n",
       " 'Agriculture',\n",
       " 'Arboreal and forestry crops',\n",
       " 'Pasture',\n",
       " 'Vegetation',\n",
       " 'Forest',\n",
       " 'Savanna',\n",
       " 'Sand, rocks and bare land',\n",
       " 'Unavailable',\n",
       " 'Swamp',\n",
       " 'Water',\n",
       " 'Seasonal savanna',\n",
       " 'Seasonally flooded savanna']"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "label_names = sorted(viz_util.num_to_name.items(), key=lambda x: int(x[0]))\n",
    "label_names = [i[1] for i in label_names]\n",
    "label_names"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "output_paths = '/Data/WCS_land_use/delivered/20200701/results_coarse_baseline_201314'\n",
    "\n",
    "mask_paths = '/Data/WCS_land_use/train_full_region_median/tiles_masks_coarse'\n",
    "\n",
    "eval_saved_to = '/Data/WCS_land_use/train_full_region_median/result_val_analysis_coarse_baseline'\n",
    "\n",
    "num_classes = viz_util.num_classes"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "tile_accuracies = {}\n",
    "\n",
    "cm = np.zeros((num_classes, num_classes), dtype=np.int64)\n",
    "\n",
    "true_counts = np.zeros((num_classes), dtype=np.int64)\n",
    "pred_counts = np.zeros((num_classes), dtype=np.int64)\n",
    "\n",
    "classes_present_in_gt = set()\n",
    "\n",
    "for output_tile_fn in os.listdir(output_paths):\n",
    "    if not output_tile_fn.endswith('.tif'):\n",
    "        continue\n",
    "# for output_tile_fn in ['res_wcs_orinoquia_sr_median_2013_2014-0000000000-0000022272_-68.962_6.593.tif']:\n",
    "    \n",
    "    output_tile_path = os.path.join(output_paths, output_tile_fn)\n",
    "    out_reader = rasterio.open(output_tile_path)\n",
    "    output_tile = np.array(Image.open(output_tile_path), dtype=np.uint8)\n",
    "    \n",
    "    # mask_-68.423_6.054.png\n",
    "    lon_lat = get_lon_lat_from_tile_name(output_tile_path)\n",
    "    label_mask_path = os.path.join(mask_paths, f'mask{lon_lat}.tif')\n",
    "    label_mask = np.array(Image.open(label_mask_path), dtype=np.uint8)\n",
    " \n",
    "    output = output_tile.flatten()\n",
    "    labels = label_mask.flatten()\n",
    "    \n",
    "    # mask out where labels is 0, which is outside of boundary of region\n",
    "    # and also where output is 0, which is where no imagery is available on the tile\n",
    "    # now get rid of such entries\n",
    "    labels_masked = labels * (output != 0)\n",
    "    no_label_entries = np.where(labels_masked == 0)\n",
    "    \n",
    "    labels = np.delete(labels, no_label_entries)\n",
    "    output = np.delete(output, no_label_entries)\n",
    "    \n",
    "    classes_present_in_gt.update(labels)\n",
    "    \n",
    "    tile_accuracy = accuracy_score(labels, output, normalize=True)\n",
    "    tile_accuracies[lon_lat] = tile_accuracy\n",
    "\n",
    "    for y_true, y_pred in tqdm(zip(labels, output)):\n",
    "        cm[y_true][y_pred] += 1\n",
    "        true_counts[y_true] += 1\n",
    "        pred_counts[y_pred] += 1\n",
    "    \n",
    "overall_accuracy = sum(tile_accuracies.values())/len(tile_accuracies)\n",
    "print(f'Overall accuracy is {overall_accuracy}')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "tile_accuracies"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Accurate distribution of land types\n",
    "The shapefile's area attribute did not look correct"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "true_counts"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Confusion matrix"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# normalize by ground truth label counts\n",
    "cm_norm = np.zeros((num_classes, num_classes), dtype=np.float)\n",
    "for y_true in range(num_classes):\n",
    "    for y_pred in range(num_classes):\n",
    "        if true_counts[y_true] == 0:\n",
    "            cm_norm[y_true][y_pred] = 0.0\n",
    "        else:\n",
    "            cm_norm[y_true][y_pred] = cm[y_true][y_pred] / true_counts[y_true]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# docs: https://matplotlib.org/3.1.3/gallery/images_contours_and_fields/image_annotated_heatmap.html#sphx-glr-gallery-images-contours-and-fields-image-annotated-heatmap-py\n",
    "\n",
    "cm_to_plot = cm_norm\n",
    "\n",
    "\n",
    "fig = plt.figure(figsize=(10, 10), dpi=200)  # set dpi to 300 to look good\n",
    "ax = fig.add_axes([0.0, 0.0, 1.0, 1.0])\n",
    "im = ax.matshow(cm_to_plot, cmap=plt.cm.YlGnBu)\n",
    "_ = ax.set_xticks(np.array(range(num_classes)))\n",
    "_ = ax.set_yticks(np.array(range(num_classes)))\n",
    "_ = ax.set_xticklabels(label_names)\n",
    "_ = ax.set_yticklabels(label_names)\n",
    "_ = ax.set_ylabel('Provided labels')\n",
    "_ = ax.set_xlabel('Predicted by model')\n",
    "ax.xaxis.tick_top()\n",
    "\n",
    "# Rotate the tick labels\n",
    "_ = plt.setp(ax.get_xticklabels(), rotation=90)\n",
    "\n",
    "_ = ax.set_xticks(np.array(range(num_classes)) - 0.5, minor=True)\n",
    "_ = ax.set_yticks(np.array(range(num_classes)) - 0.5, minor=True)\n",
    "ax.grid(which='minor', color='white', linestyle='-', linewidth=3)\n",
    "\n",
    "cbar = ax.figure.colorbar(im, ax=ax)\n",
    "\n",
    "# no border\n",
    "for edge, spine in ax.spines.items():\n",
    "        spine.set_visible(False)\n",
    "\n",
    "# right-click save - layout isn't right otherwise\n",
    "        \n",
    "#fig.tight_layout()\n",
    "#plt.savefig('/Users/siyuyang/Source/temp_data/WCS_land_use/train_200218/result_val/evaluation/cm.png')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "cm_norm[32][32]\n",
    "cm_norm[33][33]\n",
    "\n",
    "cm_norm[30, 33]  # row, col - ground truth, predicted\n",
    "cm_norm[33][30] "
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Side by side label and output counts, in log scale"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Per-class accuracy, precision and recall"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# per-class accuracy\n",
    "total_obs = cm.sum()\n",
    "\n",
    "per_class_accuracy = {}\n",
    "per_class_recall = {}\n",
    "per_class_precision = {}\n",
    "\n",
    "for cls in range(num_classes):\n",
    "    if cls not in classes_present_in_gt:\n",
    "        continue\n",
    "        \n",
    "    true_pos = cm[cls, cls]\n",
    "    \n",
    "    true_neg = total_obs - cm[cls, :].sum() - cm[:, cls].sum() + true_pos\n",
    "    \n",
    "    false_pos = cm[:, cls].sum() - true_pos\n",
    "    \n",
    "    false_neg = cm[cls, :].sum() - true_pos\n",
    "    \n",
    "    per_class_accuracy[cls] = (true_pos + true_neg) / total_obs\n",
    "    \n",
    "    per_class_precision[cls] = true_pos / (true_pos + false_pos)\n",
    "    \n",
    "    per_class_recall[cls] = true_pos / (true_pos + false_neg)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "print('Category, Accuracy, Precision, Recall')\n",
    "for cls, acc in per_class_accuracy.items():\n",
    "    prec = per_class_precision[cls]\n",
    "    recall = per_class_recall[cls]\n",
    "    print(f'{cls} {viz_util.num_to_name[str(cls)]},{acc},{prec},{recall}')\n",
    "    \n",
    "# paste the result into Pages, and fix the row for \"27 Lakes, lagoons, and natural cienaga\""
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Since the dataset is so unbalanced (mostly 12 - dense forest) and accuracy counts \"true negatives\" as a win, this is not a good measure of performance."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Save the evaluation findings - not yet done"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "saved = {\n",
    "    'overall_accuracy': overall_accuracy,\n",
    "    'per_class_accuracy': per_class_accuracy,\n",
    "    # 'calibration_summary': calibration_summary\n",
    "}\n",
    "\n",
    "with open(eval_saved_to, 'w') as f:\n",
    "    json.dump(saved, f, indent=4)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Is the model well-calibrated?\n",
    "\n",
    "We can also just record a 2D shape - each cell is the confidence of the most confident class?"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "with open(output_scores_path, 'rb') as f:\n",
    "    dict_scores = pickle.load(f)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "classes_to_plot = [0, 11, 12, 17, 19, 26, 32]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "y_true = defaultdict(list)\n",
    "y_prob = defaultdict(list)\n",
    "\n",
    "for window, chip_scores in tqdm(dict_scores.items()):\n",
    "    # rasterio window is (col_off x, row_off y, width, height)\n",
    "    \n",
    "    chip_scores = chip_scores.squeeze()  # chip_scores have shape (1, 33, 256, 256)\n",
    "    chip_scores = chip_scores.reshape((33, -1))\n",
    "\n",
    "    chip_labels = label_mask[window[0]:window[0] + 256, window[1]:window[1] + 256]\n",
    "    chip_labels = chip_labels.reshape((1, -1))\n",
    "    # we pad 0 to the end of chips after the tile ends\n",
    "    chip_labels = np.pad(chip_labels, ((0, 0), (0, 256*256 - chip_labels.shape[1]))).squeeze()\n",
    "    \n",
    "    assert chip_scores.shape == (33, 256*256), chip_scores.shape\n",
    "    assert chip_labels.shape == (256*256,), chip_labels.shape\n",
    "    \n",
    "    for cls in classes_to_plot:\n",
    "        cls_y_true = chip_labels == cls\n",
    "        cls_y_prob = chip_scores[cls]\n",
    "        assert len(list(cls_y_true)) == len(list(cls_y_prob)), '{}, {}'.format(\n",
    "            len(list(cls_y_true)), len(list(cls_y_prob))\n",
    "        )\n",
    "        y_true[cls].extend(list(cls_y_true))\n",
    "        y_prob[cls].extend(list(cls_y_prob))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "len(y_true[12])\n",
    "len(y_prob[12])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "_ = plt.plot([0.0, 1.0], color='grey', linestyle=':')\n",
    "\n",
    "for cls in classes_to_plot:\n",
    "    _ = frac_positives, mean_prob_in_bin = calibration_curve(y_true[cls], y_prob[cls], n_bins=10)\n",
    "    _ = plt.plot(mean_prob_in_bin, frac_positives, label=cls, color=viz_util.num_to_color[str(cls)])\n",
    "_ = plt.legend()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "mean_prob_in_bin\n",
    "frac_positives"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Expected number of pixels for the whole validation area"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "probability_sum = np.zeros(num_classes, dtype=np.float)\n",
    "\n",
    "for window, chip_scores in dict_scores.items():\n",
    "    # print(chip_scores.shape)  # (1, 33, 256, 256)\n",
    "    chip_scores = chip_scores.squeeze()\n",
    "    chip_scores = chip_scores.sum(axis=(1, 2))  # height and width dims\n",
    "    probability_sum += chip_scores"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "calibration_summary = {}\n",
    "\n",
    "for cls, (prob_sum, label_sum) in enumerate(zip(probability_sum, true_counts)):\n",
    "    calibration_summary[cls] = {\n",
    "        'prediction_probability_sum': prob_sum,\n",
    "        'label_sum': int(label_sum)\n",
    "    }\n",
    "    print('Class {} - {}, prob_sum {}, label_sum {}'.format(cls, viz_util.num_to_name[str(cls)], round(prob_sum), label_sum))\n",
    "    if label_sum > 0:\n",
    "        print('    diff is {}%'.format(100 * round((prob_sum - label_sum)/label_sum, 3)))\n",
    "        calibration_summary[cls]['difference_wrt_label_sum'] = (prob_sum - label_sum)/label_sum"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python [conda env:wcs] *",
   "language": "python",
   "name": "conda-env-wcs-py"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.9"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}