Cleaner benchmark header levels

This commit is contained in:
Adam J. Stewart 2021-08-20 14:39:54 -05:00
Родитель d7d901c866
Коммит b60948e598
1 изменённых файлов: 270 добавлений и 241 удалений

Просмотреть файл

@ -1,243 +1,6 @@
{
"cells": [
{
"cell_type": "markdown",
"source": [
"### Imports"
],
"metadata": {
"nteract": {
"transient": {
"deleting": false
}
}
}
},
{
"cell_type": "code",
"source": [
"import os\r\n",
"import sys\r\n",
"import time\r\n",
"from typing import Tuple\r\n",
"\r\n",
"sys.path.append(\"../..\")\r\n",
"\r\n",
"from torch.utils.data import DataLoader\r\n",
"\r\n",
"from torchgeo.datasets import ChesapeakeDE, NAIP\r\n",
"from torchgeo.samplers import RandomGeoSampler, GridGeoSampler, RandomBatchGeoSampler\r\n",
"\r\n",
"\r\n",
"ROOT = \"/mnt/blobfuse/adam-scratch\""
],
"outputs": [],
"execution_count": 1,
"metadata": {
"gather": {
"logged": 1629238744113
}
}
},
{
"cell_type": "markdown",
"source": [
"### Timing function"
],
"metadata": {
"nteract": {
"transient": {
"deleting": false
}
}
}
},
{
"cell_type": "code",
"source": [
"def time_epoch(dataloader: DataLoader) -> Tuple[float, int]:\r\n",
"    \"\"\"Time one full pass over a data loader.\r\n",
"\r\n",
"    Args:\r\n",
"        dataloader: iterable of batches; it is exhausted exactly once\r\n",
"\r\n",
"    Returns:\r\n",
"        Tuple of (elapsed time in seconds, number of batches yielded).\r\n",
"    \"\"\"\r\n",
"    # perf_counter() is monotonic and high-resolution; time.time() follows the\r\n",
"    # wall clock, which can be adjusted mid-run and skew a benchmark.\r\n",
"    start = time.perf_counter()\r\n",
"    num_batches = 0\r\n",
"    for _ in dataloader:\r\n",
"        num_batches += 1\r\n",
"    elapsed = time.perf_counter() - start\r\n",
"    return elapsed, num_batches"
],
"outputs": [],
"execution_count": 2,
"metadata": {
"collapsed": true,
"jupyter": {
"source_hidden": false,
"outputs_hidden": false
},
"nteract": {
"transient": {
"deleting": false
}
},
"gather": {
"logged": 1629238744228
}
}
},
{
"cell_type": "markdown",
"source": [
"### RandomGeoSampler"
],
"metadata": {
"nteract": {
"transient": {
"deleting": false
}
}
}
},
{
"cell_type": "code",
"source": [
"# Run the benchmark twice: first with caching disabled, then enabled.\r\n",
"for cache in (False, True):\r\n",
"    chesapeake = ChesapeakeDE(os.path.join(ROOT, \"chesapeake\", \"DE\"), cache=cache)\r\n",
"    # NAIP is opened with the CRS and resolution of the Chesapeake dataset.\r\n",
"    naip = NAIP(\r\n",
"        os.path.join(ROOT, \"naip\"),\r\n",
"        crs=chesapeake.crs,\r\n",
"        res=chesapeake.res,\r\n",
"        cache=cache,\r\n",
"    )\r\n",
"    dataset = chesapeake + naip\r\n",
"    sampler = RandomGeoSampler(naip.index, size=1000, length=888)\r\n",
"    dataloader = DataLoader(dataset, batch_size=12, sampler=sampler)\r\n",
"    duration, count = time_epoch(dataloader)\r\n",
"    print(duration, count)"
],
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"296.582683801651 74\n",
"54.20210099220276 74\n"
]
}
],
"execution_count": 8,
"metadata": {
"collapsed": true,
"jupyter": {
"source_hidden": false,
"outputs_hidden": false
},
"nteract": {
"transient": {
"deleting": false
}
},
"gather": {
"logged": 1629248963725
}
}
},
{
"cell_type": "markdown",
"source": [
"### GridGeoSampler"
],
"metadata": {
"nteract": {
"transient": {
"deleting": false
}
}
}
},
{
"cell_type": "code",
"source": [
"# Run the benchmark twice: first with caching disabled, then enabled.\r\n",
"for cache in (False, True):\r\n",
"    chesapeake = ChesapeakeDE(os.path.join(ROOT, \"chesapeake\", \"DE\"), cache=cache)\r\n",
"    # NAIP is opened with the CRS and resolution of the Chesapeake dataset.\r\n",
"    naip = NAIP(\r\n",
"        os.path.join(ROOT, \"naip\"),\r\n",
"        crs=chesapeake.crs,\r\n",
"        res=chesapeake.res,\r\n",
"        cache=cache,\r\n",
"    )\r\n",
"    dataset = chesapeake + naip\r\n",
"    sampler = GridGeoSampler(naip.index, size=1000, stride=500)\r\n",
"    dataloader = DataLoader(dataset, batch_size=12, sampler=sampler)\r\n",
"    duration, count = time_epoch(dataloader)\r\n",
"    print(duration, count)"
],
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"391.90197944641113 74\n",
"118.0611424446106 74\n"
]
}
],
"execution_count": 4,
"metadata": {
"collapsed": true,
"jupyter": {
"source_hidden": false,
"outputs_hidden": false
},
"nteract": {
"transient": {
"deleting": false
}
},
"gather": {
"logged": 1629239313388
}
}
},
{
"cell_type": "markdown",
"source": [
"### RandomBatchGeoSampler"
],
"metadata": {
"nteract": {
"transient": {
"deleting": false
}
}
}
},
{
"cell_type": "code",
"source": [
"# Run the benchmark twice: first with caching disabled, then enabled.\r\n",
"for cache in (False, True):\r\n",
"    chesapeake = ChesapeakeDE(os.path.join(ROOT, \"chesapeake\", \"DE\"), cache=cache)\r\n",
"    # NAIP is opened with the CRS and resolution of the Chesapeake dataset.\r\n",
"    naip = NAIP(\r\n",
"        os.path.join(ROOT, \"naip\"),\r\n",
"        crs=chesapeake.crs,\r\n",
"        res=chesapeake.res,\r\n",
"        cache=cache,\r\n",
"    )\r\n",
"    dataset = chesapeake + naip\r\n",
"    # The batch size is given to the sampler, which is passed as batch_sampler.\r\n",
"    sampler = RandomBatchGeoSampler(naip.index, size=1000, batch_size=12, length=888)\r\n",
"    dataloader = DataLoader(dataset, batch_sampler=sampler)\r\n",
"    duration, count = time_epoch(dataloader)\r\n",
"    print(duration, count)"
],
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"230.51380324363708 74\n",
"53.99923872947693 74\n"
]
}
],
"execution_count": 10,
"metadata": {
"collapsed": true,
"jupyter": {
"source_hidden": false,
"outputs_hidden": false
},
"nteract": {
"transient": {
"deleting": false
}
},
"gather": {
"logged": 1629249843438
}
}
}
],
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"kernelspec": {
"name": "python38-azureml",
@ -261,8 +24,274 @@
},
"nteract": {
"version": "nteract-front-end@1.0.0"
},
"colab": {
"name": "benchmarking.ipynb",
"provenance": [],
"collapsed_sections": []
}
},
"nbformat": 4,
"nbformat_minor": 2
"cells": [
{
"cell_type": "markdown",
"metadata": {
"id": "OFXtoHmJClRf"
},
"source": [
"# Benchmarking\n",
"\n",
"This tutorial benchmarks the performance of various sampling strategies, with and without caching."
]
},
{
"cell_type": "markdown",
"metadata": {
"nteract": {
"transient": {
"deleting": false
}
},
"id": "hC3pauOLChi4"
},
"source": [
"## Imports"
]
},
{
"cell_type": "code",
"metadata": {
"gather": {
"logged": 1629238744113
},
"id": "gjFiws-PChi8"
},
"source": [
"import os\n",
"import sys\n",
"import time\n",
"from typing import Tuple\n",
"\n",
"sys.path.append(\"../..\")\n",
"\n",
"from torch.utils.data import DataLoader\n",
"\n",
"from torchgeo.datasets import ChesapeakeDE, NAIP\n",
"from torchgeo.samplers import RandomGeoSampler, GridGeoSampler, RandomBatchGeoSampler\n",
"\n",
"\n",
"ROOT = \"/mnt/blobfuse/adam-scratch\""
],
"execution_count": null,
"outputs": []
},
{
"cell_type": "markdown",
"metadata": {
"nteract": {
"transient": {
"deleting": false
}
},
"id": "n6HwpMz7Chi-"
},
"source": [
"## Timing function"
]
},
{
"cell_type": "code",
"metadata": {
"collapsed": true,
"jupyter": {
"source_hidden": false,
"outputs_hidden": false
},
"nteract": {
"transient": {
"deleting": false
}
},
"gather": {
"logged": 1629238744228
},
"id": "8-z6_y2xChi-"
},
"source": [
"def time_epoch(dataloader: DataLoader) -> Tuple[float, int]:\n",
"    \"\"\"Time one full pass over a data loader.\n",
"\n",
"    Args:\n",
"        dataloader: iterable of batches; it is exhausted exactly once\n",
"\n",
"    Returns:\n",
"        Tuple of (elapsed time in seconds, number of batches yielded).\n",
"    \"\"\"\n",
"    # perf_counter() is monotonic and high-resolution; time.time() follows the\n",
"    # wall clock, which can be adjusted mid-run and skew a benchmark.\n",
"    start = time.perf_counter()\n",
"    num_batches = 0\n",
"    for _ in dataloader:\n",
"        num_batches += 1\n",
"    elapsed = time.perf_counter() - start\n",
"    return elapsed, num_batches"
],
"execution_count": null,
"outputs": []
},
{
"cell_type": "markdown",
"metadata": {
"nteract": {
"transient": {
"deleting": false
}
},
"id": "I3pkKYoeChi_"
},
"source": [
"## RandomGeoSampler"
]
},
{
"cell_type": "code",
"metadata": {
"collapsed": true,
"jupyter": {
"source_hidden": false,
"outputs_hidden": false
},
"nteract": {
"transient": {
"deleting": false
}
},
"gather": {
"logged": 1629248963725
},
"id": "jPjIZLF7Chi_",
"outputId": "edcc8199-bd09-4832-e50c-7be8ac78995b"
},
"source": [
"# Run the benchmark twice: first with caching disabled, then enabled.\n",
"for cache in (False, True):\n",
"    chesapeake = ChesapeakeDE(os.path.join(ROOT, \"chesapeake\", \"DE\"), cache=cache)\n",
"    # NAIP is opened with the CRS and resolution of the Chesapeake dataset.\n",
"    naip = NAIP(\n",
"        os.path.join(ROOT, \"naip\"),\n",
"        crs=chesapeake.crs,\n",
"        res=chesapeake.res,\n",
"        cache=cache,\n",
"    )\n",
"    dataset = chesapeake + naip\n",
"    sampler = RandomGeoSampler(naip.index, size=1000, length=888)\n",
"    dataloader = DataLoader(dataset, batch_size=12, sampler=sampler)\n",
"    duration, count = time_epoch(dataloader)\n",
"    print(duration, count)"
],
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"296.582683801651 74\n",
"54.20210099220276 74\n"
]
}
]
},
{
"cell_type": "markdown",
"metadata": {
"nteract": {
"transient": {
"deleting": false
}
},
"id": "pHqLRDA_ChjB"
},
"source": [
"## GridGeoSampler"
]
},
{
"cell_type": "code",
"metadata": {
"collapsed": true,
"jupyter": {
"source_hidden": false,
"outputs_hidden": false
},
"nteract": {
"transient": {
"deleting": false
}
},
"gather": {
"logged": 1629239313388
},
"id": "K67vnCK4ChjC",
"outputId": "159ce99f-a438-4ecc-d218-9b9e28d02055"
},
"source": [
"# Run the benchmark twice: first with caching disabled, then enabled.\n",
"for cache in (False, True):\n",
"    chesapeake = ChesapeakeDE(os.path.join(ROOT, \"chesapeake\", \"DE\"), cache=cache)\n",
"    # NAIP is opened with the CRS and resolution of the Chesapeake dataset.\n",
"    naip = NAIP(\n",
"        os.path.join(ROOT, \"naip\"),\n",
"        crs=chesapeake.crs,\n",
"        res=chesapeake.res,\n",
"        cache=cache,\n",
"    )\n",
"    dataset = chesapeake + naip\n",
"    sampler = GridGeoSampler(naip.index, size=1000, stride=500)\n",
"    dataloader = DataLoader(dataset, batch_size=12, sampler=sampler)\n",
"    duration, count = time_epoch(dataloader)\n",
"    print(duration, count)"
],
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"391.90197944641113 74\n",
"118.0611424446106 74\n"
]
}
]
},
{
"cell_type": "markdown",
"metadata": {
"nteract": {
"transient": {
"deleting": false
}
},
"id": "8rwjrOD1ChjD"
},
"source": [
"## RandomBatchGeoSampler"
]
},
{
"cell_type": "code",
"metadata": {
"collapsed": true,
"jupyter": {
"source_hidden": false,
"outputs_hidden": false
},
"nteract": {
"transient": {
"deleting": false
}
},
"gather": {
"logged": 1629249843438
},
"id": "v-N2fo6UChjE",
"outputId": "497f6869-1ab7-4db7-bbce-e943b493ca41"
},
"source": [
"# Run the benchmark twice: first with caching disabled, then enabled.\n",
"for cache in (False, True):\n",
"    chesapeake = ChesapeakeDE(os.path.join(ROOT, \"chesapeake\", \"DE\"), cache=cache)\n",
"    # NAIP is opened with the CRS and resolution of the Chesapeake dataset.\n",
"    naip = NAIP(\n",
"        os.path.join(ROOT, \"naip\"),\n",
"        crs=chesapeake.crs,\n",
"        res=chesapeake.res,\n",
"        cache=cache,\n",
"    )\n",
"    dataset = chesapeake + naip\n",
"    # The batch size is given to the sampler, which is passed as batch_sampler.\n",
"    sampler = RandomBatchGeoSampler(naip.index, size=1000, batch_size=12, length=888)\n",
"    dataloader = DataLoader(dataset, batch_sampler=sampler)\n",
"    duration, count = time_epoch(dataloader)\n",
"    print(duration, count)"
],
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"230.51380324363708 74\n",
"53.99923872947693 74\n"
]
}
]
}
]
}