Add scripts to generate plots from paper (#186)

* Add scripts to generate plots from paper

* Style fix
This commit is contained in:
Adam J. Stewart 2021-10-10 18:31:23 -05:00 коммит произвёл GitHub
Родитель b578cb73a2
Коммит 5b77ded218
Не найден ключ, соответствующий данной подписи
Идентификатор ключа GPG: 4AEE18F83AFDEB23
3 изменённых файлов: 176 добавлений и 0 удалений

77
experiments/plot_bar_chart.py Executable file
Просмотреть файл

@ -0,0 +1,77 @@
#!/usr/bin/env python3
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License.
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
df1 = pd.read_csv("original-benchmark-results.csv")
df2 = pd.read_csv("warped-benchmark-results.csv")
mean1 = df1.groupby("sampler").mean()
mean2 = df2.groupby("sampler").mean()
cached1 = (
df1[(df1["cached"]) & (df1["sampler"] != "resnet18")].groupby("sampler").mean()
)
cached2 = (
df2[(df2["cached"]) & (df2["sampler"] != "resnet18")].groupby("sampler").mean()
)
not_cached1 = (
df1[(~df1["cached"]) & (df1["sampler"] != "resnet18")].groupby("sampler").mean()
)
not_cached2 = (
df2[(~df2["cached"]) & (df2["sampler"] != "resnet18")].groupby("sampler").mean()
)
print("cached, original\n", cached1)
print("cached, warped\n", cached2)
print("not cached, original\n", not_cached1)
print("not cached, warped\n", not_cached2)
cmap = sns.color_palette()
labels = ["GridGeoSampler", "RandomBatchGeoSampler", "RandomGeoSampler"]
fig, ax = plt.subplots()
x = np.arange(3)
width = 0.2
rects1 = ax.bar(
x - width * 3 / 2,
not_cached1["rate"],
width,
label="Raw Data, Not Cached",
color=cmap[0],
)
rects2 = ax.bar(
x - width * 1 / 2,
not_cached2["rate"],
width,
label="Preprocessed, Not Cached",
color=cmap[1],
)
rects2 = ax.bar(
x + width * 1 / 2, cached1["rate"], width, label="Raw Data, Cached", color=cmap[2]
)
rects3 = ax.bar(
x + width * 3 / 2,
cached2["rate"],
width,
label="Preprocessed, Cached",
color=cmap[3],
)
ax.set_ylabel("sampling rate (patches/sec)", fontsize=12)
ax.set_xticks(x)
ax.set_xticklabels(labels, fontsize=12)
ax.tick_params(axis="x", labelrotation=10)
ax.legend(fontsize="large")
plt.gca().spines.right.set_visible(False)
plt.gca().spines.top.set_visible(False)
plt.tight_layout()
plt.show()

Просмотреть файл

@ -0,0 +1,43 @@
#!/usr/bin/env python3
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License.
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
df = pd.read_csv("warped-benchmark-results.csv")
random_cached = df[(df["sampler"] == "RandomGeoSampler") & (df["cached"])]
random_batch_cached = df[(df["sampler"] == "RandomBatchGeoSampler") & (df["cached"])]
grid_cached = df[(df["sampler"] == "GridGeoSampler") & (df["cached"])]
other = [
("RandomGeoSampler", random_cached),
("RandomBatchGeoSampler", random_batch_cached),
("GridGeoSampler", grid_cached),
]
cmap = sns.color_palette()
ax = plt.gca()
for i, (label, df) in enumerate(other):
df = df.groupby("batch_size")
ax.plot(df.mean().index, df.mean()["rate"], color=cmap[i], label=label)
ax.fill_between(
df.mean().index, df.min()["rate"], df.max()["rate"], color=cmap[i], alpha=0.2
)
ax.set_xscale("log")
ax.set_xticks([16, 32, 64, 128, 256])
ax.set_xticklabels([16, 32, 64, 128, 256], fontsize=12)
ax.set_xlabel("batch size", fontsize=12)
ax.set_ylabel("sampling rate (patches/sec)", fontsize=12)
ax.legend(loc="center right", fontsize="large")
plt.gca().spines.right.set_visible(False)
plt.gca().spines.top.set_visible(False)
plt.tight_layout()
plt.show()

Просмотреть файл

@ -0,0 +1,56 @@
#!/usr/bin/env python3
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License.
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
df1 = pd.read_csv("original-benchmark-results.csv")
df2 = pd.read_csv("warped-benchmark-results.csv")
random_cached1 = df1[(df1["sampler"] == "RandomGeoSampler") & (df1["cached"])]
random_cached2 = df2[(df2["sampler"] == "RandomGeoSampler") & (df2["cached"])]
random_cachedp = random_cached1
random_cachedp["rate"] /= random_cached2["rate"]
random_batch_cached1 = df1[
(df1["sampler"] == "RandomBatchGeoSampler") & (df1["cached"])
]
random_batch_cached2 = df2[
(df2["sampler"] == "RandomBatchGeoSampler") & (df2["cached"])
]
random_batch_cachedp = random_batch_cached1
random_batch_cachedp["rate"] /= random_batch_cached2["rate"]
grid_cached1 = df1[(df1["sampler"] == "GridGeoSampler") & (df1["cached"])]
grid_cached2 = df2[(df2["sampler"] == "GridGeoSampler") & (df2["cached"])]
grid_cachedp = grid_cached1
grid_cachedp["rate"] /= grid_cached2["rate"]
other = [
("RandomGeoSampler (cached)", random_cachedp),
("RandomBatchGeoSampler (cached)", random_batch_cachedp),
("GridGeoSampler (cached)", grid_cachedp),
]
cmap = sns.color_palette()
ax = plt.gca()
for i, (label, df) in enumerate(other):
df = df.groupby("batch_size")
ax.plot([16, 32, 64, 128, 256], df.mean()["rate"], color=cmap[i], label=label)
ax.fill_between(
df.mean().index, df.min()["rate"], df.max()["rate"], color=cmap[i], alpha=0.2
)
ax.set_xscale("log")
ax.set_xticks([16, 32, 64, 128, 256])
ax.set_xticklabels([16, 32, 64, 128, 256])
ax.set_xlabel("batch size")
ax.set_ylabel("% sampling rate (patches/sec)")
ax.legend()
plt.show()