Performance function name in scatterplots (#36)

* Create performanceFunctionLabel prop

* Add performance_metric parameter to the compatibility_analysis API and display the metric name in the scatter plot.

* Rename performanceFunctionLabel prop -> performanceMetric for consistency

* Don't rename model_accuracy to Accuracy; center the metric name.

* Documentation for performance metric function
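
Any callable with the documented signature metric(model, dataset, device) can be plugged in. As an illustrative sketch only (not part of this commit; it assumes the model returns a (_, _, log_softmax) tuple, as the models in this library do):

import torch
from sklearn.metrics import f1_score

def model_f1(model, dataset, device="cpu"):
    # Hypothetical custom metric: macro-averaged F1 over the whole dataset.
    # dataset is a list of batched (input, target) pairs.
    predictions, targets = [], []
    with torch.no_grad():
        for data, target in dataset:
            if device != "cpu":
                data = data.to(device)
            _, _, output_logsoftmax = model(data)
            batch_predictions = torch.argmax(output_logsoftmax, 1)
            predictions.extend(batch_predictions.cpu().numpy())
            targets.extend(target.numpy())
    return f1_score(targets, predictions, average="macro")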

Co-authored-by: Nicholas King <v-nicki@microsoft.com>
Nicholas King 2020-10-23 13:53:14 -07:00 committed by Xavier Fernandes
Parent 47f5b3f893
Commit 071c32fd40
No key found matching this signature
GPG Key ID: 1B011D38C073A7F2
6 changed files with 67 additions and 60 deletions

View file

@@ -5,8 +5,8 @@ import copy
import json
import torch
import numpy as np
-from sklearn.metrics import accuracy_score
import backwardcompatibilityml.scores as scores
+from backwardcompatibilityml.metrics import model_accuracy

def train_epoch(epoch, network, optimizer, loss_function, training_set, batch_size_train,
@@ -426,36 +426,13 @@ def compatibility_scores(h1, h2, dataset, device="cpu"):
    return btc_dataset, bec_dataset

-def model_accuracy(model, dataset, device="cpu"):
-    number_of_batches = len(dataset)
-    model_performance = 0
-    with torch.no_grad():
-        for data, target in dataset:
-            if device != "cpu":
-                data = data.to(device)
-                target = target.to(device)
-            _, _, output_logsoftmax = model(data)
-            output_labels = torch.argmax(output_logsoftmax, 1)
-            if device != "cpu":
-                output_labels = output_labels.cpu()
-                target = target.cpu()
-            performance = accuracy_score(output_labels.numpy(), target.numpy())
-            model_performance += performance
-            # _clean_from_gpu([data, target])
-    model_performance /= number_of_batches
-    return model_performance

-def evaluate_model_performance_and_compatibility_on_dataset(h1, h2, dataset, performance_metric=None,
+def evaluate_model_performance_and_compatibility_on_dataset(h1, h2, dataset, performance_metric,
                                                             device="cpu"):
    """
    Args:
        h1: The reference model being used.
        h2: The model being trained / updated.
-        performance_metric: Optional performance metric to be used when evaluating the model.
-            If not specified then accuracy is used.
+        performance_metric: Performance metric to be used when evaluating the model.
        device: A string with values either "cpu" or "cuda" to indicate the
            device that PyTorch is performing training on. By default this
            value is "cpu". But in case your models reside on the GPU, make sure
@@ -498,25 +475,7 @@ def evaluate_model_performance_and_compatibility_on_dataset(h1, h2, dataset, per
            "incompatibleFraction": error_fraction
        })

-    if performance_metric is not None:
-        h2_performance = performance_metric(h2, dataset)
-    else:
-        h2_performance = 0
-        with torch.no_grad():
-            for data, target in dataset:
-                if device != "cpu":
-                    data = data.to(device)
-                    target = target.to(device)
-                _, _, output_logsoftmax = h2(data)
-                output_labels = torch.argmax(output_logsoftmax, 1)
-                if device != "cpu":
-                    output_labels = output_labels.cpu()
-                    target = target.cpu()
-                performance = accuracy_score(output_labels.numpy(), target.numpy())
-                h2_performance += performance
-                # _clean_from_gpu([data, target])
-        h2_performance /= number_of_batches
+    h2_performance = performance_metric(h2, dataset, device)

    btc, bec = compatibility_scores(h1, h2, dataset, device=device)
@@ -535,7 +494,7 @@ def evaluate_model_performance_and_compatibility_on_dataset(h1, h2, dataset, per
    }

-def evaluate_model_performance_and_compatibility(h1, h2, training_set, test_set, performance_metric=None,
+def evaluate_model_performance_and_compatibility(h1, h2, training_set, test_set, performance_metric,
                                                 device="cpu"):
    """
    Calculate the error overlap of h1 and h2 on a batched dataset.
@@ -544,8 +503,7 @@ def evaluate_model_performance_and_compatibility(h1, h2, training_set, test_set,
    Args:
        h1: The reference model being used.
        h2: The model being trained / updated.
-        performance_metric: Optional performance metric to be used when evaluating the model.
-            If not specified then accuracy is used.
+        performance_metric: Performance metric to be used when evaluating the model.
        training_set: The list of batched training samples as (input, target) pairs.
        test_set: The list of batched testing samples as (input, target) pairs.
        device: A string with values either "cpu" or "cuda" to indicate the
@@ -559,11 +517,11 @@ def evaluate_model_performance_and_compatibility(h1, h2, training_set, test_set,
    """
    training_set_performance_and_compatibility =\
        evaluate_model_performance_and_compatibility_on_dataset(
-            h1, h2, training_set, performance_metric=performance_metric,
+            h1, h2, training_set, performance_metric,
            device=device)
    testing_set_performance_and_compatibility =\
        evaluate_model_performance_and_compatibility_on_dataset(
-            h1, h2, test_set, performance_metric=performance_metric,
+            h1, h2, test_set, performance_metric,
            device=device)

    return {
@@ -646,7 +604,7 @@ def compatibility_sweep(sweeps_folder_path, number_of_epochs, h1, h2,
                        training_set, test_set, batch_size_train, batch_size_test,
                        OptimizerClass, optimizer_kwargs,
                        NewErrorLossClass, StrictImitationLossClass,
-                        performance_metric=None,
+                        performance_metric=model_accuracy,
                        lambda_c_stepsize=0.25, percent_complete_queue=None,
                        new_error_loss_kwargs=None,
                        strict_imitation_loss_kwargs=None,
@@ -676,8 +634,13 @@ def compatibility_sweep(sweeps_folder_path, number_of_epochs, h1, h2,
        StrictImitationLossClass: The class of the Strict Imitation style loss
            function to be instantiated and used to perform compatibility
            constrained training of our model h2.
-        performance_metric: Optional performance metric to be used when evaluating the model.
-            If not specified then accuracy is used.
+        performance_metric: A function to evaluate model performance. The function is
+            expected to have the following signature:
+                metric(model, dataset, device)
+            model: The model being evaluated
+            dataset: The dataset as a list of (input, target) pairs
+            device: The device PyTorch is using for training - "cpu" or "cuda"
+            If unspecified, then accuracy is used.
        lambda_c_stepsize: The increments of lambda_c to use as we sweep the parameter
            space between 0.0 and 1.0.
        percent_complete_queue: Optional thread safe queue to use for logging the
@@ -714,7 +677,7 @@ def compatibility_sweep(sweeps_folder_path, number_of_epochs, h1, h2,
        training_set_performance_and_compatibility =\
            evaluate_model_performance_and_compatibility_on_dataset(
-                h1, h2_new_error, training_set, performance_metric=performance_metric,
+                h1, h2_new_error, training_set, performance_metric,
                device=device)
        training_set_performance_and_compatibility["lambda_c"] = lambda_c
        training_set_performance_and_compatibility["training"] = True
@@ -739,7 +702,7 @@ def compatibility_sweep(sweeps_folder_path, number_of_epochs, h1, h2,
        testing_set_performance_and_compatibility =\
            evaluate_model_performance_and_compatibility_on_dataset(
-                h1, h2_new_error, test_set, performance_metric=performance_metric,
+                h1, h2_new_error, test_set, performance_metric,
                device=device)
        testing_set_performance_and_compatibility["lambda_c"] = lambda_c
        testing_set_performance_and_compatibility["training"] = False
@@ -774,7 +737,7 @@ def compatibility_sweep(sweeps_folder_path, number_of_epochs, h1, h2,
        training_set_performance_and_compatibility =\
            evaluate_model_performance_and_compatibility_on_dataset(
-                h1, h2_strict_imitation, training_set, performance_metric=performance_metric,
+                h1, h2_strict_imitation, training_set, performance_metric,
                device=device)
        training_set_performance_and_compatibility["lambda_c"] = lambda_c
        training_set_performance_and_compatibility["training"] = True
@@ -799,7 +762,7 @@ def compatibility_sweep(sweeps_folder_path, number_of_epochs, h1, h2,
        testing_set_performance_and_compatibility =\
            evaluate_model_performance_and_compatibility_on_dataset(
-                h1, h2_new_error, test_set, performance_metric=performance_metric,
+                h1, h2_new_error, test_set, performance_metric,
                device=device)
        testing_set_performance_and_compatibility["lambda_c"] = lambda_c
        testing_set_performance_and_compatibility["training"] = False
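
The helper entry points above now take the metric positionally. A minimal sketch of a call, assuming h1, h2 and the batched train/test sets already exist:

from backwardcompatibilityml.helpers.training import evaluate_model_performance_and_compatibility
from backwardcompatibilityml.metrics import model_accuracy

# Passing model_accuracy reproduces the old default behavior; any
# metric(model, dataset, device) callable may be substituted.
results = evaluate_model_performance_and_compatibility(
    h1, h2, training_set, test_set, model_accuracy, device="cpu")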

View file

@@ -0,0 +1,26 @@
+# Copyright (c) Microsoft Corporation.
+# Licensed under the MIT License.
+
+import torch
+from sklearn.metrics import accuracy_score
+
+
+def model_accuracy(model, dataset, device="cpu"):
+    model_performance = 0
+    number_of_batches = len(dataset)
+    with torch.no_grad():
+        for data, target in dataset:
+            if device != "cpu":
+                data = data.to(device)
+                target = target.to(device)
+            _, _, output_logsoftmax = model(data)
+            output_labels = torch.argmax(output_logsoftmax, 1)
+            if device != "cpu":
+                output_labels = output_labels.cpu()
+                target = target.cpu()
+            performance = accuracy_score(output_labels.numpy(), target.numpy())
+            model_performance += performance
+            # _clean_from_gpu([data, target])
+    model_performance /= number_of_batches
+    return model_performance
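
Note that model_accuracy averages sklearn's accuracy_score per batch, so every batch contributes equally regardless of its size; with a ragged final batch the result is an approximation of dataset-level accuracy. A usage sketch, with trained_model and test_batches assumed:

from backwardcompatibilityml.metrics import model_accuracy

# test_batches: a list of (input, target) pairs; trained_model returns
# a (_, _, log_softmax) tuple as the models in this library do.
accuracy = model_accuracy(trained_model, test_batches, device="cpu")
print("mean per-batch accuracy:", accuracy)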

View file

@@ -6,6 +6,7 @@ import json
import threading
from queue import Queue
from backwardcompatibilityml.helpers import training
+from backwardcompatibilityml.metrics import model_accuracy


class SweepManager(object):
@@ -58,6 +59,7 @@
                 NewErrorLossClass, StrictImitationLossClass, lambda_c_stepsize=0.25,
                 new_error_loss_kwargs=None,
                 strict_imitation_loss_kwargs=None,
+                 performance_metric=model_accuracy,
                 device="cpu"):
        self.folder_name = folder_name
        self.number_of_epochs = number_of_epochs
@@ -71,6 +73,7 @@
        self.optimizer_kwargs = optimizer_kwargs
        self.NewErrorLossClass = NewErrorLossClass
        self.StrictImitationLossClass = StrictImitationLossClass
+        self.performance_metric = performance_metric
        self.lambda_c_stepsize = lambda_c_stepsize
        self.new_error_loss_kwargs = new_error_loss_kwargs
        self.strict_imitation_loss_kwargs = strict_imitation_loss_kwargs
@@ -83,7 +86,8 @@
                self.training_set, self.test_set,
                self.batch_size_train, self.batch_size_test,
                self.OptimizerClass, self.optimizer_kwargs,
-                self.NewErrorLossClass, self.StrictImitationLossClass,),
+                self.NewErrorLossClass, self.StrictImitationLossClass,
+                self.performance_metric,),
            kwargs={
                "lambda_c_stepsize": self.lambda_c_stepsize,
                "percent_complete_queue": self.percent_complete_queue,
@@ -116,6 +120,7 @@
    def get_sweep_summary(self):
        sweep_summary = {
            "h1_performance": None,
+            "performance_metric": self.performance_metric.__name__,
            "data": []
        }
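
Because the metric arrives as a function object, the sweep summary serializes its __name__ for the front end; this is also why model_accuracy is no longer renamed to "Accuracy" in the UI. For the default metric, the summary starts out roughly as:

# Approximate initial shape of get_sweep_summary()'s return value;
# "data" is populated during the sweep (elided from this diff).
sweep_summary = {
    "h1_performance": None,
    "performance_metric": "model_accuracy",  # self.performance_metric.__name__
    "data": []
}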

View file

@@ -12,6 +12,7 @@ import torch.optim as optim
from flask import Response
from backwardcompatibilityml import loss
from backwardcompatibilityml.sweep_management import SweepManager
+from backwardcompatibilityml.metrics import model_accuracy
from rai_core_flask.flask_helper import FlaskHelper
from rai_core_flask.environments import (
    AzureNBEnvironment,
@@ -169,6 +170,13 @@ class CompatibilityAnalysis(object):
        StrictImitationLossClass: The class of the Strict Imitation style loss
            function to be instantiated and used to perform compatibility
            constrained training of our model h2.
+        performance_metric: A function to evaluate model performance. The function is
+            expected to have the following signature:
+                metric(model, dataset, device)
+            model: The model being evaluated
+            dataset: The dataset as a list of (input, target) pairs
+            device: The device PyTorch is using for training - "cpu" or "cuda"
+            If unspecified, then accuracy is used.
        port: An integer value to indicate the port to which the Flask service
            should bind.
        device: A string with values either "cpu" or "cuda" to indicate the
@@ -182,6 +190,7 @@ class CompatibilityAnalysis(object):
                 batch_size_train, batch_size_test, lambda_c_stepsize=0.25,
                 OptimizerClass=None, optimizer_kwargs=None,
                 NewErrorLossClass=None, StrictImitationLossClass=None,
+                 performance_metric=model_accuracy,
                 port=None, new_error_loss_kwargs=None,
                 strict_imitation_loss_kwargs=None, device="cpu"):
        if OptimizerClass is None:
if OptimizerClass is None:
@@ -211,6 +220,7 @@
            lambda_c_stepsize=lambda_c_stepsize,
            new_error_loss_kwargs=new_error_loss_kwargs,
            strict_imitation_loss_kwargs=strict_imitation_loss_kwargs,
+            performance_metric=performance_metric,
            device=device)

        self.flask_service = FlaskHelper(ip="0.0.0.0", port=port)
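
At the widget level the new keyword is optional and defaults to model_accuracy. A hypothetical instantiation with the custom metric sketched earlier; the import path and the leading positional arguments (mirroring the SweepManager fields) are assumptions, not shown in this diff:

from backwardcompatibilityml.widget.compatibility_analysis import CompatibilityAnalysis  # path assumed

analysis = CompatibilityAnalysis(
    "sweeps", 10, h1, h2, training_set, test_set,
    batch_size_train=64, batch_size_test=128,
    performance_metric=model_f1,  # omit to fall back to model_accuracy
    device="cpu")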

View file

@@ -103,6 +103,7 @@ function Container({
          <PerformanceCompatibility
            data={data.data}
            h1Performance={data.h1_performance}
+            performanceMetric={data.performance_metric}
            training={training}
            testing={testing}
            newError={newError}
@@ -115,6 +116,7 @@
          <PerformanceCompatibility
            data={data.data}
            h1Performance={data.h1_performance}
+            performanceMetric={data.performance_metric}
            training={training}
            testing={testing}
            newError={newError}

View file

@@ -25,6 +25,7 @@ type PerformanceCompatibilityProps = {
  strictImitation: boolean,
  selectedDataPoint: any,
  compatibilityScoreType: string,
+  performanceMetric: string,
  selectDataPoint: (d: any) => void,
  getModelEvaluationData: (evaluationId: number) => void
}
@@ -170,11 +171,11 @@ class PerformanceCompatibility extends Component<PerformanceCompatibilityProps,
      .append('text')
      .attr('id', 'yAxisLabel')
      .attr('transform','rotate(-90)')
-      .attr('x',-h/2)
+      .attr('x',-h/2+2.5*this.props.performanceMetric.length)
      .attr('y',-50)
      .attr('dy','.71em')
      .style('text-anchor','end')
-      .text('Performance')
+      .text(this.props.performanceMetric)
      .attr("font-family", "sans-serif")
      .attr("font-size", "20px")
      .attr("fill", "black");