Performance function name in scatterplots (#36)
* Create performanceFunctionLabel prop
* Add performance_metric parameter to the compatibility_analysis API and display the metric name in the scatter plot
* Rename performanceFunctionLabel prop -> performanceMetric for consistency
* Don't rename model_accuracy to Accuracy; center the metric name instead
* Documentation for the performance metric function

Co-authored-by: Nicholas King <v-nicki@microsoft.com>
Parent: 47f5b3f893
Commit: 071c32fd40
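As a sketch of how the new hook is meant to be used (an editor's illustration, not part of the commit: model_f1, its body, and the elided constructor arguments are hypothetical, while the signature follows the docstrings added below):

    import torch
    from sklearn.metrics import f1_score


    def model_f1(model, dataset, device="cpu"):
        # Hypothetical custom metric with the documented signature
        # metric(model, dataset, device): `dataset` is a list of batched
        # (input, target) pairs, and the model returns a triple whose third
        # element is its log-softmax output, as assumed throughout this diff.
        total = 0.0
        with torch.no_grad():
            for data, target in dataset:
                if device != "cpu":
                    data, target = data.to(device), target.to(device)
                _, _, output_logsoftmax = model(data)
                output_labels = torch.argmax(output_logsoftmax, 1)
                total += f1_score(target.cpu().numpy(),
                                  output_labels.cpu().numpy(),
                                  average="macro")
        return total / len(dataset)


    # Hooked up via the new keyword argument, e.g.:
    #     CompatibilityAnalysis(..., performance_metric=model_f1)
    # The widget then labels the scatter plot with the function name, "model_f1".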
@@ -5,8 +5,8 @@ import copy
 import json
 import torch
 import numpy as np
-from sklearn.metrics import accuracy_score
 import backwardcompatibilityml.scores as scores
+from backwardcompatibilityml.metrics import model_accuracy
 
 
 def train_epoch(epoch, network, optimizer, loss_function, training_set, batch_size_train,
@@ -426,36 +426,13 @@ def compatibility_scores(h1, h2, dataset, device="cpu"):
     return btc_dataset, bec_dataset
 
 
-def model_accuracy(model, dataset, device="cpu"):
-    number_of_batches = len(dataset)
-    model_performance = 0
-    with torch.no_grad():
-        for data, target in dataset:
-            if device != "cpu":
-                data = data.to(device)
-                target = target.to(device)
-            _, _, output_logsoftmax = model(data)
-            output_labels = torch.argmax(output_logsoftmax, 1)
-            if device != "cpu":
-                output_labels = output_labels.cpu()
-                target = target.cpu()
-            performance = accuracy_score(output_labels.numpy(), target.numpy())
-            model_performance += performance
-            # _clean_from_gpu([data, target])
-
-    model_performance /= number_of_batches
-
-    return model_performance
-
-
-def evaluate_model_performance_and_compatibility_on_dataset(h1, h2, dataset, performance_metric=None,
+def evaluate_model_performance_and_compatibility_on_dataset(h1, h2, dataset, performance_metric,
                                                             device="cpu"):
     """
     Args:
         h1: The reference model being used.
         h2: The model being trained / updated.
-        performance_metric: Optional performance metric to be used when evaluating the model.
-            If not specified then accuracy is used.
+        performance_metric: Performance metric to be used when evaluating the model.
         device: A string with values either "cpu" or "cuda" to indicate the
             device that Pytorch is performing training on. By default this
             value is "cpu". But in case your models reside on the GPU, make sure
@@ -498,25 +475,7 @@ def evaluate_model_performance_and_compatibility_on_dataset(h1, h2, dataset, per
             "incompatibleFraction": error_fraction
         })
 
-    if performance_metric is not None:
-        h2_performance = performance_metric(h2, dataset)
-    else:
-        h2_performance = 0
-        with torch.no_grad():
-            for data, target in dataset:
-                if device != "cpu":
-                    data = data.to(device)
-                    target = target.to(device)
-                _, _, output_logsoftmax = h2(data)
-                output_labels = torch.argmax(output_logsoftmax, 1)
-                if device != "cpu":
-                    output_labels = output_labels.cpu()
-                    target = target.cpu()
-                performance = accuracy_score(output_labels.numpy(), target.numpy())
-                h2_performance += performance
-                # _clean_from_gpu([data, target])
-
-        h2_performance /= number_of_batches
+    h2_performance = performance_metric(h2, dataset, device)
 
     btc, bec = compatibility_scores(h1, h2, dataset, device=device)
@@ -535,7 +494,7 @@ def evaluate_model_performance_and_compatibility_on_dataset(h1, h2, dataset, per
     }
 
 
-def evaluate_model_performance_and_compatibility(h1, h2, training_set, test_set, performance_metric=None,
+def evaluate_model_performance_and_compatibility(h1, h2, training_set, test_set, performance_metric,
                                                  device="cpu"):
     """
     Calculate the error overlap of h1 and h2 on a batched dataset.
@@ -544,8 +503,7 @@ def evaluate_model_performance_and_compatibility(h1, h2, training_set, test_set,
     Args:
         h1: The reference model being used.
         h2: The model being trained / updated.
-        performance_metric: Optional performance metric to be used when evaluating the model.
-            If not specified then accuracy is used.
+        performance_metric: Performance metric to be used when evaluating the model.
         training_set: The list of batched training samples as (input, target) pairs.
         test_set: The list of batched testing samples as (input, target) pairs.
         device: A string with values either "cpu" or "cuda" to indicate the
@@ -559,11 +517,11 @@ def evaluate_model_performance_and_compatibility(h1, h2, training_set, test_set,
     """
     training_set_performance_and_compatibility =\
         evaluate_model_performance_and_compatibility_on_dataset(
-            h1, h2, training_set, performance_metric=performance_metric,
+            h1, h2, training_set, performance_metric,
             device=device)
     testing_set_performance_and_compatibility =\
         evaluate_model_performance_and_compatibility_on_dataset(
-            h1, h2, test_set, performance_metric=performance_metric,
+            h1, h2, test_set, performance_metric,
             device=device)
 
     return {
@@ -646,7 +604,7 @@ def compatibility_sweep(sweeps_folder_path, number_of_epochs, h1, h2,
                         training_set, test_set, batch_size_train, batch_size_test,
                         OptimizerClass, optimizer_kwargs,
                         NewErrorLossClass, StrictImitationLossClass,
-                        performance_metric=None,
+                        performance_metric=model_accuracy,
                         lambda_c_stepsize=0.25, percent_complete_queue=None,
                         new_error_loss_kwargs=None,
                         strict_imitation_loss_kwargs=None,
@@ -676,8 +634,13 @@ def compatibility_sweep(sweeps_folder_path, number_of_epochs, h1, h2,
         StrictImitationLossClass: The class of the Strict Imitation style loss
             function to be instantiated and used to perform compatibility
             constrained training of our model h2.
-        performance_metric: Optional performance metric to be used when evaluating the model.
-            If not specified then accuracy is used.
+        performance_metric: A function to evaluate model performance. The function is
+            expected to have the following signature:
+                metric(model, dataset, device)
+                model: The model being evaluated
+                dataset: The dataset as a list of (input, target) pairs
+                device: The device Pytorch is using for training - "cpu" or "cuda"
+            If unspecified, then accuracy is used.
         lambda_c_stepsize: The increments of lambda_c to use as we sweep the parameter
             space between 0.0 and 1.0.
         percent_complete_queue: Optional thread safe queue to use for logging the
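A minimal runnable sketch of the conventions this docstring assumes (ToyModel and the random dataset are illustrative, not from the repository): the dataset is a list of batched (input, target) pairs, and the model returns a triple ending in its log-softmax output, which is all the default model_accuracy metric needs:

    import torch
    import torch.nn as nn
    import torch.nn.functional as F
    from backwardcompatibilityml.metrics import model_accuracy


    class ToyModel(nn.Module):
        def __init__(self):
            super().__init__()
            self.fc = nn.Linear(4, 3)

        def forward(self, x):
            logits = self.fc(x)
            # The helpers unpack three outputs and use only the third,
            # the log-softmax values.
            return logits, F.softmax(logits, dim=1), F.log_softmax(logits, dim=1)


    # Five batches of eight (input, target) pairs each.
    dataset = [(torch.randn(8, 4), torch.randint(0, 3, (8,))) for _ in range(5)]
    print(model_accuracy(ToyModel(), dataset, "cpu"))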
@@ -714,7 +677,7 @@ def compatibility_sweep(sweeps_folder_path, number_of_epochs, h1, h2,
 
         training_set_performance_and_compatibility =\
             evaluate_model_performance_and_compatibility_on_dataset(
-                h1, h2_new_error, training_set, performance_metric=performance_metric,
+                h1, h2_new_error, training_set, performance_metric,
                 device=device)
         training_set_performance_and_compatibility["lambda_c"] = lambda_c
         training_set_performance_and_compatibility["training"] = True
@@ -739,7 +702,7 @@ def compatibility_sweep(sweeps_folder_path, number_of_epochs, h1, h2,
 
         testing_set_performance_and_compatibility =\
             evaluate_model_performance_and_compatibility_on_dataset(
-                h1, h2_new_error, test_set, performance_metric=performance_metric,
+                h1, h2_new_error, test_set, performance_metric,
                 device=device)
         testing_set_performance_and_compatibility["lambda_c"] = lambda_c
         testing_set_performance_and_compatibility["training"] = False
@@ -774,7 +737,7 @@ def compatibility_sweep(sweeps_folder_path, number_of_epochs, h1, h2,
 
         training_set_performance_and_compatibility =\
             evaluate_model_performance_and_compatibility_on_dataset(
-                h1, h2_strict_imitation, training_set, performance_metric=performance_metric,
+                h1, h2_strict_imitation, training_set, performance_metric,
                 device=device)
         training_set_performance_and_compatibility["lambda_c"] = lambda_c
         training_set_performance_and_compatibility["training"] = True
@@ -799,7 +762,7 @@ def compatibility_sweep(sweeps_folder_path, number_of_epochs, h1, h2,
 
         testing_set_performance_and_compatibility =\
             evaluate_model_performance_and_compatibility_on_dataset(
-                h1, h2_new_error, test_set, performance_metric=performance_metric,
+                h1, h2_new_error, test_set, performance_metric,
                 device=device)
         testing_set_performance_and_compatibility["lambda_c"] = lambda_c
         testing_set_performance_and_compatibility["training"] = False
@@ -0,0 +1,26 @@
+# Copyright (c) Microsoft Corporation.
+# Licensed under the MIT License.
+
+import torch
+from sklearn.metrics import accuracy_score
+
+
+def model_accuracy(model, dataset, device="cpu"):
+    model_performance = 0
+    number_of_batches = len(dataset)
+    with torch.no_grad():
+        for data, target in dataset:
+            if device != "cpu":
+                data = data.to(device)
+                target = target.to(device)
+            _, _, output_logsoftmax = model(data)
+            output_labels = torch.argmax(output_logsoftmax, 1)
+            if device != "cpu":
+                output_labels = output_labels.cpu()
+                target = target.cpu()
+            performance = accuracy_score(output_labels.numpy(), target.numpy())
+            model_performance += performance
+            # _clean_from_gpu([data, target])
+
+    model_performance /= number_of_batches
+    return model_performance
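One property of model_accuracy worth noting (an editor's observation derived from the code above, not a claim from the commit): it averages sklearn's per-batch accuracy over batches, so a short final batch carries the same weight as a full one rather than being weighted per sample. For example:

    batch 1: 4 samples, 2 correct -> accuracy_score = 0.50
    batch 2: 1 sample,  1 correct -> accuracy_score = 1.00
    model_accuracy = (0.50 + 1.00) / 2 = 0.75   (pooled accuracy: 3/5 = 0.60)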
@@ -6,6 +6,7 @@ import json
 import threading
 from queue import Queue
 from backwardcompatibilityml.helpers import training
+from backwardcompatibilityml.metrics import model_accuracy
 
 
 class SweepManager(object):
@@ -58,6 +59,7 @@ class SweepManager(object):
                  NewErrorLossClass, StrictImitationLossClass, lambda_c_stepsize=0.25,
                  new_error_loss_kwargs=None,
                  strict_imitation_loss_kwargs=None,
+                 performance_metric=model_accuracy,
                  device="cpu"):
         self.folder_name = folder_name
         self.number_of_epochs = number_of_epochs
@@ -71,6 +73,7 @@ class SweepManager(object):
         self.optimizer_kwargs = optimizer_kwargs
         self.NewErrorLossClass = NewErrorLossClass
         self.StrictImitationLossClass = StrictImitationLossClass
+        self.performance_metric = performance_metric
         self.lambda_c_stepsize = lambda_c_stepsize
         self.new_error_loss_kwargs = new_error_loss_kwargs
         self.strict_imitation_loss_kwargs = strict_imitation_loss_kwargs
@@ -83,7 +86,8 @@ class SweepManager(object):
                 self.training_set, self.test_set,
                 self.batch_size_train, self.batch_size_test,
                 self.OptimizerClass, self.optimizer_kwargs,
-                self.NewErrorLossClass, self.StrictImitationLossClass,),
+                self.NewErrorLossClass, self.StrictImitationLossClass,
+                self.performance_metric,),
             kwargs={
                 "lambda_c_stepsize": self.lambda_c_stepsize,
                 "percent_complete_queue": self.percent_complete_queue,
@@ -116,6 +120,7 @@ class SweepManager(object):
     def get_sweep_summary(self):
         sweep_summary = {
             "h1_performance": None,
+            "performance_metric": self.performance_metric.__name__,
             "data": []
         }
 
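The summary reports the metric's Python function name verbatim, which matches the commit note "Don't rename model_accuracy to Accuracy": whatever the callable is named is what the widget displays. A hypothetical custom metric would therefore label the plot with its own name:

    def model_f1(model, dataset, device="cpu"):  # hypothetical custom metric
        ...


    print(model_f1.__name__)  # "model_f1" -- sent to the front end as
                              # performance_metric and drawn as the
                              # scatter plot's y-axis label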
@@ -12,6 +12,7 @@ import torch.optim as optim
 from flask import Response
 from backwardcompatibilityml import loss
 from backwardcompatibilityml.sweep_management import SweepManager
+from backwardcompatibilityml.metrics import model_accuracy
 from rai_core_flask.flask_helper import FlaskHelper
 from rai_core_flask.environments import (
     AzureNBEnvironment,
@@ -169,6 +170,13 @@ class CompatibilityAnalysis(object):
         StrictImitationLossClass: The class of the Strict Imitation style loss
             function to be instantiated and used to perform compatibility
             constrained training of our model h2.
+        performance_metric: A function to evaluate model performance. The function is
+            expected to have the following signature:
+                metric(model, dataset, device)
+                model: The model being evaluated
+                dataset: The dataset as a list of (input, target) pairs
+                device: The device Pytorch is using for training - "cpu" or "cuda"
+            If unspecified, then accuracy is used.
         port: An integer value to indicate the port to which the Flask service
             should bind.
         device: A string with values either "cpu" or "cuda" to indicate the
@@ -182,6 +190,7 @@ class CompatibilityAnalysis(object):
                  batch_size_train, batch_size_test, lambda_c_stepsize=0.25,
                  OptimizerClass=None, optimizer_kwargs=None,
                  NewErrorLossClass=None, StrictImitationLossClass=None,
+                 performance_metric=model_accuracy,
                  port=None, new_error_loss_kwargs=None,
                  strict_imitation_loss_kwargs=None, device="cpu"):
         if OptimizerClass is None:
@@ -211,6 +220,7 @@ class CompatibilityAnalysis(object):
             lambda_c_stepsize=lambda_c_stepsize,
             new_error_loss_kwargs=new_error_loss_kwargs,
             strict_imitation_loss_kwargs=strict_imitation_loss_kwargs,
+            performance_metric=performance_metric,
             device=device)
 
         self.flask_service = FlaskHelper(ip="0.0.0.0", port=port)
@@ -103,6 +103,7 @@ function Container({
           <PerformanceCompatibility
             data={data.data}
             h1Performance={data.h1_performance}
+            performanceMetric={data.performance_metric}
             training={training}
             testing={testing}
             newError={newError}
@@ -115,6 +116,7 @@ function Container({
           <PerformanceCompatibility
             data={data.data}
             h1Performance={data.h1_performance}
+            performanceMetric={data.performance_metric}
             training={training}
             testing={testing}
             newError={newError}
@@ -25,6 +25,7 @@ type PerformanceCompatibilityProps = {
   strictImitation: boolean,
   selectedDataPoint: any,
   compatibilityScoreType: string,
+  performanceMetric: string,
   selectDataPoint: (d: any) => void,
   getModelEvaluationData: (evaluationId: number) => void
 }
@@ -170,11 +171,11 @@ class PerformanceCompatibility extends Component<PerformanceCompatibilityProps,
       .append('text')
      .attr('id', 'yAxisLabel')
      .attr('transform','rotate(-90)')
-      .attr('x',-h/2)
+      .attr('x',-h/2+2.5*this.props.performanceMetric.length)
      .attr('y',-50)
      .attr('dy','.71em')
      .style('text-anchor','end')
-      .text('Performance')
+      .text(this.props.performanceMetric)
      .attr("font-family", "sans-serif")
      .attr("font-size", "20px")
      .attr("fill", "black");