# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.

'''
In this file we define the wrapper class for
implementing metrics.
'''

import logging

import numpy as np
import torch

from utils import print_rank


class Metrics():

    def __init__(self):
        super().__init__()

    def compute_metrics(self, dataloader, model):
        '''This method is called by the `run_validation_generic` function
        inside trainer.py.

        It is just a helper that aggregates the metrics returned by the
        `inference` function inside `model.py`.
        '''
        print_rank("Computing metrics")
        return self.call_inference(dataloader, model)

    def call_inference(self, dataloader, model):
        '''Runs inference over all batches in `dataloader` and aggregates
        the per-batch metrics into example-weighted averages.'''
        metrics, sum_metrics = dict(), dict()
        output_tot = {"probabilities": [], "predictions": [], "labels": []}
        counter = 0

        with torch.no_grad():
            for _, batch in enumerate(dataloader):
                val_loss = model.loss(batch).item()
                inf_results = model.inference(batch)
                inf_results['loss'] = {'value': val_loss, 'higher_is_better': False}
                output = inf_results.pop('output')
                batch_size = inf_results.pop('batch_size')

                # Normalize plain scalars into {'value', 'higher_is_better'} dicts
                # and make sure every metric has an accumulator.
                for key in inf_results.keys():
                    if not isinstance(inf_results[key], dict):
                        inf_results[key] = {'value': inf_results[key], 'higher_is_better': True}
                    sum_metrics[key] = [] if key not in sum_metrics else sum_metrics[key]

                if isinstance(output, dict):
                    output_tot["probabilities"].append(output["probabilities"])
                    output_tot["predictions"].append(output["predictions"])
                    output_tot["labels"].append(output["labels"])

                # Weight each per-batch metric by the batch size so the final
                # average below is computed per example.
                for q in inf_results.keys():
                    sum_metrics[q].append(inf_results[q]['value'] * batch_size)
                counter += batch_size
                torch.cuda.empty_cache()

        output_tot["probabilities"] = np.concatenate(output_tot["probabilities"]) if output_tot["probabilities"] else []
        output_tot["predictions"] = np.concatenate(output_tot["predictions"]) if output_tot["predictions"] else []
        output_tot["labels"] = np.concatenate(output_tot["labels"]) if output_tot["labels"] else []

        # Post-processing of metrics
        print_rank(f"validation complete {counter}", loglevel=logging.DEBUG)
        model.set_train()

        # Average each accumulated metric over the total number of examples,
        # keeping the 'higher_is_better' flag reported by the model.
        for k in inf_results.keys():
            metrics[k] = inf_results[k]
            metrics[k]['value'] = sum(sum_metrics[k]) / counter

        print_rank(f"validation examples {counter}", loglevel=logging.DEBUG)
        torch.cuda.empty_cache()

        return output_tot, metrics
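

# The block below is a hedged usage sketch, not part of the original module:
# it only illustrates how `Metrics.compute_metrics` consumes a model whose
# `inference` method returns a dict with 'output', 'batch_size' and one entry
# per metric, as assumed by `call_inference` above. `_ToyModel`, `_inputs` and
# the random toy loader are hypothetical names introduced for illustration.
if __name__ == '__main__':
    class _ToyModel(torch.nn.Module):
        def loss(self, batch):
            # Dummy scalar loss; a real model would compute it from `batch`.
            return torch.tensor(0.0)

        def inference(self, batch):
            _inputs, labels = batch
            probabilities = torch.softmax(torch.randn(len(labels), 2), dim=1).numpy()
            predictions = probabilities.argmax(axis=1)
            return {
                'output': {'probabilities': probabilities,
                           'predictions': predictions,
                           'labels': labels.numpy()},
                'batch_size': len(labels),
                # Plain scalars get wrapped as {'value': ..., 'higher_is_better': True}.
                'acc': float((predictions == labels.numpy()).mean()),
            }

        def set_train(self):
            self.train()

    toy_loader = [(torch.randn(4, 8), torch.randint(0, 2, (4,))) for _ in range(3)]
    output_tot, metrics = Metrics().compute_metrics(toy_loader, _ToyModel())
    print(metrics)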