Add helper method for genai metrics (#2519)

* Added info about required packages

* Update responsibleaidashboard-question-answering-model-debugging.ipynb

* show example prediction

* Update responsibleaidashboard-question-answering-model-debugging.ipynb

* add helper method for genai metrics

Signed-off-by: Kartik Choudhary <kartikchoudh@umass.edu>

* Fix import order in metrics.py

Signed-off-by: Kartik Choudhary <kartikchoudh@umass.edu>

---------

Signed-off-by: Kartik Choudhary <kartikchoudh@umass.edu>
Kartik Choudhary 2024-01-31 10:46:34 -05:00 committed by GitHub
Parent 84428aa63f
Commit 5b4bd20560
No key matching this signature was found
GPG key ID: B5690EEEBB952194
2 changed files with 52 additions and 57 deletions

View file

@@ -6,6 +6,8 @@
 import logging
 from pathlib import Path
 
+import numpy as np
+
 module_logger = logging.getLogger(__name__)
 module_logger.setLevel(logging.INFO)
@@ -30,3 +32,16 @@ def get_genai_metric(metric_name, **metric_kwargs):
     metric = evaluate.load(
         str(curr_file_dir.joinpath(f'scripts/{metric_name}.py')))
     return metric.compute(**metric_kwargs)
+
+
+def get_genai_metric_mean(metric_name, **metric_kwargs):
+    """Get the mean of the metric from the genai library.
+
+    :param metric_name: The name of the metric.
+    :type metric_name: str
+    :param metric_kwargs: The keyword arguments to pass to the metric.
+    :type metric_kwargs: dict
+    :return: The mean of the metric.
+    :rtype: float
+    """
+    return np.mean(get_genai_metric(metric_name, **metric_kwargs)['scores'])
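For orientation, here is a minimal usage sketch of the new helper (not part of this diff). The EchoWrapper class is a hypothetical stand-in for a real LLM wrapper; its predict()-style interface is an assumption modeled on the DummyModelWrapper used in the tests below, and the metric name and inputs mirror those tests.

    from responsibleai_text.utils.genai_metrics.metrics import (
        get_genai_metric, get_genai_metric_mean)


    class EchoWrapper:
        # Hypothetical wrapper that always answers '1'; a real wrapper would
        # call an LLM to score each generated evaluation prompt.
        def predict(self, prompts):
            return ['1'] * len(prompts)


    predictions = ['This is a prediction'] * 3
    references = ['This is a reference'] * 3

    # Per-example scores, e.g. {'scores': [1, 1, 1]}
    result = get_genai_metric('coherence',
                              predictions=predictions,
                              references=references,
                              wrapper_model=EchoWrapper())

    # Single aggregate value: np.mean over the per-example scores, here 1.0
    mean_score = get_genai_metric_mean('coherence',
                                       predictions=predictions,
                                       references=references,
                                       wrapper_model=EchoWrapper())

    print(result['scores'], mean_score)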

View file

@@ -1,7 +1,8 @@
 # Copyright (c) Microsoft Corporation
 # Licensed under the MIT License.
 
-from responsibleai_text.utils.genai_metrics.metrics import get_genai_metric
+from responsibleai_text.utils.genai_metrics.metrics import (
+    get_genai_metric, get_genai_metric_mean)
 
 PREDICTIONS = ['This is a prediction']
 REFERENCES = ['This is a reference']
@@ -15,69 +16,48 @@ class DummyModelWrapper:
 class TestGenAIMetrics:
-    def test_coherence(self):
-        metric = get_genai_metric('coherence',
-                                  predictions=PREDICTIONS,
-                                  references=REFERENCES,
+    def assert_metrics(self, metric_name,
+                       expected, input_len,
+                       **metric_kwargs):
+        metric = get_genai_metric(metric_name, **metric_kwargs,
                                   wrapper_model=DummyModelWrapper())
-        assert metric['scores'] == [1]
+        assert metric['scores'] == [expected]
 
-        metric = get_genai_metric('coherence',
-                                  predictions=PREDICTIONS * 5,
-                                  references=REFERENCES * 5,
-                                  wrapper_model=DummyModelWrapper())
-        assert metric['scores'] == [1] * 5
+        metric_mean = get_genai_metric_mean(metric_name, **metric_kwargs,
+                                            wrapper_model=DummyModelWrapper())
+        assert metric_mean == expected
+
+        kwargs_multi = {k: v * input_len for k, v in metric_kwargs.items()}
+        metric_multi = get_genai_metric(metric_name, **kwargs_multi,
+                                        wrapper_model=DummyModelWrapper())
+        assert metric_multi['scores'] == [expected] * input_len
+
+        metric_mean_multi = get_genai_metric_mean(
+            metric_name, **kwargs_multi, wrapper_model=DummyModelWrapper())
+        assert metric_mean_multi == expected
+
+    def test_coherence(self):
+        self.assert_metrics('coherence', 1, 5,
+                            predictions=PREDICTIONS,
+                            references=REFERENCES)
 
     def test_equivalence(self):
-        metric = get_genai_metric('equivalence',
-                                  predictions=PREDICTIONS,
-                                  references=REFERENCES,
-                                  answers=ANSWERS,
-                                  wrapper_model=DummyModelWrapper())
-        assert metric['scores'] == [1]
-
-        metric = get_genai_metric('equivalence',
-                                  predictions=PREDICTIONS * 5,
-                                  references=REFERENCES * 5,
-                                  answers=ANSWERS * 5,
-                                  wrapper_model=DummyModelWrapper())
-        assert metric['scores'] == [1] * 5
+        self.assert_metrics('equivalence', 1, 5,
+                            predictions=PREDICTIONS,
+                            references=REFERENCES,
+                            answers=ANSWERS)
 
     def test_fluency(self):
-        metric = get_genai_metric('fluency',
-                                  predictions=PREDICTIONS,
-                                  references=REFERENCES,
-                                  wrapper_model=DummyModelWrapper())
-        assert metric['scores'] == [1]
-
-        metric = get_genai_metric('fluency',
-                                  predictions=PREDICTIONS * 5,
-                                  references=REFERENCES * 5,
-                                  wrapper_model=DummyModelWrapper())
-        assert metric['scores'] == [1] * 5
+        self.assert_metrics('fluency', 1, 5,
+                            predictions=PREDICTIONS,
+                            references=REFERENCES)
 
     def test_groundedness(self):
-        metric = get_genai_metric('groundedness',
-                                  predictions=PREDICTIONS,
-                                  references=REFERENCES,
-                                  wrapper_model=DummyModelWrapper())
-        assert metric['scores'] == [1]
-
-        metric = get_genai_metric('groundedness',
-                                  predictions=PREDICTIONS * 5,
-                                  references=REFERENCES * 5,
-                                  wrapper_model=DummyModelWrapper())
-        assert metric['scores'] == [1] * 5
+        self.assert_metrics('groundedness', 1, 5,
+                            predictions=PREDICTIONS,
+                            references=REFERENCES)
 
     def test_relevance(self):
-        metric = get_genai_metric('relevance',
-                                  predictions=PREDICTIONS,
-                                  references=REFERENCES,
-                                  wrapper_model=DummyModelWrapper())
-        assert metric['scores'] == [1]
-
-        metric = get_genai_metric('relevance',
-                                  predictions=PREDICTIONS * 5,
-                                  references=REFERENCES * 5,
-                                  wrapper_model=DummyModelWrapper())
-        assert metric['scores'] == [1] * 5
+        self.assert_metrics('relevance', 1, 5,
+                            predictions=PREDICTIONS,
+                            references=REFERENCES)
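With the shared assert_metrics helper in place, covering an additional metric reduces to a single call. A hypothetical sketch follows; the metric name 'my_new_metric' is illustrative only and is not part of this change:

    def test_my_new_metric(self):
        # 1 is the expected per-example score, 5 the replication factor used
        # for the multi-example case; the metric name is purely illustrative.
        self.assert_metrics('my_new_metric', 1, 5,
                            predictions=PREDICTIONS,
                            references=REFERENCES)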