From 7c78b0141308804745b4056a1ea4d87be2ae4342 Mon Sep 17 00:00:00 2001 From: Sai Kothinti Date: Wed, 13 Nov 2024 00:57:12 +0530 Subject: [PATCH] Online Evaluation: handle empty data when queried (#3593) * handle empty data when queried * fix incorrect span_id, trace_id --- .../context/online_eval/evaluate.py | 4 +++- .../context/online_eval/postprocess.py | 7 +++++-- .../context/online_eval/utils.py | 14 ++++++++++++++ 3 files changed, 22 insertions(+), 3 deletions(-) diff --git a/assets/evaluation_on_cloud/environments/evaluations-built-in/context/online_eval/evaluate.py b/assets/evaluation_on_cloud/environments/evaluations-built-in/context/online_eval/evaluate.py index 18b9104c27..6b6afad543 100644 --- a/assets/evaluation_on_cloud/environments/evaluations-built-in/context/online_eval/evaluate.py +++ b/assets/evaluation_on_cloud/environments/evaluations-built-in/context/online_eval/evaluate.py @@ -18,7 +18,7 @@ from azure.ai.evaluation import evaluate from azure.ai.ml.identity import AzureMLOnBehalfOfCredential from collections import defaultdict -from utils import get_mlclient, extract_model_info +from utils import get_mlclient, extract_model_info, is_input_data_empty logger = logging.getLogger(__name__) logging.basicConfig(level=logging.INFO) @@ -228,6 +228,8 @@ rai_evaluators = [ def run(args): """Entry point of model prediction script.""" + if is_input_data_empty(args["preprocessed_data"]): + return evaluators = json.loads(args["evaluators"]) # evaluators = download_evaluators_and_update_local_path(evaluators) evaluators = copy_evaluator_files(args) diff --git a/assets/evaluation_on_cloud/environments/evaluations-built-in/context/online_eval/postprocess.py b/assets/evaluation_on_cloud/environments/evaluations-built-in/context/online_eval/postprocess.py index 21ec1f6c53..a801c5ed8e 100644 --- a/assets/evaluation_on_cloud/environments/evaluations-built-in/context/online_eval/postprocess.py +++ b/assets/evaluation_on_cloud/environments/evaluations-built-in/context/online_eval/postprocess.py @@ -14,6 +14,7 @@ from opentelemetry.trace.span import TraceFlags from opentelemetry.sdk._logs import LoggerProvider from opentelemetry.sdk._logs.export import BatchLogRecordProcessor from azure.monitor.opentelemetry.exporter import AzureMonitorLogExporter +from utils import is_input_data_empty import logging @@ -78,10 +79,10 @@ def log_evaluation_event(row) -> None: logger.warning("Missing required fields in the row: evaluation") if "trace_id" not in row: logger.debug(f"Missing trace_id from user query result, taking default of column {DEFAULT_TRACE_ID_COLUMN}") - trace_id = int(row.get("trace_id", row.get(DEFAULT_TRACE_ID_COLUMN, "0")), 16) if "span_id" not in row: logger.debug(f"Missing span_id from user query result, taking default of column {DEFAULT_SPAN_ID_COLUMN}") - span_id = int(row.get("span_id", row.get(DEFAULT_SPAN_ID_COLUMN, "0")), 16) + trace_id = int(row.get("trace_id", row.get(DEFAULT_TRACE_ID_COLUMN, "0")), 16) + span_id = int(row.get("span_id", row.get(DEFAULT_SPAN_ID_COLUMN, "0")), 16) trace_flags = TraceFlags(TraceFlags.SAMPLED) response_id = row.get("gen_ai_response_id", "") evaluation_results = row.get("evaluation", {}) @@ -109,6 +110,8 @@ def get_combined_data(preprocessed_data, evaluated_data, service_name): def run(args): """Entry point of model prediction script.""" logger.info(f"Commandline args:> Service Name: {args['service_name']}") + if is_input_data_empty(args["preprocessed_data"]): + return provider = configure_logging(args) data = get_combined_data(args["preprocessed_data"], args["evaluated_data"], args["service_name"]) diff --git a/assets/evaluation_on_cloud/environments/evaluations-built-in/context/online_eval/utils.py b/assets/evaluation_on_cloud/environments/evaluations-built-in/context/online_eval/utils.py index e4e234c38f..bc137c47d7 100644 --- a/assets/evaluation_on_cloud/environments/evaluations-built-in/context/online_eval/utils.py +++ b/assets/evaluation_on_cloud/environments/evaluations-built-in/context/online_eval/utils.py @@ -4,6 +4,7 @@ """Utility functions for the online evaluation context.""" import os import re +import pandas as pd from azure.ai.ml import MLClient from azure.ai.ml.identity import AzureMLOnBehalfOfCredential @@ -116,3 +117,16 @@ def get_mlclient( logger.info(f"Creating MLClient with registry name {registry_name}") return MLClient(credential=credential, registry_name=registry_name) + + +def is_input_data_empty(data_file_path): + """Check if the input data is empty.""" + if not data_file_path: + logger.info("Data file path is empty. Exiting.") + return True + + df = pd.read_json(data_file_path, lines=True) + if len(df) == 0: + logger.info("Empty data in preprocessed file. Exiting.") + return True + return False