Online Evaluation: handle empty data when queried (#3593)

* handle empty data when queried

* fix incorrect span_id, trace_id
This commit is contained in:
Sai Kothinti 2024-11-13 00:57:12 +05:30 коммит произвёл GitHub
Родитель 9ffe6988d6
Коммит 7c78b01413
Не найден ключ, соответствующий данной подписи
Идентификатор ключа GPG: B5690EEEBB952194
3 изменённых файлов: 22 добавлений и 3 удалений

Просмотреть файл

@ -18,7 +18,7 @@ from azure.ai.evaluation import evaluate
from azure.ai.ml.identity import AzureMLOnBehalfOfCredential
from collections import defaultdict
from utils import get_mlclient, extract_model_info
from utils import get_mlclient, extract_model_info, is_input_data_empty
logger = logging.getLogger(__name__)
logging.basicConfig(level=logging.INFO)
@ -228,6 +228,8 @@ rai_evaluators = [
def run(args):
"""Entry point of model prediction script."""
if is_input_data_empty(args["preprocessed_data"]):
return
evaluators = json.loads(args["evaluators"])
# evaluators = download_evaluators_and_update_local_path(evaluators)
evaluators = copy_evaluator_files(args)

Просмотреть файл

@ -14,6 +14,7 @@ from opentelemetry.trace.span import TraceFlags
from opentelemetry.sdk._logs import LoggerProvider
from opentelemetry.sdk._logs.export import BatchLogRecordProcessor
from azure.monitor.opentelemetry.exporter import AzureMonitorLogExporter
from utils import is_input_data_empty
import logging
@ -78,10 +79,10 @@ def log_evaluation_event(row) -> None:
logger.warning("Missing required fields in the row: evaluation")
if "trace_id" not in row:
logger.debug(f"Missing trace_id from user query result, taking default of column {DEFAULT_TRACE_ID_COLUMN}")
trace_id = int(row.get("trace_id", row.get(DEFAULT_TRACE_ID_COLUMN, "0")), 16)
if "span_id" not in row:
logger.debug(f"Missing span_id from user query result, taking default of column {DEFAULT_SPAN_ID_COLUMN}")
span_id = int(row.get("span_id", row.get(DEFAULT_SPAN_ID_COLUMN, "0")), 16)
trace_id = int(row.get("trace_id", row.get(DEFAULT_TRACE_ID_COLUMN, "0")), 16)
span_id = int(row.get("span_id", row.get(DEFAULT_SPAN_ID_COLUMN, "0")), 16)
trace_flags = TraceFlags(TraceFlags.SAMPLED)
response_id = row.get("gen_ai_response_id", "")
evaluation_results = row.get("evaluation", {})
@ -109,6 +110,8 @@ def get_combined_data(preprocessed_data, evaluated_data, service_name):
def run(args):
"""Entry point of model prediction script."""
logger.info(f"Commandline args:> Service Name: {args['service_name']}")
if is_input_data_empty(args["preprocessed_data"]):
return
provider = configure_logging(args)
data = get_combined_data(args["preprocessed_data"], args["evaluated_data"],
args["service_name"])

Просмотреть файл

@ -4,6 +4,7 @@
"""Utility functions for the online evaluation context."""
import os
import re
import pandas as pd
from azure.ai.ml import MLClient
from azure.ai.ml.identity import AzureMLOnBehalfOfCredential
@ -116,3 +117,16 @@ def get_mlclient(
logger.info(f"Creating MLClient with registry name {registry_name}")
return MLClient(credential=credential, registry_name=registry_name)
def is_input_data_empty(data_file_path):
    """Tell the caller whether there is any input data to process.

    The file at ``data_file_path`` is parsed as JSON Lines with
    ``pd.read_json(..., lines=True)`` and its rows are counted.

    Args:
        data_file_path: Location of the JSON Lines file to inspect. A falsy
            value (``None``, empty string) is treated as "no data".

    Returns:
        ``True`` when the path is falsy or the parsed frame has zero rows,
        ``False`` otherwise. An info-level message is logged whenever the
        result is ``True``.
    """
    if data_file_path:
        frame = pd.read_json(data_file_path, lines=True)
        row_count = frame.shape[0]
        if row_count > 0:
            return False
        # File parsed fine but produced no rows.
        logger.info("Empty data in preprocessed file. Exiting.")
        return True

    # No path was supplied at all, so nothing is read.
    logger.info("Data file path is empty. Exiting.")
    return True