Merge remote-tracking branch 'origin/main'

This commit is contained in:
Chandra Sekhar Gupta Aravapalli 2024-11-06 16:40:19 +05:30
Родитель 2f11d74f70 2442cb2caf
Коммит 8239b9f5b8
795 изменённых файлов: 4546 добавлений и 1210 удалений

Просмотреть файл

@ -0,0 +1,6 @@
# Declares an asset of type `environment` named `evaluation`.
# `spec` and `extra_config` name the companion YAML files for this asset.
type: environment
name: evaluation
version: auto
spec: spec.yaml
extra_config: environment.yaml
categories:
  - Models

Просмотреть файл

@ -0,0 +1,23 @@
# Image for the evaluation environment: the AzureML OpenMPI / Ubuntu 22.04 base
# image plus a conda environment created from conda.yaml.
FROM mcr.microsoft.com/azureml/openmpi4.1.0-ubuntu22.04:{{latest-image-tag}}

# Apply pending OS package upgrades, then remove the apt caches and package
# lists in the same layer so they do not end up in the image.
RUN apt-get update && \
    apt-get -y upgrade && \
    apt-get clean && \
    rm -rf /var/lib/apt/lists/*

WORKDIR /

# Directory in which the conda environment is created (used with `-p` below)
ENV CONDA_PREFIX=/azureml-envs/aoai-evaluation
ENV CONDA_DEFAULT_ENV=$CONDA_PREFIX

COPY conda.yaml .
# Create conda environment, then delete the spec file and purge pip/conda
# caches within the same layer
RUN conda env create -p $CONDA_PREFIX -f conda.yaml -q && \
    rm conda.yaml && \
    conda run -p $CONDA_PREFIX pip cache purge && \
    conda clean -a -y

# Remove any pip cache directory left under the build user's home
RUN rm -rf ~/.cache/pip

# Prepend path to AzureML conda environment
ENV PATH=$CONDA_PREFIX/bin:$PATH

# This is needed for mpi to locate libpython
# (key=value form, consistent with the other ENV instructions in this file)
ENV LD_LIBRARY_PATH=$CONDA_PREFIX/lib:$LD_LIBRARY_PATH

Просмотреть файл

@ -0,0 +1,19 @@
# Conda environment definition: conda-managed interpreter and pip, followed by
# the packages that pip installs inside that environment.
name: evaluation
channels:
  - conda-forge
  - anaconda
dependencies:
  - python=3.10
  - pip=23.1
  # Everything nested under `pip:` is installed by pip, not by conda.
  # `{{latest-pypi-version}}` is a template placeholder and is kept verbatim.
  - pip:
      - azureml-mlflow=={{latest-pypi-version}}
      - azure-ai-ml=={{latest-pypi-version}}
      - azureml-core=={{latest-pypi-version}}
      - azureml-telemetry=={{latest-pypi-version}}
      - mltable=={{latest-pypi-version}}
      - azureml-automl-core=={{latest-pypi-version}}
      - rouge-score>=0.1.2
      - nltk>=3.9.1
      - pandas>=2.2.3
      - tiktoken>=0.8.0
      - openai>=1.52.2

Просмотреть файл

@ -0,0 +1,12 @@
# Image build/publish spec for the evaluation environment.
image:
  name: azureml/curated/evaluation
  os: linux
  # Build context directory and the Dockerfile inside it.
  context:
    dir: context
    dockerfile: Dockerfile
    # Files listed here contain `{{...}}` placeholders in this commit.
    template_files:
      - Dockerfile
      - conda.yaml
  publish:
    location: mcr
    visibility: public

Просмотреть файл

@ -0,0 +1,15 @@
# Azure ML environment definition for the evaluation asset.
# `name` and `version` are template placeholders and stay quoted so the braces
# are not read as a YAML flow mapping.
$schema: https://azuremlschemas.azureedge.net/latest/environment.schema.json
description: >-
  Environment for evaluating models.
name: "{{asset.name}}"
version: "{{asset.version}}"
build:
  path: context/
os_type: linux
tags:
  OS: Ubuntu22.04
  Evaluation: ""

Просмотреть файл

@ -0,0 +1,6 @@
# Declares an asset of type `environment` named `component`.
# `spec` and `extra_config` name the companion YAML files for this asset.
type: environment
name: component
version: auto
spec: spec.yaml
extra_config: environment.yaml
categories:
  - Designer
  - Python

Просмотреть файл

@ -0,0 +1,16 @@
# Image for the component environment: the AzureML OpenMPI / Ubuntu 22.04 base
# image plus a conda environment created from conda_dependencies.yaml.
FROM mcr.microsoft.com/azureml/openmpi4.1.0-ubuntu22.04:{{latest-image-tag}}
WORKDIR /
# Directory in which the conda environment is created (used with `-p` below)
ENV CONDA_PREFIX=/azureml-envs/component-sdk
ENV CONDA_DEFAULT_ENV=$CONDA_PREFIX
# Put the environment's bin directory ahead of the existing PATH entries
ENV PATH=$CONDA_PREFIX/bin:$PATH
# This is needed for mpi to locate libpython
ENV LD_LIBRARY_PATH=$CONDA_PREFIX/lib:$LD_LIBRARY_PATH
# Create conda environment
COPY conda_dependencies.yaml .
# Build the environment at $CONDA_PREFIX, then delete the spec file and purge
# the pip and conda caches within the same layer
RUN conda env create -p $CONDA_PREFIX -f conda_dependencies.yaml -q && \
rm conda_dependencies.yaml && \
conda run -p $CONDA_PREFIX pip cache purge && \
conda clean -a -y

Просмотреть файл

@ -0,0 +1,10 @@
# Conda environment definition: conda-managed interpreter and pip, followed by
# the packages that pip installs inside that environment.
name: component
channels:
  - conda-forge
dependencies:
  - python=3.9
  - pip=24.0
  # Everything nested under `pip:` is installed by pip, not by conda.
  # `{{latest-pypi-version}}` is a template placeholder and is kept verbatim.
  - pip:
      - azure-ml-component=={{latest-pypi-version}}
      - azureml-defaults=={{latest-pypi-version}}
      - pyarrow>=14.0.1

Просмотреть файл

@ -0,0 +1,12 @@
# Image build/publish spec for the component environment.
image:
  name: azureml/curated/component
  os: linux
  # Build context directory and the Dockerfile inside it.
  context:
    dir: context
    dockerfile: Dockerfile
    # Files listed here contain `{{...}}` placeholders in this commit.
    template_files:
      - Dockerfile
      - conda_dependencies.yaml
  publish:
    location: mcr
    visibility: public

Просмотреть файл

@ -0,0 +1,17 @@
# Azure ML environment definition for the component asset.
# All `{{...}}` values are template placeholders and stay quoted so the braces
# are not read as a YAML flow mapping.
$schema: https://azuremlschemas.azureedge.net/latest/environment.schema.json
description: >-
  An environment for built-in component that can dynamically evaluate python expression for 1P request feature.
name: "{{asset.name}}"
version: "{{asset.version}}"
build:
  path: "{{image.context.path}}"
  dockerfile_path: "{{image.dockerfile.path}}"
os_type: linux
tags:
  OS: Ubuntu22.04
  # Quoted so the tag value is the string "3.9" rather than a float.
  Python: "3.9"

Просмотреть файл

@ -1,6 +1,6 @@
type: evaluationresult
name: amazonpolarityclassification_cohere-embed-v3-english_classification
version: 2.07.22
version: 2.04.11
display_name: AmazonPolarityClassification_cohere-embed-v3-english_classification
description: cohere-embed-v3-english run for AmazonPolarityClassification dataset
dataset_name: AmazonPolarityClassification
@ -17,6 +17,10 @@ tags:
evaluation_type: text_embeddings
task: classification
primary_metric: accuracy
azure_registry_name: azureml-cohere
azure_model_name: Cohere-embed-v3-english
azure_latest_model_version: 1
azure_latest_model_asset_id: azureml://registries/azureml-cohere/models/Cohere-embed-v3-english/versions/1
metrics:
accuracy: 0.927643

Просмотреть файл

@ -1,6 +1,6 @@
type: evaluationresult
name: amazonpolarityclassification_cohere-embed-v3-multilingual_classification
version: 2.07.22
version: 2.04.11
display_name: AmazonPolarityClassification_cohere-embed-v3-multilingual_classification
description: cohere-embed-v3-multilingual run for AmazonPolarityClassification dataset
dataset_name: AmazonPolarityClassification
@ -17,6 +17,10 @@ tags:
evaluation_type: text_embeddings
task: classification
primary_metric: accuracy
azure_registry_name: azureml-cohere
azure_model_name: Cohere-embed-v3-multilingual
azure_latest_model_version: 1
azure_latest_model_asset_id: azureml://registries/azureml-cohere/models/Cohere-embed-v3-multilingual/versions/1
metrics:
accuracy: 0.912307

Просмотреть файл

@ -1,6 +1,6 @@
type: evaluationresult
name: amazonpolarityclassification_text-embedding-3-large_classification
version: 2.07.22
version: 2.04.11
display_name: AmazonPolarityClassification_text-embedding-3-large_classification
description: text-embedding-3-large run for AmazonPolarityClassification dataset
dataset_name: AmazonPolarityClassification
@ -17,6 +17,10 @@ tags:
evaluation_type: text_embeddings
task: classification
primary_metric: accuracy
azure_registry_name: azure-openai
azure_model_name: text-embedding-3-large
azure_latest_model_version: 1
azure_latest_model_asset_id: azureml://registries/azure-openai/models/text-embedding-3-large/versions/1
metrics:
accuracy: 0.92868975

Просмотреть файл

@ -1,6 +1,6 @@
type: evaluationresult
name: amazonpolarityclassification_text-embedding-3-small_classification
version: 2.07.22
version: 2.04.11
display_name: AmazonPolarityClassification_text-embedding-3-small_classification
description: text-embedding-3-small run for AmazonPolarityClassification dataset
dataset_name: AmazonPolarityClassification
@ -17,6 +17,10 @@ tags:
evaluation_type: text_embeddings
task: classification
primary_metric: accuracy
azure_registry_name: azure-openai
azure_model_name: text-embedding-3-small
azure_latest_model_version: 1
azure_latest_model_asset_id: azureml://registries/azure-openai/models/text-embedding-3-small/versions/1
metrics:
accuracy: 0.90878075

Просмотреть файл

@ -1,6 +1,6 @@
type: evaluationresult
name: amazonpolarityclassification_text-embedding-ada-002_classification
version: 2.07.22
version: 2.04.11
display_name: AmazonPolarityClassification_text-embedding-ada-002_classification
description: text-embedding-ada-002 run for AmazonPolarityClassification dataset
dataset_name: AmazonPolarityClassification
@ -17,6 +17,10 @@ tags:
evaluation_type: text_embeddings
task: classification
primary_metric: accuracy
azure_registry_name: azure-openai
azure_model_name: text-embedding-ada-002
azure_latest_model_version: 2
azure_latest_model_asset_id: azureml://registries/azure-openai/models/text-embedding-ada-002/versions/2
metrics:
accuracy: 0.867263

Просмотреть файл

@ -1,6 +1,6 @@
type: evaluationresult
name: arguana_cohere-embed-v3-english_retrieval
version: 2.22.05
version: 2.04.11
display_name: ArguAna_cohere-embed-v3-english_retrieval
description: cohere-embed-v3-english run for ArguAna dataset
dataset_name: ArguAna
@ -17,6 +17,10 @@ tags:
evaluation_type: text_embeddings
task: retrieval
primary_metric: ndcg_at_10
azure_registry_name: azureml-cohere
azure_model_name: Cohere-embed-v3-english
azure_latest_model_version: 1
azure_latest_model_asset_id: azureml://registries/azureml-cohere/models/Cohere-embed-v3-english/versions/1
metrics:
ndcg_at_10: 0.57529

Просмотреть файл

@ -1,6 +1,6 @@
type: evaluationresult
name: arguana_cohere-embed-v3-multilingual_retrieval
version: 2.22.05
version: 2.04.11
display_name: ArguAna_cohere-embed-v3-multilingual_retrieval
description: cohere-embed-v3-multilingual run for ArguAna dataset
dataset_name: ArguAna
@ -17,6 +17,10 @@ tags:
evaluation_type: text_embeddings
task: retrieval
primary_metric: ndcg_at_10
azure_registry_name: azureml-cohere
azure_model_name: Cohere-embed-v3-multilingual
azure_latest_model_version: 1
azure_latest_model_asset_id: azureml://registries/azureml-cohere/models/Cohere-embed-v3-multilingual/versions/1
metrics:
ndcg_at_10: 0.57989

Просмотреть файл

@ -1,6 +1,6 @@
type: evaluationresult
name: arguana_text-embedding-3-large_retrieval
version: 2.07.22
version: 2.04.11
display_name: ArguAna_text-embedding-3-large_retrieval
description: text-embedding-3-large run for ArguAna dataset
dataset_name: ArguAna
@ -17,6 +17,10 @@ tags:
evaluation_type: text_embeddings
task: retrieval
primary_metric: ndcg_at_10
azure_registry_name: azure-openai
azure_model_name: text-embedding-3-large
azure_latest_model_version: 1
azure_latest_model_asset_id: azureml://registries/azure-openai/models/text-embedding-3-large/versions/1
metrics:
ndcg_at_10: 0.58013

Просмотреть файл

@ -1,6 +1,6 @@
type: evaluationresult
name: arguana_text-embedding-3-small_retrieval
version: 2.07.22
version: 2.04.11
display_name: ArguAna_text-embedding-3-small_retrieval
description: text-embedding-3-small run for ArguAna dataset
dataset_name: ArguAna
@ -17,6 +17,10 @@ tags:
evaluation_type: text_embeddings
task: retrieval
primary_metric: ndcg_at_10
azure_registry_name: azure-openai
azure_model_name: text-embedding-3-small
azure_latest_model_version: 1
azure_latest_model_asset_id: azureml://registries/azure-openai/models/text-embedding-3-small/versions/1
metrics:
ndcg_at_10: 0.55694

Просмотреть файл

@ -1,6 +1,6 @@
type: evaluationresult
name: arguana_text-embedding-ada-002_retrieval
version: 2.22.05
version: 2.04.11
display_name: ArguAna_text-embedding-ada-002_retrieval
description: text-embedding-ada-002 run for ArguAna dataset
dataset_name: ArguAna
@ -17,6 +17,10 @@ tags:
evaluation_type: text_embeddings
task: retrieval
primary_metric: ndcg_at_10
azure_registry_name: azure-openai
azure_model_name: text-embedding-ada-002
azure_latest_model_version: 2
azure_latest_model_asset_id: azureml://registries/azure-openai/models/text-embedding-ada-002/versions/2
metrics:
ndcg_at_10: 0.57455

Просмотреть файл

@ -1,6 +1,6 @@
type: evaluationresult
name: arxivclusteringp2p.v2_cohere-embed-v3-english_clustering
version: 2.07.22
version: 2.04.11
display_name: ArxivClusteringP2P.v2_cohere-embed-v3-english_clustering
description: cohere-embed-v3-english run for ArxivClusteringP2P.v2 dataset
dataset_name: ArxivClusteringP2P.v2
@ -17,6 +17,10 @@ tags:
evaluation_type: text_embeddings
task: clustering
primary_metric: v_measure
azure_registry_name: azureml-cohere
azure_model_name: Cohere-embed-v3-english
azure_latest_model_version: 1
azure_latest_model_asset_id: azureml://registries/azureml-cohere/models/Cohere-embed-v3-english/versions/1
metrics:
v_measure: 0.5081042703542442

Просмотреть файл

@ -1,6 +1,6 @@
type: evaluationresult
name: arxivclusteringp2p.v2_cohere-embed-v3-multilingual_clustering
version: 2.07.22
version: 2.04.11
display_name: ArxivClusteringP2P.v2_cohere-embed-v3-multilingual_clustering
description: cohere-embed-v3-multilingual run for ArxivClusteringP2P.v2 dataset
dataset_name: ArxivClusteringP2P.v2
@ -17,6 +17,10 @@ tags:
evaluation_type: text_embeddings
task: clustering
primary_metric: v_measure
azure_registry_name: azureml-cohere
azure_model_name: Cohere-embed-v3-multilingual
azure_latest_model_version: 1
azure_latest_model_asset_id: azureml://registries/azureml-cohere/models/Cohere-embed-v3-multilingual/versions/1
metrics:
v_measure: 0.5029184573976476

Просмотреть файл

@ -1,6 +1,6 @@
type: evaluationresult
name: arxivclusteringp2p.v2_text-embedding-3-large_clustering
version: 2.07.22
version: 2.04.11
display_name: ArxivClusteringP2P.v2_text-embedding-3-large_clustering
description: text-embedding-3-large run for ArxivClusteringP2P.v2 dataset
dataset_name: ArxivClusteringP2P.v2
@ -17,6 +17,10 @@ tags:
evaluation_type: text_embeddings
task: clustering
primary_metric: v_measure
azure_registry_name: azure-openai
azure_model_name: text-embedding-3-large
azure_latest_model_version: 1
azure_latest_model_asset_id: azureml://registries/azure-openai/models/text-embedding-3-large/versions/1
metrics:
v_measure: 0.519053128352996

Просмотреть файл

@ -1,6 +1,6 @@
type: evaluationresult
name: arxivclusteringp2p.v2_text-embedding-3-small_clustering
version: 2.07.22
version: 2.04.11
display_name: ArxivClusteringP2P.v2_text-embedding-3-small_clustering
description: text-embedding-3-small run for ArxivClusteringP2P.v2 dataset
dataset_name: ArxivClusteringP2P.v2
@ -17,6 +17,10 @@ tags:
evaluation_type: text_embeddings
task: clustering
primary_metric: v_measure
azure_registry_name: azure-openai
azure_model_name: text-embedding-3-small
azure_latest_model_version: 1
azure_latest_model_asset_id: azureml://registries/azure-openai/models/text-embedding-3-small/versions/1
metrics:
v_measure: 0.496692276507199

Просмотреть файл

@ -1,6 +1,6 @@
type: evaluationresult
name: arxivclusteringp2p.v2_text-embedding-ada-002_clustering
version: 2.07.22
version: 2.04.11
display_name: ArxivClusteringP2P.v2_text-embedding-ada-002_clustering
description: text-embedding-ada-002 run for ArxivClusteringP2P.v2 dataset
dataset_name: ArxivClusteringP2P.v2
@ -17,6 +17,10 @@ tags:
evaluation_type: text_embeddings
task: clustering
primary_metric: v_measure
azure_registry_name: azure-openai
azure_model_name: text-embedding-ada-002
azure_latest_model_version: 2
azure_latest_model_asset_id: azureml://registries/azure-openai/models/text-embedding-ada-002/versions/2
metrics:
v_measure: 0.4794210912494528

Просмотреть файл

@ -1,6 +1,6 @@
type: evaluationresult
name: arxivclusterings2s_cohere-embed-v3-english_clustering
version: 2.07.22
version: 2.04.11
display_name: ArxivClusteringS2S_cohere-embed-v3-english_clustering
description: cohere-embed-v3-english run for ArxivClusteringS2S dataset
dataset_name: ArxivClusteringS2S
@ -17,6 +17,10 @@ tags:
evaluation_type: text_embeddings
task: clustering
primary_metric: v_measure
azure_registry_name: azureml-cohere
azure_model_name: Cohere-embed-v3-english
azure_latest_model_version: 1
azure_latest_model_asset_id: azureml://registries/azureml-cohere/models/Cohere-embed-v3-english/versions/1
metrics:
v_measure: 0.38872349524931893

Просмотреть файл

@ -1,6 +1,6 @@
type: evaluationresult
name: arxivclusterings2s_cohere-embed-v3-multilingual_clustering
version: 2.07.22
version: 2.04.11
display_name: ArxivClusteringS2S_cohere-embed-v3-multilingual_clustering
description: cohere-embed-v3-multilingual run for ArxivClusteringS2S dataset
dataset_name: ArxivClusteringS2S
@ -17,6 +17,10 @@ tags:
evaluation_type: text_embeddings
task: clustering
primary_metric: v_measure
azure_registry_name: azureml-cohere
azure_model_name: Cohere-embed-v3-multilingual
azure_latest_model_version: 1
azure_latest_model_asset_id: azureml://registries/azureml-cohere/models/Cohere-embed-v3-multilingual/versions/1
metrics:
v_measure: 0.3910885755785807

Просмотреть файл

@ -1,6 +1,6 @@
type: evaluationresult
name: arxivclusterings2s_text-embedding-3-large_clustering
version: 2.07.22
version: 2.04.11
display_name: ArxivClusteringS2S_text-embedding-3-large_clustering
description: text-embedding-3-large run for ArxivClusteringS2S dataset
dataset_name: ArxivClusteringS2S
@ -17,6 +17,10 @@ tags:
evaluation_type: text_embeddings
task: clustering
primary_metric: v_measure
azure_registry_name: azure-openai
azure_model_name: text-embedding-3-large
azure_latest_model_version: 1
azure_latest_model_asset_id: azureml://registries/azure-openai/models/text-embedding-3-large/versions/1
metrics:
v_measure: 0.4429783426306228

Просмотреть файл

@ -1,6 +1,6 @@
type: evaluationresult
name: arxivclusterings2s_text-embedding-3-small_clustering
version: 2.07.22
version: 2.04.11
display_name: ArxivClusteringS2S_text-embedding-3-small_clustering
description: text-embedding-3-small run for ArxivClusteringS2S dataset
dataset_name: ArxivClusteringS2S
@ -17,6 +17,10 @@ tags:
evaluation_type: text_embeddings
task: clustering
primary_metric: v_measure
azure_registry_name: azure-openai
azure_model_name: text-embedding-3-small
azure_latest_model_version: 1
azure_latest_model_asset_id: azureml://registries/azure-openai/models/text-embedding-3-small/versions/1
metrics:
v_measure: 0.3940951744128959

Просмотреть файл

@ -1,6 +1,6 @@
type: evaluationresult
name: arxivclusterings2s_text-embedding-ada-002_clustering
version: 2.07.22
version: 2.04.11
display_name: ArxivClusteringS2S_text-embedding-ada-002_clustering
description: text-embedding-ada-002 run for ArxivClusteringS2S dataset
dataset_name: ArxivClusteringS2S
@ -17,6 +17,10 @@ tags:
evaluation_type: text_embeddings
task: clustering
primary_metric: v_measure
azure_registry_name: azure-openai
azure_model_name: text-embedding-ada-002
azure_latest_model_version: 2
azure_latest_model_asset_id: azureml://registries/azure-openai/models/text-embedding-ada-002/versions/2
metrics:
v_measure: 0.3719179506563676

Просмотреть файл

@ -1,6 +1,6 @@
type: evaluationresult
name: banking77classification_cohere-embed-v3-english_classification
version: 2.22.05
version: 2.04.11
display_name: Banking77Classification_cohere-embed-v3-english_classification
description: cohere-embed-v3-english run for Banking77Classification dataset
dataset_name: Banking77Classification
@ -17,6 +17,10 @@ tags:
evaluation_type: text_embeddings
task: classification
primary_metric: accuracy
azure_registry_name: azureml-cohere
azure_model_name: Cohere-embed-v3-english
azure_latest_model_version: 1
azure_latest_model_asset_id: azureml://registries/azureml-cohere/models/Cohere-embed-v3-english/versions/1
metrics:
accuracy: 0.7934415584415586

Просмотреть файл

@ -1,6 +1,6 @@
type: evaluationresult
name: banking77classification_cohere-embed-v3-multilingual_classification
version: 2.22.05
version: 2.04.11
display_name: Banking77Classification_cohere-embed-v3-multilingual_classification
description: cohere-embed-v3-multilingual run for Banking77Classification dataset
dataset_name: Banking77Classification
@ -17,6 +17,10 @@ tags:
evaluation_type: text_embeddings
task: classification
primary_metric: accuracy
azure_registry_name: azureml-cohere
azure_model_name: Cohere-embed-v3-multilingual
azure_latest_model_version: 1
azure_latest_model_asset_id: azureml://registries/azureml-cohere/models/Cohere-embed-v3-multilingual/versions/1
metrics:
accuracy: 0.7934415584415585

Просмотреть файл

@ -1,6 +1,6 @@
type: evaluationresult
name: banking77classification_text-embedding-3-large_classification
version: 2.07.22
version: 2.04.11
display_name: Banking77Classification_text-embedding-3-large_classification
description: text-embedding-3-large run for Banking77Classification dataset
dataset_name: Banking77Classification
@ -17,6 +17,10 @@ tags:
evaluation_type: text_embeddings
task: classification
primary_metric: accuracy
azure_registry_name: azure-openai
azure_model_name: text-embedding-3-large
azure_latest_model_version: 1
azure_latest_model_asset_id: azureml://registries/azure-openai/models/text-embedding-3-large/versions/1
metrics:
accuracy: 0.8572402597402597

Просмотреть файл

@ -1,6 +1,6 @@
type: evaluationresult
name: banking77classification_text-embedding-3-small_classification
version: 2.07.22
version: 2.04.11
display_name: Banking77Classification_text-embedding-3-small_classification
description: text-embedding-3-small run for Banking77Classification dataset
dataset_name: Banking77Classification
@ -17,6 +17,10 @@ tags:
evaluation_type: text_embeddings
task: classification
primary_metric: accuracy
azure_registry_name: azure-openai
azure_model_name: text-embedding-3-small
azure_latest_model_version: 1
azure_latest_model_asset_id: azureml://registries/azure-openai/models/text-embedding-3-small/versions/1
metrics:
accuracy: 0.8299025974025973

Просмотреть файл

@ -1,6 +1,6 @@
type: evaluationresult
name: banking77classification_text-embedding-ada-002_classification
version: 2.22.05
version: 2.04.11
display_name: Banking77Classification_text-embedding-ada-002_classification
description: text-embedding-ada-002 run for Banking77Classification dataset
dataset_name: Banking77Classification
@ -17,6 +17,10 @@ tags:
evaluation_type: text_embeddings
task: classification
primary_metric: accuracy
azure_registry_name: azure-openai
azure_model_name: text-embedding-ada-002
azure_latest_model_version: 2
azure_latest_model_asset_id: azureml://registries/azure-openai/models/text-embedding-ada-002/versions/2
metrics:
accuracy: 0.8053246753246753

Просмотреть файл

@ -1,23 +1,27 @@
type: evaluationresult
name: boolq__gpt-4-0125-preview__question_answering
version: 2.12.08
version: 2.04.11
display_name: boolq__gpt-4-0125-Preview__chat_completion
description: Benchmark__gpt40125__hf_boolq__chat_completion
dataset_family: boolq
dataset_name: boolq
model_name: gpt-4-0125-Preview
model_version: "4"
model_asset_id: azureml://registries/azure-openai/models/gpt-4-0125-Preview/versions/4
model_version: "0125-Preview"
model_asset_id: azureml://registries/azure-openai/models/gpt-4/versions/0125-Preview
relationships:
- relationshipType: Source
assetId: azureml://registries/azure-openai/models/gpt-4-0125-Preview/versions/4
assetId: azureml://registries/azure-openai/models/gpt-4/versions/0125-Preview
tags:
evaluation_type: text_generation
task: question-answering
accuracy_metric_name: exact_match
azure_registry_name: azure-openai
azure_model_name: gpt-4
azure_latest_model_version: turbo-2024-04-09
azure_latest_model_asset_id: azureml://registries/azure-openai/models/gpt-4/versions/turbo-2024-04-09
metrics:
accuracy: 0.904892966

Просмотреть файл

@ -1,6 +1,6 @@
type: evaluationresult
name: boolq__meta-llama-3_1-70b-instruct__question_answering
version: 2.12.08
version: 2.04.11
display_name: boolq__Meta-Llama-3_1-70B-Instruct__chat_completion
description: Benchmark__Llama-3-1-70B-Instruct-bench__hf_boolq__chat_completion
dataset_family: boolq
@ -18,6 +18,10 @@ tags:
evaluation_type: text_generation
task: question-answering
accuracy_metric_name: exact_match
azure_registry_name: azureml-meta
azure_model_name: Meta-Llama-3.1-70B-Instruct
azure_latest_model_version: 3
azure_latest_model_asset_id: azureml://registries/azureml-meta/models/Meta-Llama-3.1-70B-Instruct/versions/3
metrics:
accuracy: 0.909785933

Просмотреть файл

@ -1,6 +1,6 @@
type: evaluationresult
name: boolq__meta-llama-3_1-8b-instruct__question_answering
version: 2.12.08
version: 2.04.11
display_name: boolq__Meta-Llama-3_1-8B-Instruct__chat_completion
description: Benchmark__meta-llama-3-1-8b-instruct-1__hf_boolq__chat_completion
dataset_family: boolq
@ -18,6 +18,10 @@ tags:
evaluation_type: text_generation
task: question-answering
accuracy_metric_name: exact_match
azure_registry_name: azureml-meta
azure_model_name: Meta-Llama-3.1-8B-Instruct
azure_latest_model_version: 3
azure_latest_model_asset_id: azureml://registries/azureml-meta/models/Meta-Llama-3.1-8B-Instruct/versions/3
metrics:
accuracy: 0.868501529

Просмотреть файл

@ -1,23 +1,27 @@
type: evaluationresult
name: boolq_cohere_command_r_plus_question_answering
version: 2.30.04
version: 2.04.11
display_name: boolq_Cohere_command_r_plus_question_answering
description: Cohere-command-r-plus run for boolq dataset
dataset_family: boolq
dataset_name: boolq
model_name: Cohere-command-r-plus
model_version: "3"
model_asset_id: azureml://registries/azureml-cohere/models/Cohere-command-r-plus/versions/3
model_version: "1"
model_asset_id: azureml://registries/azureml-cohere/models/Cohere-command-r-plus/versions/1
relationships:
- relationshipType: Source
assetId: azureml://registries/azureml-cohere/models/Cohere-command-r-plus/versions/3
assetId: azureml://registries/azureml-cohere/models/Cohere-command-r-plus/versions/1
tags:
evaluation_type: text_generation
task: question-answering
accuracy_metric_name: exact_match
azure_registry_name: azureml-cohere
azure_model_name: Cohere-command-r-plus
azure_latest_model_version: 1
azure_latest_model_asset_id: azureml://registries/azureml-cohere/models/Cohere-command-r-plus/versions/1
metrics:
accuracy: 0.909480122

Просмотреть файл

@ -1,23 +1,27 @@
type: evaluationresult
name: boolq_cohere_command_r_question_answering
version: 2.30.04
version: 2.04.11
display_name: boolq_Cohere_command_r_question_answering
description: Cohere-command-r run for boolq dataset
dataset_family: boolq
dataset_name: boolq
model_name: Cohere-command-r
model_version: "3"
model_asset_id: azureml://registries/azureml-cohere/models/Cohere-command-r/versions/3
model_version: "1"
model_asset_id: azureml://registries/azureml-cohere/models/Cohere-command-r/versions/1
relationships:
- relationshipType: Source
assetId: azureml://registries/azureml-cohere/models/Cohere-command-r/versions/3
assetId: azureml://registries/azureml-cohere/models/Cohere-command-r/versions/1
tags:
evaluation_type: text_generation
task: question-answering
accuracy_metric_name: exact_match
azure_registry_name: azureml-cohere
azure_model_name: Cohere-command-r
azure_latest_model_version: 1
azure_latest_model_asset_id: azureml://registries/azureml-cohere/models/Cohere-command-r/versions/1
metrics:
accuracy: 0.8819571865443425

Просмотреть файл

@ -1,6 +1,6 @@
type: evaluationresult
name: boolq_databricks-dbrx-base_question_answering
version: 2.19.040
version: 2.04.11
display_name: boolq_databricks-dbrx-base_question_answering
description: databricks-dbrx-base run for boolq dataset
dataset_family: boolq
@ -18,6 +18,10 @@ tags:
evaluation_type: text_generation
task: question-answering
accuracy_metric_name: exact_match
azure_registry_name: azureml-restricted
azure_model_name: databricks-dbrx-base
azure_latest_model_version: 3
azure_latest_model_asset_id: azureml://registries/azureml-restricted/models/databricks-dbrx-base/versions/3
metrics:
accuracy: 0.9159021

Просмотреть файл

@ -1,6 +1,6 @@
type: evaluationresult
name: boolq_databricks-dbrx-instruct_question_answering
version: 2.19.040
version: 2.04.11
display_name: boolq_databricks-dbrx-instruct_question_answering
description: databricks-dbrx-instruct run for boolq dataset
dataset_family: boolq
@ -18,6 +18,10 @@ tags:
evaluation_type: text_generation
task: question-answering
accuracy_metric_name: exact_match
azure_registry_name: azureml-restricted
azure_model_name: databricks-dbrx-instruct
azure_latest_model_version: 3
azure_latest_model_asset_id: azureml://registries/azureml-restricted/models/databricks-dbrx-instruct/versions/3
metrics:
accuracy: 0.9051988

Просмотреть файл

@ -1,6 +1,6 @@
type: evaluationresult
name: boolq_gpt-4-turbo-2024-04-09_chat_completion
version: 2.07.05
version: 2.04.11
display_name: boolq_gpt-4-turbo-2024-04-09_chat_completion
description: boolq_gpt-4-turbo-2024-04-09_chat_completion
dataset_family: boolq
@ -8,16 +8,20 @@ dataset_name: boolq
model_name: gpt-4-turbo-2024-04-09
model_version: "turbo-2024-04-09"
model_asset_id: azureml://registries/azure-openai/models/gpt-4/versions/4
model_asset_id: azureml://registries/azure-openai/models/gpt-4/versions/turbo-2024-04-09
relationships:
- relationshipType: Source
assetId: azureml://registries/azure-openai/models/gpt-4/versions/4
assetId: azureml://registries/azure-openai/models/gpt-4/versions/turbo-2024-04-09
tags:
evaluation_type: text_generation
task: question-answering
accuracy_metric_name: exact_match
azure_registry_name: azure-openai
azure_model_name: gpt-4
azure_latest_model_version: turbo-2024-04-09
azure_latest_model_asset_id: azureml://registries/azure-openai/models/gpt-4/versions/turbo-2024-04-09
metrics:
accuracy: 0.9125382262996942

Просмотреть файл

@ -1,23 +1,27 @@
type: evaluationresult
name: boolq_gpt-4o_question_answering
version: 2.12.08
version: 2.04.11
display_name: boolq_gpt-4o_question_answering
description: gpt-4o run for boolq
dataset_family: boolq
dataset_name: boolq
model_name: gpt-4o
model_version: "5/13/2024"
model_asset_id: azureml://registries/azure-openai/models/gpt-4o/versions/1
model_name: gpt-4o-2024-05-13
model_version: "2024-05-13"
model_asset_id: azureml://registries/azure-openai/models/gpt-4o/versions/2024-05-13
relationships:
- relationshipType: Source
assetId: azureml://registries/azure-openai/models/gpt-4o/versions/1
assetId: azureml://registries/azure-openai/models/gpt-4o/versions/2024-05-13
tags:
evaluation_type: text_generation
task: question-answering
accuracy_metric_name: exact_match
azure_registry_name: azure-openai
azure_model_name: gpt-4o
# Quoted: a bare 2024-08-06 is resolved as a date, not as the version string.
azure_latest_model_version: "2024-08-06"
azure_latest_model_asset_id: azureml://registries/azure-openai/models/gpt-4o/versions/2024-08-06
metrics:
accuracy: 0.908562691

Просмотреть файл

@ -1,6 +1,6 @@
type: evaluationresult
name: boolq_gpt_35_turbo_0301_question_answering
version: 2.03.05
version: 2.04.11
display_name: boolq_gpt_35_turbo_0301_question_answering
description: gpt-35-turbo-0301 run for boolq dataset
dataset_family: boolq
@ -8,16 +8,20 @@ dataset_name: boolq
model_name: gpt-35-turbo-0301
model_version: "0301"
model_asset_id: azureml://registries/azure-openai/models/gpt-35-turbo/versions/2
model_asset_id: azureml://registries/azure-openai/models/gpt-35-turbo/versions/0301
relationships:
- relationshipType: Source
assetId: azureml://registries/azure-openai/models/gpt-35-turbo/versions/2
assetId: azureml://registries/azure-openai/models/gpt-35-turbo/versions/0301
tags:
evaluation_type: text_generation
task: question-answering
accuracy_metric_name: exact_match
azure_registry_name: azure-openai
azure_model_name: gpt-35-turbo
# Quoted: a bare 0125 is resolved as an integer and loses its leading zero.
azure_latest_model_version: "0125"
azure_latest_model_asset_id: azureml://registries/azure-openai/models/gpt-35-turbo/versions/0125
metrics:
accuracy: 0.867

Просмотреть файл

@ -1,6 +1,6 @@
type: evaluationresult
name: boolq_gpt_35_turbo_0613_question_answering
version: 2.03.05
version: 2.04.11
display_name: boolq_gpt_35_turbo_0613_question_answering
description: gpt-35-turbo-0613 run for boolq dataset
dataset_family: boolq
@ -8,16 +8,20 @@ dataset_name: boolq
model_name: gpt-35-turbo-0613
model_version: "0613"
model_asset_id: azureml://registries/azure-openai/models/gpt-35-turbo/versions/2
model_asset_id: azureml://registries/azure-openai/models/gpt-35-turbo/versions/0613
relationships:
- relationshipType: Source
assetId: azureml://registries/azure-openai/models/gpt-35-turbo/versions/2
assetId: azureml://registries/azure-openai/models/gpt-35-turbo/versions/0613
tags:
evaluation_type: text_generation
task: question-answering
accuracy_metric_name: exact_match
azure_registry_name: azure-openai
azure_model_name: gpt-35-turbo
# Quoted: a bare 0125 is resolved as an integer and loses its leading zero.
azure_latest_model_version: "0125"
azure_latest_model_asset_id: azureml://registries/azure-openai/models/gpt-35-turbo/versions/0125
metrics:
accuracy: 0.864

Просмотреть файл

@ -1,6 +1,6 @@
type: evaluationresult
name: boolq_gpt_4_0314_question_answering
version: 2.03.05
version: 2.04.11
display_name: boolq_gpt_4_0314_question_answering
description: gpt-4-0314 run for boolq dataset
dataset_family: boolq
@ -18,6 +18,10 @@ tags:
evaluation_type: text_generation
task: question-answering
accuracy_metric_name: exact_match
azure_registry_name: azure-openai
azure_model_name: gpt-4
azure_latest_model_version: turbo-2024-04-09
azure_latest_model_asset_id: azureml://registries/azure-openai/models/gpt-4/versions/turbo-2024-04-09
metrics:
accuracy: 0.911

Просмотреть файл

@ -1,6 +1,6 @@
type: evaluationresult
name: boolq_gpt_4_0613_question_answering
version: 2.03.05
version: 2.04.11
display_name: boolq_gpt_4_0613_question_answering
description: gpt-4-0613 run for boolq dataset
dataset_family: boolq
@ -8,16 +8,20 @@ dataset_name: boolq
model_name: gpt-4-0613
model_version: "0613"
model_asset_id: azureml://registries/azure-openai/models/gpt-4/versions/4
model_asset_id: azureml://registries/azure-openai/models/gpt-4/versions/0613
relationships:
- relationshipType: Source
assetId: azureml://registries/azure-openai/models/gpt-4/versions/4
assetId: azureml://registries/azure-openai/models/gpt-4/versions/0613
tags:
evaluation_type: text_generation
task: question-answering
accuracy_metric_name: exact_match
azure_registry_name: azure-openai
azure_model_name: gpt-4
azure_latest_model_version: turbo-2024-04-09
azure_latest_model_asset_id: azureml://registries/azure-openai/models/gpt-4/versions/turbo-2024-04-09
metrics:
accuracy: 0.912

Просмотреть файл

@ -1,6 +1,6 @@
type: evaluationresult
name: boolq_gpt_4_32k_0314_question_answering
version: 2.03.05
version: 2.04.11
display_name: boolq_gpt_4_32k_0314_question_answering
description: gpt-4-32k-0314 run for boolq dataset
dataset_family: boolq
@ -8,16 +8,20 @@ dataset_name: boolq
model_name: gpt-4-32k-0314
model_version: "0314"
model_asset_id: azureml://registries/azure-openai/models/gpt-4-32k/versions/315
model_asset_id: azureml://registries/azure-openai/models/gpt-4-32k/versions/0314
relationships:
- relationshipType: Source
assetId: azureml://registries/azure-openai/models/gpt-4-32k/versions/315
assetId: azureml://registries/azure-openai/models/gpt-4-32k/versions/0314
tags:
evaluation_type: text_generation
task: question-answering
accuracy_metric_name: exact_match
azure_registry_name: azure-openai
azure_model_name: gpt-4-32k
azure_latest_model_version: "0613"
azure_latest_model_asset_id: azureml://registries/azure-openai/models/gpt-4-32k/versions/0613
metrics:
accuracy: 0.913

Просмотреть файл

@ -1,6 +1,6 @@
type: evaluationresult
name: boolq_gpt_4_32k_0613_question_answering
version: 2.03.05
version: 2.04.11
display_name: boolq_gpt_4_32k_0613_question_answering
description: gpt-4-32k-0613 run for boolq dataset
dataset_family: boolq
@ -8,16 +8,20 @@ dataset_name: boolq
model_name: gpt-4-32k-0613
model_version: "0613"
model_asset_id: azureml://registries/azure-openai/models/gpt-4-32k/versions/315
model_asset_id: azureml://registries/azure-openai/models/gpt-4-32k/versions/0613
relationships:
- relationshipType: Source
assetId: azureml://registries/azure-openai/models/gpt-4-32k/versions/315
assetId: azureml://registries/azure-openai/models/gpt-4-32k/versions/0613
tags:
evaluation_type: text_generation
task: question-answering
accuracy_metric_name: exact_match
azure_registry_name: azure-openai
azure_model_name: gpt-4-32k
azure_latest_model_version: "0613"
azure_latest_model_asset_id: azureml://registries/azure-openai/models/gpt-4-32k/versions/0613
metrics:
accuracy: 0.911

Просмотреть файл

@ -1,6 +1,6 @@
type: evaluationresult
name: boolq_llama_2_13b_chat_question_answering
version: 2.19.040
version: 2.04.11
display_name: boolq_llama_2_13b_chat_question_answering
description: llama-2-13b-chat run for boolq dataset
dataset_family: boolq
@ -18,6 +18,10 @@ tags:
evaluation_type: text_generation
task: question-answering
accuracy_metric_name: exact_match
azure_registry_name: azureml-meta
azure_model_name: Llama-2-13b-chat
azure_latest_model_version: 20
azure_latest_model_asset_id: azureml://registries/azureml-meta/models/Llama-2-13b-chat/versions/20
metrics:
accuracy: 0.801

Просмотреть файл

@ -1,6 +1,6 @@
type: evaluationresult
name: boolq_llama_2_13b_question_answering
version: 2.19.040
version: 2.04.11
display_name: boolq_llama_2_13b_question_answering
description: llama-2-13b run for boolq dataset
dataset_family: boolq
@ -18,6 +18,10 @@ tags:
evaluation_type: text_generation
task: question-answering
accuracy_metric_name: exact_match
azure_registry_name: azureml-meta
azure_model_name: Llama-2-13b
azure_latest_model_version: 23
azure_latest_model_asset_id: azureml://registries/azureml-meta/models/Llama-2-13b/versions/23
metrics:
accuracy: 0.723

Просмотреть файл

@ -1,6 +1,6 @@
type: evaluationresult
name: boolq_llama_2_70b_chat_question_answering
version: 2.19.040
version: 2.04.11
display_name: boolq_llama_2_70b_chat_question_answering
description: llama-2-70b-chat run for boolq dataset
dataset_family: boolq
@ -18,6 +18,10 @@ tags:
evaluation_type: text_generation
task: question-answering
accuracy_metric_name: exact_match
azure_registry_name: azureml-meta
azure_model_name: Llama-2-70b-chat
azure_latest_model_version: 20
azure_latest_model_asset_id: azureml://registries/azureml-meta/models/Llama-2-70b-chat/versions/20
metrics:
accuracy: 0.826

Просмотреть файл

@ -1,6 +1,6 @@
type: evaluationresult
name: boolq_llama_2_70b_question_answering
version: 2.19.040
version: 2.04.11
display_name: boolq_llama_2_70b_question_answering
description: llama-2-70b run for boolq dataset
dataset_family: boolq
@ -18,6 +18,10 @@ tags:
evaluation_type: text_generation
task: question-answering
accuracy_metric_name: exact_match
azure_registry_name: azureml-meta
azure_model_name: Llama-2-70b
azure_latest_model_version: 24
azure_latest_model_asset_id: azureml://registries/azureml-meta/models/Llama-2-70b/versions/24
metrics:
accuracy: 0.853

Просмотреть файл

@ -1,6 +1,6 @@
type: evaluationresult
name: boolq_llama_2_7b_chat_question_answering
version: 2.19.040
version: 2.04.11
display_name: boolq_llama_2_7b_chat_question_answering
description: llama-2-7b-chat run for boolq dataset
dataset_family: boolq
@ -18,6 +18,10 @@ tags:
evaluation_type: text_generation
task: question-answering
accuracy_metric_name: exact_match
azure_registry_name: azureml-meta
azure_model_name: Llama-2-7b-chat
azure_latest_model_version: 24
azure_latest_model_asset_id: azureml://registries/azureml-meta/models/Llama-2-7b-chat/versions/24
metrics:
accuracy: 0.771

Просмотреть файл

@ -1,6 +1,6 @@
type: evaluationresult
name: boolq_llama_2_7b_question_answering
version: 2.19.040
version: 2.04.11
display_name: boolq_llama_2_7b_question_answering
description: llama-2-7b run for boolq dataset
dataset_family: boolq
@ -18,6 +18,10 @@ tags:
evaluation_type: text_generation
task: question-answering
accuracy_metric_name: exact_match
azure_registry_name: azureml-meta
azure_model_name: Llama-2-7b
azure_latest_model_version: 22
azure_latest_model_asset_id: azureml://registries/azureml-meta/models/Llama-2-7b/versions/22
metrics:
accuracy: 0.628

Просмотреть файл

@ -1,6 +1,6 @@
type: evaluationresult
name: boolq_meta-llama-3-70b-instruct_question_answering
version: 2.07.05
version: 2.04.11
display_name: boolq_Meta-Llama-3-70B-Instruct_question_answering
description: Meta-Llama-3-70B-Instruct run for boolq dataset
dataset_family: boolq
@ -18,6 +18,10 @@ tags:
evaluation_type: text_generation
task: question-answering
accuracy_metric_name: exact_match
azure_registry_name: azureml-meta
azure_model_name: Meta-Llama-3-70B-Instruct
azure_latest_model_version: 8
azure_latest_model_asset_id: azureml://registries/azureml-meta/models/Meta-Llama-3-70B-Instruct/versions/8
metrics:
accuracy: 0.9027522935779817

Просмотреть файл

@ -1,6 +1,6 @@
type: evaluationresult
name: boolq_meta-llama-3-70b_question_answering
version: 2.22.04
version: 2.04.11
display_name: boolq_Meta-Llama-3-70B_question_answering
description: Meta-Llama-3-70B run for boolq dataset
dataset_family: boolq
@ -18,6 +18,10 @@ tags:
evaluation_type: text_generation
task: question-answering
accuracy_metric_name: exact_match
azure_registry_name: azureml-meta
azure_model_name: Meta-Llama-3-70B
azure_latest_model_version: 6
azure_latest_model_asset_id: azureml://registries/azureml-meta/models/Meta-Llama-3-70B/versions/6
metrics:
accuracy: 0.8917431192660551

Просмотреть файл

@ -1,6 +1,6 @@
type: evaluationresult
name: boolq_meta-llama-3-8b-instruct_question_answering
version: 2.07.05
version: 2.04.11
display_name: boolq_Meta-Llama-3-8B-Instruct_question_answering
description: Meta-Llama-3-8B-Instruct run for boolq dataset
dataset_family: boolq
@ -18,9 +18,13 @@ tags:
evaluation_type: text_generation
task: question-answering
accuracy_metric_name: exact_match
azure_registry_name: azureml-meta
azure_model_name: Meta-Llama-3-8B-Instruct
azure_latest_model_version: 8
azure_latest_model_asset_id: azureml://registries/azureml-meta/models/Meta-Llama-3-8B-Instruct/versions/8
metrics:
accuracy: 0.863302752293578
accuracy: 0.863302752
properties:

Просмотреть файл

@ -1,6 +1,6 @@
type: evaluationresult
name: boolq_meta-llama-3-8b_question_answering
version: 2.22.04
version: 2.04.11
display_name: boolq_Meta-Llama-3-8B_question_answering
description: Meta-Llama-3-8B run for boolq dataset
dataset_family: boolq
@ -18,6 +18,10 @@ tags:
evaluation_type: text_generation
task: question-answering
accuracy_metric_name: exact_match
azure_registry_name: azureml-meta
azure_model_name: Meta-Llama-3-8B
azure_latest_model_version: 7
azure_latest_model_asset_id: azureml://registries/azureml-meta/models/Meta-Llama-3-8B/versions/7
metrics:
accuracy: 0.8198776758409786

Просмотреть файл

@ -1,6 +1,6 @@
type: evaluationresult
name: boolq_meta-llama-3_1-405b-instruct_question_answering
version: 2.12.08
version: 2.04.11
display_name: boolq_Meta-Llama-3_1-405B-Instruct_question_answering
description: Meta-Llama-3.1-405B-Instruct run for boolq
dataset_family: boolq
@ -18,6 +18,10 @@ tags:
evaluation_type: text_generation
task: question-answering
accuracy_metric_name: exact_match
azure_registry_name: azureml-meta
azure_model_name: Meta-Llama-3.1-405B-Instruct
azure_latest_model_version: 1
azure_latest_model_asset_id: azureml://registries/azureml-meta/models/Meta-Llama-3.1-405B-Instruct/versions/1
metrics:
accuracy: 0.920489297

Просмотреть файл

@ -1,6 +1,6 @@
type: evaluationresult
name: boolq_microsoft_phi_2_question_answering
version: 2.19.040
version: 2.04.11
display_name: boolq_microsoft_phi_2_question_answering
description: microsoft-phi-2 run for boolq dataset
dataset_family: boolq
@ -18,6 +18,10 @@ tags:
evaluation_type: text_generation
task: question-answering
accuracy_metric_name: exact_match
azure_registry_name: azureml-msr
azure_model_name: microsoft-phi-2
azure_latest_model_version: 19
azure_latest_model_asset_id: azureml://registries/azureml-msr/models/microsoft-phi-2/versions/19
metrics:
accuracy: 0.8079999999999999

Просмотреть файл

@ -1,6 +1,6 @@
type: evaluationresult
name: boolq_mistral-community-mixtral-8x22b-v0-1_question_answering
version: 2.19.040
version: 2.04.11
display_name: boolq_mistral-community-Mixtral-8x22B-v0-1_question_answering
description: mistral-community-Mixtral-8x22B-v0-1 run for boolq dataset
dataset_family: boolq
@ -18,6 +18,10 @@ tags:
evaluation_type: text_generation
task: question-answering
accuracy_metric_name: exact_match
azure_registry_name: azureml
azure_model_name: mistral-community-Mixtral-8x22B-v0-1
azure_latest_model_version: 5
azure_latest_model_asset_id: azureml://registries/azureml/models/mistral-community-Mixtral-8x22B-v0-1/versions/5
metrics:
accuracy: 0.8954128

Просмотреть файл

@ -1,6 +1,6 @@
type: evaluationresult
name: boolq_mistral_7b_instruct_v01_question_answering
version: 2.19.040
version: 2.04.11
display_name: boolq_mistral_7b_instruct_v01_question_answering
description: mistralai-mistral-7b-instruct-v01 run for boolq dataset
dataset_family: boolq
@ -18,6 +18,10 @@ tags:
evaluation_type: text_generation
task: question-answering
accuracy_metric_name: exact_match
azure_registry_name: azureml
azure_model_name: mistralai-Mistral-7B-Instruct-v01
azure_latest_model_version: 10
azure_latest_model_asset_id: azureml://registries/azureml/models/mistralai-Mistral-7B-Instruct-v01/versions/10
metrics:
accuracy: 0.777

Просмотреть файл

@ -1,6 +1,6 @@
type: evaluationresult
name: boolq_mistral_7b_v01_question_answering
version: 2.19.040
version: 2.04.11
display_name: boolq_mistral_7b_v01_question_answering
description: mistralai-mistral-7b-v01 run for boolq dataset
dataset_family: boolq
@ -18,6 +18,10 @@ tags:
evaluation_type: text_generation
task: question-answering
accuracy_metric_name: exact_match
azure_registry_name: azureml
azure_model_name: mistralai-Mistral-7B-v01
azure_latest_model_version: 17
azure_latest_model_asset_id: azureml://registries/azureml/models/mistralai-Mistral-7B-v01/versions/17
metrics:
accuracy: 0.828

Просмотреть файл

@ -1,6 +1,6 @@
type: evaluationresult
name: boolq_mistral_large_question_answering
version: 2.19.040
version: 2.04.11
display_name: boolq_mistral_large_question_answering
description: mistral-large run for boolq dataset
dataset_family: boolq
@ -18,6 +18,10 @@ tags:
evaluation_type: text_generation
task: question-answering
accuracy_metric_name: exact_match
azure_registry_name: azureml-mistral
azure_model_name: Mistral-large
azure_latest_model_version: 1
azure_latest_model_asset_id: azureml://registries/azureml-mistral/models/Mistral-large/versions/1
metrics:
accuracy: 0.8993884

Просмотреть файл

@ -1,6 +1,6 @@
type: evaluationresult
name: boolq_mistralai-mistral-7b-instruct-v0-2_question_answering
version: 2.19.040
version: 2.04.11
display_name: boolq_mistralai-Mistral-7B-Instruct-v0-2_question_answering
description: mistralai-Mistral-7B-Instruct-v0-2 run for boolq dataset
dataset_family: boolq
@ -18,6 +18,10 @@ tags:
evaluation_type: text_generation
task: question-answering
accuracy_metric_name: exact_match
azure_registry_name: azureml
azure_model_name: mistralai-Mistral-7B-Instruct-v0-2
azure_latest_model_version: 5
azure_latest_model_asset_id: azureml://registries/azureml/models/mistralai-Mistral-7B-Instruct-v0-2/versions/5
metrics:
accuracy: 0.795107

Просмотреть файл

@ -1,6 +1,6 @@
type: evaluationresult
name: boolq_mistralai-mixtral-8x22b-instruct-v0-1_question_answering
version: 2.22.04
version: 2.04.11
display_name: boolq_mistralai-Mixtral-8x22B-Instruct-v0-1_question_answering
description: mistralai-Mixtral-8x22B-Instruct-v0-1 run for boolq dataset
dataset_family: boolq
@ -18,6 +18,10 @@ tags:
evaluation_type: text_generation
task: question-answering
accuracy_metric_name: exact_match
azure_registry_name: azureml
azure_model_name: mistralai-Mixtral-8x22B-Instruct-v0-1
azure_latest_model_version: 4
azure_latest_model_asset_id: azureml://registries/azureml/models/mistralai-Mixtral-8x22B-Instruct-v0-1/versions/4
metrics:
accuracy: 0.8749235474006116

Просмотреть файл

@ -1,6 +1,6 @@
type: evaluationresult
name: boolq_mistralai-mixtral-8x22b-v0-1_question_answering
version: 2.22.04
version: 2.04.11
display_name: boolq_mistralai-Mixtral-8x22B-v0-1_question_answering
description: mistralai-Mixtral-8x22B-v0-1 run for boolq dataset
dataset_family: boolq
@ -18,6 +18,10 @@ tags:
evaluation_type: text_generation
task: question-answering
accuracy_metric_name: exact_match
azure_registry_name: azureml
azure_model_name: mistralai-Mixtral-8x22B-v0-1
azure_latest_model_version: 4
azure_latest_model_asset_id: azureml://registries/azureml/models/mistralai-Mixtral-8x22B-v0-1/versions/4
metrics:
accuracy: 0.8975535168195719

Просмотреть файл

@ -1,6 +1,6 @@
type: evaluationresult
name: boolq_mistralai-mixtral-8x7b-instruct-v01_question_answering
version: 2.19.040
version: 2.04.11
display_name: boolq_mistralai-Mixtral-8x7B-Instruct-v01_question_answering
description: mistralai-Mixtral-8x7B-Instruct-v01 run for boolq dataset
dataset_family: boolq
@ -18,6 +18,10 @@ tags:
evaluation_type: text_generation
task: question-answering
accuracy_metric_name: exact_match
azure_registry_name: azureml
azure_model_name: mistralai-Mixtral-8x7B-Instruct-v01
azure_latest_model_version: 9
azure_latest_model_asset_id: azureml://registries/azureml/models/mistralai-Mixtral-8x7B-Instruct-v01/versions/9
metrics:
accuracy: 0.8345566

Просмотреть файл

@ -1,6 +1,6 @@
type: evaluationresult
name: boolq_mistralai-mixtral-8x7b-v01_question_answering
version: 2.19.040
version: 2.04.11
display_name: boolq_mistralai-Mixtral-8x7B-v01_question_answering
description: mistralai-Mixtral-8x7B-v01 run for boolq dataset
dataset_family: boolq
@ -18,6 +18,10 @@ tags:
evaluation_type: text_generation
task: question-answering
accuracy_metric_name: exact_match
azure_registry_name: azureml
azure_model_name: mistralai-Mixtral-8x7B-v01
azure_latest_model_version: 14
azure_latest_model_asset_id: azureml://registries/azureml/models/mistralai-Mixtral-8x7B-v01/versions/14
metrics:
accuracy: 0.8724771

Просмотреть файл

@ -1,6 +1,6 @@
type: evaluationresult
name: boolq_phi-3-medium-128k-instruct_question_answering
version: 2.06.18
version: 2.04.11
display_name: boolq_Phi-3-medium-128k-instruct_question_answering
description: boolq_Phi-3-medium-128k-instruct_question_answering
dataset_family: boolq
@ -18,6 +18,10 @@ tags:
evaluation_type: text_generation
task: question-answering
accuracy_metric_name: exact_match
azure_registry_name: azureml
azure_model_name: Phi-3-medium-128k-instruct
azure_latest_model_version: 5
azure_latest_model_asset_id: azureml://registries/azureml/models/Phi-3-medium-128k-instruct/versions/5
metrics:
accuracy: 0.8795107033639143

Просмотреть файл

@ -1,6 +1,6 @@
type: evaluationresult
name: boolq_phi-3-medium-4k-instruct_question_answering
version: 2.06.18
version: 2.04.11
display_name: boolq_Phi-3-medium-4k-instruct_question_answering
description: boolq_Phi-3-medium-4k-instruct_question_answering
dataset_family: boolq
@ -18,6 +18,10 @@ tags:
evaluation_type: text_generation
task: question-answering
accuracy_metric_name: exact_match
azure_registry_name: azureml
azure_model_name: Phi-3-medium-4k-instruct
azure_latest_model_version: 4
azure_latest_model_asset_id: azureml://registries/azureml/models/Phi-3-medium-4k-instruct/versions/4
metrics:
accuracy: 0.8819571865443425

Просмотреть файл

@ -1,6 +1,6 @@
type: evaluationresult
name: boolq_phi-3-mini-128k-instruct_chat_completion
version: 2.06.21
version: 2.04.11
display_name: boolq_phi-3-mini-128k-instruct_chat_completion
description: boolq_phi-3-mini-128k-instruct_chat_completion
dataset_family: boolq
@ -18,6 +18,10 @@ tags:
evaluation_type: text_generation
task: question-answering
accuracy_metric_name: exact_match
azure_registry_name: azureml
azure_model_name: Phi-3-mini-128k-instruct
azure_latest_model_version: 11
azure_latest_model_asset_id: azureml://registries/azureml/models/Phi-3-mini-128k-instruct/versions/11
metrics:
accuracy: 0.8492354740061162

Просмотреть файл

@ -1,6 +1,6 @@
type: evaluationresult
name: boolq_phi-3-mini-4k-instruct_chat_completion
version: 2.06.21
version: 2.04.11
display_name: boolq_phi-3-mini-4k-instruct_chat_completion
description: boolq_phi-3-mini-4k-instruct_chat_completion
dataset_family: boolq
@ -18,6 +18,10 @@ tags:
evaluation_type: text_generation
task: question-answering
accuracy_metric_name: exact_match
azure_registry_name: azureml
azure_model_name: Phi-3-mini-4k-instruct
azure_latest_model_version: 13
azure_latest_model_asset_id: azureml://registries/azureml/models/Phi-3-mini-4k-instruct/versions/13
metrics:
accuracy: 0.850764526

Просмотреть файл

@ -1,6 +1,6 @@
type: evaluationresult
name: boolq_phi-3-small-128k-instruct_question_answering
version: 2.06.18
version: 2.04.11
display_name: boolq_Phi-3-small-128k-instruct_question_answering
description: Phi-3-small-128k-instruct run for boolq dataset
dataset_family: boolq
@ -18,6 +18,10 @@ tags:
evaluation_type: text_generation
task: question-answering
accuracy_metric_name: exact_match
azure_registry_name: models-staging
azure_model_name: Phi-3-small-128k-instruct
azure_latest_model_version: 7
azure_latest_model_asset_id: azureml://registries/models-staging/models/Phi-3-small-128k-instruct/versions/7
metrics:
accuracy: 0.8764525993883792

Просмотреть файл

@ -1,6 +1,6 @@
type: evaluationresult
name: boolq_phi-3-small-8k-instruct_question_answering
version: 2.06.18
version: 2.04.11
display_name: boolq_Phi-3-small-8k-instruct_question_answering
description: boolq_Phi-3-small-8k-instruct_question_answering
dataset_family: boolq
@ -18,6 +18,10 @@ tags:
evaluation_type: text_generation
task: question-answering
accuracy_metric_name: exact_match
azure_registry_name: models-staging
azure_model_name: Phi-3-small-8k-instruct
azure_latest_model_version: 7
azure_latest_model_asset_id: azureml://registries/models-staging/models/Phi-3-small-8k-instruct/versions/7
metrics:
accuracy: 0.8681957186544342

Просмотреть файл

@ -1,6 +1,6 @@
type: evaluationresult
name: bucc_cohere-embed-v3-english_bitext_mining
version: 2.22.05
version: 2.04.11
display_name: BUCC_cohere-embed-v3-english_bitext_mining
description: cohere-embed-v3-english run for BUCC dataset
dataset_name: BUCC
@ -17,6 +17,10 @@ tags:
evaluation_type: text_embeddings
task: bitext_mining
primary_metric: f1_score
azure_registry_name: azureml-cohere
azure_model_name: Cohere-embed-v3-english
azure_latest_model_version: 1
azure_latest_model_asset_id: azureml://registries/azureml-cohere/models/Cohere-embed-v3-english/versions/1
metrics:
f1_score: 0.293725473226098

Просмотреть файл

@ -1,6 +1,6 @@
type: evaluationresult
name: bucc_cohere-embed-v3-multilingual_bitext_mining
version: 2.22.05
version: 2.04.11
display_name: BUCC_cohere-embed-v3-multilingual_bitext_mining
description: cohere-embed-v3-multilingual run for BUCC dataset
dataset_name: BUCC
@ -17,6 +17,10 @@ tags:
evaluation_type: text_embeddings
task: bitext_mining
primary_metric: f1_score
azure_registry_name: azureml-cohere
azure_model_name: Cohere-embed-v3-multilingual
azure_latest_model_version: 1
azure_latest_model_asset_id: azureml://registries/azureml-cohere/models/Cohere-embed-v3-multilingual/versions/1
metrics:
f1_score: 0.9837602031657908

Просмотреть файл

@ -1,6 +1,6 @@
type: evaluationresult
name: bucc_text-embedding-3-large_bitext_mining
version: 2.07.22
version: 2.04.11
display_name: BUCC_text-embedding-3-large_bitext_mining
description: text-embedding-3-large run for BUCC dataset
dataset_name: BUCC
@ -17,6 +17,10 @@ tags:
evaluation_type: text_embeddings
task: bitext_mining
primary_metric: f1_score
azure_registry_name: azure-openai
azure_model_name: text-embedding-3-large
azure_latest_model_version: 1
azure_latest_model_asset_id: azureml://registries/azure-openai/models/text-embedding-3-large/versions/1
metrics:
f1_score: 0.9891511897531617

Просмотреть файл

@ -1,6 +1,6 @@
type: evaluationresult
name: bucc_text-embedding-3-small_bitext_mining
version: 2.07.22
version: 2.04.11
display_name: BUCC_text-embedding-3-small_bitext_mining
description: text-embedding-3-small run for BUCC dataset
dataset_name: BUCC
@ -17,6 +17,10 @@ tags:
evaluation_type: text_embeddings
task: bitext_mining
primary_metric: f1_score
azure_registry_name: azure-openai
azure_model_name: text-embedding-3-small
azure_latest_model_version: 1
azure_latest_model_asset_id: azureml://registries/azure-openai/models/text-embedding-3-small/versions/1
metrics:
f1_score: 0.9820643057041419

Просмотреть файл

@ -1,6 +1,6 @@
type: evaluationresult
name: bucc_text-embedding-ada-002_bitext_mining
version: 2.22.05
version: 2.04.11
display_name: BUCC_text-embedding-ada-002_bitext_mining
description: text-embedding-ada-002 run for BUCC dataset
dataset_name: BUCC
@ -17,6 +17,10 @@ tags:
evaluation_type: text_embeddings
task: bitext_mining
primary_metric: f1_score
azure_registry_name: azure-openai
azure_model_name: text-embedding-ada-002
azure_latest_model_version: 2
azure_latest_model_asset_id: azureml://registries/azure-openai/models/text-embedding-ada-002/versions/2
metrics:
f1_score: 0.9765893479830264

Просмотреть файл

@ -1,6 +1,6 @@
type: evaluationresult
name: emotionclassification_cohere-embed-v3-english_classification
version: 2.07.22
version: 2.04.11
display_name: EmotionClassification_cohere-embed-v3-english_classification
description: cohere-embed-v3-english run for EmotionClassification dataset
dataset_name: EmotionClassification
@ -17,6 +17,10 @@ tags:
evaluation_type: text_embeddings
task: classification
primary_metric: accuracy
azure_registry_name: azureml-cohere
azure_model_name: Cohere-embed-v3-english
azure_latest_model_version: 1
azure_latest_model_asset_id: azureml://registries/azureml-cohere/models/Cohere-embed-v3-english/versions/1
metrics:
accuracy: 0.5219999999999999

Просмотреть файл

@ -1,6 +1,6 @@
type: evaluationresult
name: emotionclassification_cohere-embed-v3-multilingual_classification
version: 2.07.22
version: 2.04.11
display_name: EmotionClassification_cohere-embed-v3-multilingual_classification
description: cohere-embed-v3-multilingual run for EmotionClassification dataset
dataset_name: EmotionClassification
@ -17,6 +17,10 @@ tags:
evaluation_type: text_embeddings
task: classification
primary_metric: accuracy
azure_registry_name: azureml-cohere
azure_model_name: Cohere-embed-v3-multilingual
azure_latest_model_version: 1
azure_latest_model_asset_id: azureml://registries/azureml-cohere/models/Cohere-embed-v3-multilingual/versions/1
metrics:
accuracy: 0.5022499999999999

Просмотреть файл

@ -1,6 +1,6 @@
type: evaluationresult
name: emotionclassification_text-embedding-3-large_classification
version: 2.07.22
version: 2.04.11
display_name: EmotionClassification_text-embedding-3-large_classification
description: text-embedding-3-large run for EmotionClassification dataset
dataset_name: EmotionClassification
@ -17,6 +17,10 @@ tags:
evaluation_type: text_embeddings
task: classification
primary_metric: accuracy
azure_registry_name: azure-openai
azure_model_name: text-embedding-3-large
azure_latest_model_version: 1
azure_latest_model_asset_id: azureml://registries/azure-openai/models/text-embedding-3-large/versions/1
metrics:
accuracy: 0.5149

Просмотреть файл

@ -1,6 +1,6 @@
type: evaluationresult
name: emotionclassification_text-embedding-3-small_classification
version: 2.07.22
version: 2.04.11
display_name: EmotionClassification_text-embedding-3-small_classification
description: text-embedding-3-small run for EmotionClassification dataset
dataset_name: EmotionClassification
@ -17,6 +17,10 @@ tags:
evaluation_type: text_embeddings
task: classification
primary_metric: accuracy
azure_registry_name: azure-openai
azure_model_name: text-embedding-3-small
azure_latest_model_version: 1
azure_latest_model_asset_id: azureml://registries/azure-openai/models/text-embedding-3-small/versions/1
metrics:
accuracy: 0.5061500000000001

Просмотреть файл

@ -1,6 +1,6 @@
type: evaluationresult
name: emotionclassification_text-embedding-ada-002_classification
version: 2.07.22
version: 2.04.11
display_name: EmotionClassification_text-embedding-ada-002_classification
description: text-embedding-ada-002 run for EmotionClassification dataset
dataset_name: EmotionClassification
@ -17,6 +17,10 @@ tags:
evaluation_type: text_embeddings
task: classification
primary_metric: accuracy
azure_registry_name: azure-openai
azure_model_name: text-embedding-ada-002
azure_latest_model_version: 2
azure_latest_model_asset_id: azureml://registries/azure-openai/models/text-embedding-ada-002/versions/2
metrics:
accuracy: 0.48785

Просмотреть файл

@ -1,23 +1,27 @@
type: evaluationresult
name: gsm8k__gpt-4-0125-preview__question_answering
version: 2.12.08
version: 2.04.11
display_name: gsm8k__gpt-4-0125-Preview__chat_completion
description: Benchmark__gpt40125__hf_gsm8k__chat_completion
dataset_family: gsm8k
dataset_name: gsm8k
model_name: gpt-4-0125-Preview
model_version: "4"
model_asset_id: azureml://registries/azure-openai/models/gpt-4-0125-Preview/versions/4
model_version: "0125-Preview"
model_asset_id: azureml://registries/azure-openai/models/gpt-4/versions/0125-Preview
relationships:
- relationshipType: Source
assetId: azureml://registries/azure-openai/models/gpt-4-0125-Preview/versions/4
assetId: azureml://registries/azure-openai/models/gpt-4/versions/0125-Preview
tags:
evaluation_type: text_generation
task: question-answering
accuracy_metric_name: exact_match
azure_registry_name: azure-openai
azure_model_name: gpt-4
azure_latest_model_version: turbo-2024-04-09
azure_latest_model_asset_id: azureml://registries/azure-openai/models/gpt-4/versions/turbo-2024-04-09
metrics:
accuracy: 0.93555724

Просмотреть файл

@ -1,6 +1,6 @@
type: evaluationresult
name: gsm8k__meta-llama-3_1-70b-instruct__question_answering
version: 2.12.08
version: 2.04.11
display_name: gsm8k__Meta-Llama-3_1-70B-Instruct__chat_completion
description: Benchmark__Llama-3-1-70B-Instruct-bench__hf_gsm8k__chat_completion
dataset_family: gsm8k
@ -18,6 +18,10 @@ tags:
evaluation_type: text_generation
task: question-answering
accuracy_metric_name: exact_match
azure_registry_name: azureml-meta
azure_model_name: Meta-Llama-3.1-70B-Instruct
azure_latest_model_version: 3
azure_latest_model_asset_id: azureml://registries/azureml-meta/models/Meta-Llama-3.1-70B-Instruct/versions/3
metrics:
accuracy: 0.946929492

Просмотреть файл

@ -1,6 +1,6 @@
type: evaluationresult
name: gsm8k__meta-llama-3_1-8b-instruct__question_answering
version: 2.12.08
version: 2.04.11
display_name: gsm8k__Meta-Llama-3_1-8B-Instruct__chat_completion
description: Benchmark__meta-llama-3-1-8b-instruct-1__hf_gsm8k__chat_completion
dataset_family: gsm8k
@ -18,6 +18,10 @@ tags:
evaluation_type: text_generation
task: question-answering
accuracy_metric_name: exact_match
azure_registry_name: azureml-meta
azure_model_name: Meta-Llama-3.1-8B-Instruct
azure_latest_model_version: 3
azure_latest_model_asset_id: azureml://registries/azureml-meta/models/Meta-Llama-3.1-8B-Instruct/versions/3
metrics:
accuracy: 0.843062926

Просмотреть файл

@ -1,23 +1,27 @@
type: evaluationresult
name: gsm8k_cohere_command_r_plus_question_answering
version: 2.30.04
version: 2.04.11
display_name: gsm8k_Cohere_command_r_plus_question_answering
description: Cohere-command-r-plus run for gsm8k dataset
dataset_family: gsm8k
dataset_name: gsm8k
model_name: Cohere-command-r-plus
model_version: "3"
model_asset_id: azureml://registries/azureml-cohere/models/Cohere-command-r-plus/versions/3
model_version: "1"
model_asset_id: azureml://registries/azureml-cohere/models/Cohere-command-r-plus/versions/1
relationships:
- relationshipType: Source
assetId: azureml://registries/azureml-cohere/models/Cohere-command-r-plus/versions/3
assetId: azureml://registries/azureml-cohere/models/Cohere-command-r-plus/versions/1
tags:
evaluation_type: text_generation
task: question-answering
accuracy_metric_name: exact_match
azure_registry_name: azureml-cohere
azure_model_name: Cohere-command-r-plus
azure_latest_model_version: 1
azure_latest_model_asset_id: azureml://registries/azureml-cohere/models/Cohere-command-r-plus/versions/1
metrics:
accuracy: 0.7626990144048522

Просмотреть файл

@ -1,23 +1,27 @@
type: evaluationresult
name: gsm8k_cohere_command_r_question_answering
version: 2.30.04
version: 2.04.11
display_name: gsm8k_Cohere_command_r_question_answering
description: Cohere-command-r run for gsm8k dataset
dataset_family: gsm8k
dataset_name: gsm8k
model_name: Cohere-command-r
model_version: "3"
model_asset_id: azureml://registries/azureml-cohere/models/Cohere-command-r/versions/3
model_version: "1"
model_asset_id: azureml://registries/azureml-cohere/models/Cohere-command-r/versions/1
relationships:
- relationshipType: Source
assetId: azureml://registries/azureml-cohere/models/Cohere-command-r/versions/3
assetId: azureml://registries/azureml-cohere/models/Cohere-command-r/versions/1
tags:
evaluation_type: text_generation
task: question-answering
accuracy_metric_name: exact_match
azure_registry_name: azureml-cohere
azure_model_name: Cohere-command-r
azure_latest_model_version: 1
azure_latest_model_asset_id: azureml://registries/azureml-cohere/models/Cohere-command-r/versions/1
metrics:
accuracy: 0.5981804397270659

Просмотреть файл

@ -1,6 +1,6 @@
type: evaluationresult
name: gsm8k_databricks-dbrx-base_question_answering
version: 2.19.040
version: 2.04.11
display_name: gsm8k_databricks-dbrx-base_question_answering
description: databricks-dbrx-base run for gsm8k dataset
dataset_family: gsm8k
@ -18,6 +18,10 @@ tags:
evaluation_type: text_generation
task: question-answering
accuracy_metric_name: exact_match
azure_registry_name: azureml-restricted
azure_model_name: databricks-dbrx-base
azure_latest_model_version: 3
azure_latest_model_asset_id: azureml://registries/azureml-restricted/models/databricks-dbrx-base/versions/3
metrics:
accuracy: 0.702047

Просмотреть файл

@ -1,6 +1,6 @@
type: evaluationresult
name: gsm8k_databricks-dbrx-instruct_question_answering
version: 2.19.040
version: 2.04.11
display_name: gsm8k_databricks-dbrx-instruct_question_answering
description: databricks-dbrx-instruct run for gsm8k dataset
dataset_family: gsm8k
@ -18,6 +18,10 @@ tags:
evaluation_type: text_generation
task: question-answering
accuracy_metric_name: exact_match
azure_registry_name: azureml-restricted
azure_model_name: databricks-dbrx-instruct
azure_latest_model_version: 3
azure_latest_model_asset_id: azureml://registries/azureml-restricted/models/databricks-dbrx-instruct/versions/3
metrics:
accuracy: 0.7202426

Просмотреть файл

@ -1,6 +1,6 @@
type: evaluationresult
name: gsm8k_gpt-4-turbo-2024-04-09_chat_completion
version: 2.07.05
version: 2.04.11
display_name: gsm8k_gpt-4-turbo-2024-04-09_chat_completion
description: gsm8k_gpt-4-turbo-2024-04-09_chat_completion
dataset_family: gsm8k
@ -8,16 +8,20 @@ dataset_name: gsm8k
model_name: gpt-4-turbo-2024-04-09
model_version: "turbo-2024-04-09"
model_asset_id: azureml://registries/azure-openai/models/gpt-4/versions/4
model_asset_id: azureml://registries/azure-openai/models/gpt-4/versions/turbo-2024-04-09
relationships:
- relationshipType: Source
assetId: azureml://registries/azure-openai/models/gpt-4/versions/4
assetId: azureml://registries/azure-openai/models/gpt-4/versions/turbo-2024-04-09
tags:
evaluation_type: text_generation
task: question-answering
accuracy_metric_name: exact_match
azure_registry_name: azure-openai
azure_model_name: gpt-4
azure_latest_model_version: turbo-2024-04-09
azure_latest_model_asset_id: azureml://registries/azure-openai/models/gpt-4/versions/turbo-2024-04-09
metrics:
accuracy: 0.9484457922668689

Просмотреть файл

@ -1,23 +1,27 @@
type: evaluationresult
name: gsm8k_gpt-4o_question_answering
version: 2.12.08
version: 2.04.11
display_name: gsm8k_gpt-4o_question_answering
description: gpt-4o run for gsm8k
dataset_family: gsm8k
dataset_name: gsm8k
model_name: gpt-4o
model_version: "5/13/2024"
model_asset_id: azureml://registries/azure-openai/models/gpt-4o/versions/1
model_name: gpt-4o-2024-05-13
model_version: "2024-05-13"
model_asset_id: azureml://registries/azure-openai/models/gpt-4o/versions/2024-05-13
relationships:
- relationshipType: Source
assetId: azureml://registries/azure-openai/models/gpt-4o/versions/1
assetId: azureml://registries/azure-openai/models/gpt-4o/versions/2024-05-13
tags:
evaluation_type: text_generation
task: question-answering
accuracy_metric_name: exact_match
azure_registry_name: azure-openai
azure_model_name: gpt-4o
azure_latest_model_version: 2024-08-06
azure_latest_model_asset_id: azureml://registries/azure-openai/models/gpt-4o/versions/2024-08-06
metrics:
accuracy: 0.945413192

Просмотреть файл

@ -1,6 +1,6 @@
type: evaluationresult
name: gsm8k_gpt_35_turbo_0301_question_answering
version: 2.03.05
version: 2.04.11
display_name: gsm8k_gpt_35_turbo_0301_question_answering
description: gpt-35-turbo-0301 run for gsm8k dataset
dataset_family: gsm8k
@ -8,16 +8,20 @@ dataset_name: gsm8k
model_name: gpt-35-turbo-0301
model_version: "0301"
model_asset_id: azureml://registries/azure-openai/models/gpt-35-turbo/versions/2
model_asset_id: azureml://registries/azure-openai/models/gpt-35-turbo/versions/0301
relationships:
- relationshipType: Source
assetId: azureml://registries/azure-openai/models/gpt-35-turbo/versions/2
assetId: azureml://registries/azure-openai/models/gpt-35-turbo/versions/0301
tags:
evaluation_type: text_generation
task: question-answering
accuracy_metric_name: exact_match
azure_registry_name: azure-openai
azure_model_name: gpt-35-turbo
azure_latest_model_version: 0125
azure_latest_model_asset_id: azureml://registries/azure-openai/models/gpt-35-turbo/versions/0125
metrics:
accuracy: 0.826

Просмотреть файл

@ -1,6 +1,6 @@
type: evaluationresult
name: gsm8k_gpt_35_turbo_0613_question_answering
version: 2.03.05
version: 2.04.11
display_name: gsm8k_gpt_35_turbo_0613_question_answering
description: gpt-35-turbo-0613 run for gsm8k dataset
dataset_family: gsm8k
@ -8,16 +8,20 @@ dataset_name: gsm8k
model_name: gpt-35-turbo-0613
model_version: "0613"
model_asset_id: azureml://registries/azure-openai/models/gpt-35-turbo/versions/2
model_asset_id: azureml://registries/azure-openai/models/gpt-35-turbo/versions/0613
relationships:
- relationshipType: Source
assetId: azureml://registries/azure-openai/models/gpt-35-turbo/versions/2
assetId: azureml://registries/azure-openai/models/gpt-35-turbo/versions/0613
tags:
evaluation_type: text_generation
task: question-answering
accuracy_metric_name: exact_match
azure_registry_name: azure-openai
azure_model_name: gpt-35-turbo
azure_latest_model_version: 0125
azure_latest_model_asset_id: azureml://registries/azure-openai/models/gpt-35-turbo/versions/0125
metrics:
accuracy: 0.782

Просмотреть файл

@ -1,6 +1,6 @@
type: evaluationresult
name: gsm8k_gpt_4_0314_question_answering
version: 2.03.05
version: 2.04.11
display_name: gsm8k_gpt_4_0314_question_answering
description: gpt-4-0314 run for gsm8k dataset
dataset_family: gsm8k
@ -18,6 +18,10 @@ tags:
evaluation_type: text_generation
task: question-answering
accuracy_metric_name: exact_match
azure_registry_name: azure-openai
azure_model_name: gpt-4
azure_latest_model_version: turbo-2024-04-09
azure_latest_model_asset_id: azureml://registries/azure-openai/models/gpt-4/versions/turbo-2024-04-09
metrics:
accuracy: 0.945

Просмотреть файл

@ -1,6 +1,6 @@
type: evaluationresult
name: gsm8k_gpt_4_0613_question_answering
version: 2.03.05
version: 2.04.11
display_name: gsm8k_gpt_4_0613_question_answering
description: gpt-4-0613 run for gsm8k dataset
dataset_family: gsm8k
@ -8,16 +8,20 @@ dataset_name: gsm8k
model_name: gpt-4-0613
model_version: "0613"
model_asset_id: azureml://registries/azure-openai/models/gpt-4/versions/4
model_asset_id: azureml://registries/azure-openai/models/gpt-4/versions/0613
relationships:
- relationshipType: Source
assetId: azureml://registries/azure-openai/models/gpt-4/versions/4
assetId: azureml://registries/azure-openai/models/gpt-4/versions/0613
tags:
evaluation_type: text_generation
task: question-answering
accuracy_metric_name: exact_match
azure_registry_name: azure-openai
azure_model_name: gpt-4
azure_latest_model_version: turbo-2024-04-09
azure_latest_model_asset_id: azureml://registries/azure-openai/models/gpt-4/versions/turbo-2024-04-09
metrics:
accuracy: 0.938

Некоторые файлы не были показаны из-за слишком большого количества измененных файлов Показать больше