update evaluation results from azureml-assets to azureml-asset repo.

2024-11-08 12:39:19 +05:30 · 2024-11-08 12:39:19 +05:30 · 797f4b28fb
--- a/assets/evaluation_results/amazonpolarityclassification_cohere-embed-v3-english_classification/asset.yaml
+++ b/assets/evaluation_results/amazonpolarityclassification_cohere-embed-v3-english_classification/asset.yaml
@ -1,3 +0,0 @@
-type: evaluationresult
-spec: spec.yaml
-categories: ["EvaluationResult"]
--- a/assets/evaluation_results/amazonpolarityclassification_cohere-embed-v3-english_classification/spec.yaml
+++ b/assets/evaluation_results/amazonpolarityclassification_cohere-embed-v3-english_classification/spec.yaml
@ -1,28 +0,0 @@
-type: evaluationresult
-name: amazonpolarityclassification_cohere-embed-v3-english_classification
-version: 2.04.11
-display_name: AmazonPolarityClassification_cohere-embed-v3-english_classification
-description: cohere-embed-v3-english run for AmazonPolarityClassification dataset
-dataset_name: AmazonPolarityClassification
-dataset_family: AmazonPolarityClassification
-
-model_name: cohere-embed-v3-english
-model_version: "3"
-model_asset_id: azureml://registries/azureml-cohere/models/Cohere-embed-v3-english/versions/3
-relationships:
-    - relationshipType: Source
-      assetId: azureml://registries/azureml-cohere/models/Cohere-embed-v3-english/versions/3
-
-tags:
-    evaluation_type: text_embeddings
-    task: classification
-    primary_metric: accuracy
-    azure_registry_name: azureml-cohere
-    azure_model_name: Cohere-embed-v3-english
-    azure_latest_model_version: 1
-    azure_latest_model_asset_id: azureml://registries/azureml-cohere/models/Cohere-embed-v3-english/versions/1
-
-metrics:
-    accuracy: 0.927643
-
-properties: {}
--- a/assets/evaluation_results/amazonpolarityclassification_cohere-embed-v3-multilingual_classification/asset.yaml
+++ b/assets/evaluation_results/amazonpolarityclassification_cohere-embed-v3-multilingual_classification/asset.yaml
@ -1,3 +0,0 @@
-type: evaluationresult
-spec: spec.yaml
-categories: ["EvaluationResult"]
--- a/assets/evaluation_results/amazonpolarityclassification_cohere-embed-v3-multilingual_classification/spec.yaml
+++ b/assets/evaluation_results/amazonpolarityclassification_cohere-embed-v3-multilingual_classification/spec.yaml
@ -1,28 +0,0 @@
-type: evaluationresult
-name: amazonpolarityclassification_cohere-embed-v3-multilingual_classification
-version: 2.04.11
-display_name: AmazonPolarityClassification_cohere-embed-v3-multilingual_classification
-description: cohere-embed-v3-multilingual run for AmazonPolarityClassification dataset
-dataset_name: AmazonPolarityClassification
-dataset_family: AmazonPolarityClassification
-
-model_name: cohere-embed-v3-multilingual
-model_version: "3"
-model_asset_id: azureml://registries/azureml-cohere/models/Cohere-embed-v3-multilingual/versions/3
-relationships:
-    - relationshipType: Source
-      assetId: azureml://registries/azureml-cohere/models/Cohere-embed-v3-multilingual/versions/3
-
-tags:
-    evaluation_type: text_embeddings
-    task: classification
-    primary_metric: accuracy
-    azure_registry_name: azureml-cohere
-    azure_model_name: Cohere-embed-v3-multilingual
-    azure_latest_model_version: 1
-    azure_latest_model_asset_id: azureml://registries/azureml-cohere/models/Cohere-embed-v3-multilingual/versions/1
-
-metrics:
-    accuracy: 0.912307
-
-properties: {}
--- a/assets/evaluation_results/amazonpolarityclassification_text-embedding-3-large_classification/asset.yaml
+++ b/assets/evaluation_results/amazonpolarityclassification_text-embedding-3-large_classification/asset.yaml
@ -1,3 +0,0 @@
-type: evaluationresult
-spec: spec.yaml
-categories: ["EvaluationResult"]
--- a/assets/evaluation_results/amazonpolarityclassification_text-embedding-3-large_classification/spec.yaml
+++ b/assets/evaluation_results/amazonpolarityclassification_text-embedding-3-large_classification/spec.yaml
@ -1,28 +0,0 @@
-type: evaluationresult
-name: amazonpolarityclassification_text-embedding-3-large_classification
-version: 2.04.11
-display_name: AmazonPolarityClassification_text-embedding-3-large_classification
-description: text-embedding-3-large run for AmazonPolarityClassification dataset
-dataset_name: AmazonPolarityClassification
-dataset_family: AmazonPolarityClassification
-
-model_name: text-embedding-3-large
-model_version: "1"
-model_asset_id: azureml://registries/azure-openai/models/text-embedding-3-large/versions/1
-relationships:
-    - relationshipType: Source
-      assetId: azureml://registries/azure-openai/models/text-embedding-3-large/versions/1
-
-tags:
-    evaluation_type: text_embeddings
-    task: classification
-    primary_metric: accuracy
-    azure_registry_name: azure-openai
-    azure_model_name: text-embedding-3-large
-    azure_latest_model_version: 1
-    azure_latest_model_asset_id: azureml://registries/azure-openai/models/text-embedding-3-large/versions/1
-
-metrics:
-    accuracy: 0.92868975
-
-properties: {}
--- a/assets/evaluation_results/amazonpolarityclassification_text-embedding-3-small_classification/asset.yaml
+++ b/assets/evaluation_results/amazonpolarityclassification_text-embedding-3-small_classification/asset.yaml
@ -1,3 +0,0 @@
-type: evaluationresult
-spec: spec.yaml
-categories: ["EvaluationResult"]
--- a/assets/evaluation_results/amazonpolarityclassification_text-embedding-3-small_classification/spec.yaml
+++ b/assets/evaluation_results/amazonpolarityclassification_text-embedding-3-small_classification/spec.yaml
@ -1,28 +0,0 @@
-type: evaluationresult
-name: amazonpolarityclassification_text-embedding-3-small_classification
-version: 2.04.11
-display_name: AmazonPolarityClassification_text-embedding-3-small_classification
-description: text-embedding-3-small run for AmazonPolarityClassification dataset
-dataset_name: AmazonPolarityClassification
-dataset_family: AmazonPolarityClassification
-
-model_name: text-embedding-3-small
-model_version: "1"
-model_asset_id: azureml://registries/azure-openai/models/text-embedding-3-small/versions/1
-relationships:
-    - relationshipType: Source
-      assetId: azureml://registries/azure-openai/models/text-embedding-3-small/versions/1
-
-tags:
-    evaluation_type: text_embeddings
-    task: classification
-    primary_metric: accuracy
-    azure_registry_name: azure-openai
-    azure_model_name: text-embedding-3-small
-    azure_latest_model_version: 1
-    azure_latest_model_asset_id: azureml://registries/azure-openai/models/text-embedding-3-small/versions/1
-
-metrics:
-    accuracy: 0.90878075
-
-properties: {}
--- a/assets/evaluation_results/amazonpolarityclassification_text-embedding-ada-002_classification/asset.yaml
+++ b/assets/evaluation_results/amazonpolarityclassification_text-embedding-ada-002_classification/asset.yaml
@ -1,3 +0,0 @@
-type: evaluationresult
-spec: spec.yaml
-categories: ["EvaluationResult"]
--- a/assets/evaluation_results/amazonpolarityclassification_text-embedding-ada-002_classification/spec.yaml
+++ b/assets/evaluation_results/amazonpolarityclassification_text-embedding-ada-002_classification/spec.yaml
@ -1,28 +0,0 @@
-type: evaluationresult
-name: amazonpolarityclassification_text-embedding-ada-002_classification
-version: 2.04.11
-display_name: AmazonPolarityClassification_text-embedding-ada-002_classification
-description: text-embedding-ada-002 run for AmazonPolarityClassification dataset
-dataset_name: AmazonPolarityClassification
-dataset_family: AmazonPolarityClassification
-
-model_name: text-embedding-ada-002
-model_version: "2"
-model_asset_id: azureml://registries/azure-openai/models/text-embedding-ada-002/versions/2
-relationships:
-    - relationshipType: Source
-      assetId: azureml://registries/azure-openai/models/text-embedding-ada-002/versions/2
-
-tags:
-    evaluation_type: text_embeddings
-    task: classification
-    primary_metric: accuracy
-    azure_registry_name: azure-openai
-    azure_model_name: text-embedding-ada-002
-    azure_latest_model_version: 2
-    azure_latest_model_asset_id: azureml://registries/azure-openai/models/text-embedding-ada-002/versions/2
-
-metrics:
-    accuracy: 0.867263
-
-properties: {}
--- a/assets/evaluation_results/arguana_cohere-embed-v3-english_retrieval/asset.yaml
+++ b/assets/evaluation_results/arguana_cohere-embed-v3-english_retrieval/asset.yaml
@ -1,3 +0,0 @@
-type: evaluationresult
-spec: spec.yaml
-categories: ["EvaluationResult"]
--- a/assets/evaluation_results/arguana_cohere-embed-v3-english_retrieval/spec.yaml
+++ b/assets/evaluation_results/arguana_cohere-embed-v3-english_retrieval/spec.yaml
@ -1,28 +0,0 @@
-type: evaluationresult
-name: arguana_cohere-embed-v3-english_retrieval
-version: 2.04.11
-display_name: ArguAna_cohere-embed-v3-english_retrieval
-description: cohere-embed-v3-english run for ArguAna dataset
-dataset_name: ArguAna
-dataset_family: ArguAna
-
-model_name: cohere-embed-v3-english
-model_version: "3"
-model_asset_id: azureml://registries/azureml-cohere/models/Cohere-embed-v3-english/versions/3
-relationships:
-    - relationshipType: Source
-      assetId: azureml://registries/azureml-cohere/models/Cohere-embed-v3-english/versions/3
-
-tags:
-    evaluation_type: text_embeddings
-    task: retrieval
-    primary_metric: ndcg_at_10
-    azure_registry_name: azureml-cohere
-    azure_model_name: Cohere-embed-v3-english
-    azure_latest_model_version: 1
-    azure_latest_model_asset_id: azureml://registries/azureml-cohere/models/Cohere-embed-v3-english/versions/1
-
-metrics:
-    ndcg_at_10: 0.57529
-
-properties: {}
--- a/assets/evaluation_results/arguana_cohere-embed-v3-multilingual_retrieval/asset.yaml
+++ b/assets/evaluation_results/arguana_cohere-embed-v3-multilingual_retrieval/asset.yaml
@ -1,3 +0,0 @@
-type: evaluationresult
-spec: spec.yaml
-categories: ["EvaluationResult"]
--- a/assets/evaluation_results/arguana_cohere-embed-v3-multilingual_retrieval/spec.yaml
+++ b/assets/evaluation_results/arguana_cohere-embed-v3-multilingual_retrieval/spec.yaml
@ -1,28 +0,0 @@
-type: evaluationresult
-name: arguana_cohere-embed-v3-multilingual_retrieval
-version: 2.04.11
-display_name: ArguAna_cohere-embed-v3-multilingual_retrieval
-description: cohere-embed-v3-multilingual run for ArguAna dataset
-dataset_name: ArguAna
-dataset_family: ArguAna
-
-model_name: cohere-embed-v3-multilingual
-model_version: "3"
-model_asset_id: azureml://registries/azureml-cohere/models/Cohere-embed-v3-multilingual/versions/3
-relationships:
-    - relationshipType: Source
-      assetId: azureml://registries/azureml-cohere/models/Cohere-embed-v3-multilingual/versions/3
-
-tags:
-    evaluation_type: text_embeddings
-    task: retrieval
-    primary_metric: ndcg_at_10
-    azure_registry_name: azureml-cohere
-    azure_model_name: Cohere-embed-v3-multilingual
-    azure_latest_model_version: 1
-    azure_latest_model_asset_id: azureml://registries/azureml-cohere/models/Cohere-embed-v3-multilingual/versions/1
-
-metrics:
-    ndcg_at_10: 0.57989
-
-properties: {}
--- a/assets/evaluation_results/arguana_text-embedding-3-large_retrieval/asset.yaml
+++ b/assets/evaluation_results/arguana_text-embedding-3-large_retrieval/asset.yaml
@ -1,3 +0,0 @@
-type: evaluationresult
-spec: spec.yaml
-categories: ["EvaluationResult"]
--- a/assets/evaluation_results/arguana_text-embedding-3-large_retrieval/spec.yaml
+++ b/assets/evaluation_results/arguana_text-embedding-3-large_retrieval/spec.yaml
@ -1,28 +0,0 @@
-type: evaluationresult
-name: arguana_text-embedding-3-large_retrieval
-version: 2.04.11
-display_name: ArguAna_text-embedding-3-large_retrieval
-description: text-embedding-3-large run for ArguAna dataset
-dataset_name: ArguAna
-dataset_family: ArguAna
-
-model_name: text-embedding-3-large
-model_version: "1"
-model_asset_id: azureml://registries/azure-openai/models/text-embedding-3-large/versions/1
-relationships:
-    - relationshipType: Source
-      assetId: azureml://registries/azure-openai/models/text-embedding-3-large/versions/1
-
-tags:
-    evaluation_type: text_embeddings
-    task: retrieval
-    primary_metric: ndcg_at_10
-    azure_registry_name: azure-openai
-    azure_model_name: text-embedding-3-large
-    azure_latest_model_version: 1
-    azure_latest_model_asset_id: azureml://registries/azure-openai/models/text-embedding-3-large/versions/1
-
-metrics:
-    ndcg_at_10: 0.58013
-
-properties: {}
--- a/assets/evaluation_results/arguana_text-embedding-3-small_retrieval/asset.yaml
+++ b/assets/evaluation_results/arguana_text-embedding-3-small_retrieval/asset.yaml
@ -1,3 +0,0 @@
-type: evaluationresult
-spec: spec.yaml
-categories: ["EvaluationResult"]
--- a/assets/evaluation_results/arguana_text-embedding-3-small_retrieval/spec.yaml
+++ b/assets/evaluation_results/arguana_text-embedding-3-small_retrieval/spec.yaml
@ -1,28 +0,0 @@
-type: evaluationresult
-name: arguana_text-embedding-3-small_retrieval
-version: 2.04.11
-display_name: ArguAna_text-embedding-3-small_retrieval
-description: text-embedding-3-small run for ArguAna dataset
-dataset_name: ArguAna
-dataset_family: ArguAna
-
-model_name: text-embedding-3-small
-model_version: "1"
-model_asset_id: azureml://registries/azure-openai/models/text-embedding-3-small/versions/1
-relationships:
-    - relationshipType: Source
-      assetId: azureml://registries/azure-openai/models/text-embedding-3-small/versions/1
-
-tags:
-    evaluation_type: text_embeddings
-    task: retrieval
-    primary_metric: ndcg_at_10
-    azure_registry_name: azure-openai
-    azure_model_name: text-embedding-3-small
-    azure_latest_model_version: 1
-    azure_latest_model_asset_id: azureml://registries/azure-openai/models/text-embedding-3-small/versions/1
-
-metrics:
-    ndcg_at_10: 0.55694
-
-properties: {}
--- a/assets/evaluation_results/arguana_text-embedding-ada-002_retrieval/asset.yaml
+++ b/assets/evaluation_results/arguana_text-embedding-ada-002_retrieval/asset.yaml
@ -1,3 +0,0 @@
-type: evaluationresult
-spec: spec.yaml
-categories: ["EvaluationResult"]
--- a/assets/evaluation_results/arguana_text-embedding-ada-002_retrieval/spec.yaml
+++ b/assets/evaluation_results/arguana_text-embedding-ada-002_retrieval/spec.yaml
@ -1,28 +0,0 @@
-type: evaluationresult
-name: arguana_text-embedding-ada-002_retrieval
-version: 2.04.11
-display_name: ArguAna_text-embedding-ada-002_retrieval
-description: text-embedding-ada-002 run for ArguAna dataset
-dataset_name: ArguAna
-dataset_family: ArguAna
-
-model_name: text-embedding-ada-002
-model_version: "2"
-model_asset_id: azureml://registries/azure-openai/models/text-embedding-ada-002/versions/2
-relationships:
-    - relationshipType: Source
-      assetId: azureml://registries/azure-openai/models/text-embedding-ada-002/versions/2
-
-tags:
-    evaluation_type: text_embeddings
-    task: retrieval
-    primary_metric: ndcg_at_10
-    azure_registry_name: azure-openai
-    azure_model_name: text-embedding-ada-002
-    azure_latest_model_version: 2
-    azure_latest_model_asset_id: azureml://registries/azure-openai/models/text-embedding-ada-002/versions/2
-
-metrics:
-    ndcg_at_10: 0.57455
-
-properties: {}
--- a/assets/evaluation_results/arxivclusteringp2p.v2_cohere-embed-v3-english_clustering/asset.yaml
+++ b/assets/evaluation_results/arxivclusteringp2p.v2_cohere-embed-v3-english_clustering/asset.yaml
@ -1,3 +0,0 @@
-type: evaluationresult
-spec: spec.yaml
-categories: ["EvaluationResult"]
--- a/assets/evaluation_results/arxivclusteringp2p.v2_cohere-embed-v3-english_clustering/spec.yaml
+++ b/assets/evaluation_results/arxivclusteringp2p.v2_cohere-embed-v3-english_clustering/spec.yaml
@ -1,28 +0,0 @@
-type: evaluationresult
-name: arxivclusteringp2p.v2_cohere-embed-v3-english_clustering
-version: 2.04.11
-display_name: ArxivClusteringP2P.v2_cohere-embed-v3-english_clustering
-description: cohere-embed-v3-english run for ArxivClusteringP2P.v2 dataset
-dataset_name: ArxivClusteringP2P.v2
-dataset_family: ArxivClusteringP2P.v2
-
-model_name: cohere-embed-v3-english
-model_version: "3"
-model_asset_id: azureml://registries/azureml-cohere/models/Cohere-embed-v3-english/versions/3
-relationships:
-    - relationshipType: Source
-      assetId: azureml://registries/azureml-cohere/models/Cohere-embed-v3-english/versions/3
-
-tags:
-    evaluation_type: text_embeddings
-    task: clustering
-    primary_metric: v_measure
-    azure_registry_name: azureml-cohere
-    azure_model_name: Cohere-embed-v3-english
-    azure_latest_model_version: 1
-    azure_latest_model_asset_id: azureml://registries/azureml-cohere/models/Cohere-embed-v3-english/versions/1
-
-metrics:
-    v_measure: 0.5081042703542442
-
-properties: {}
--- a/assets/evaluation_results/arxivclusteringp2p.v2_cohere-embed-v3-multilingual_clustering/asset.yaml
+++ b/assets/evaluation_results/arxivclusteringp2p.v2_cohere-embed-v3-multilingual_clustering/asset.yaml
@ -1,3 +0,0 @@
-type: evaluationresult
-spec: spec.yaml
-categories: ["EvaluationResult"]
--- a/assets/evaluation_results/arxivclusteringp2p.v2_cohere-embed-v3-multilingual_clustering/spec.yaml
+++ b/assets/evaluation_results/arxivclusteringp2p.v2_cohere-embed-v3-multilingual_clustering/spec.yaml
@ -1,28 +0,0 @@
-type: evaluationresult
-name: arxivclusteringp2p.v2_cohere-embed-v3-multilingual_clustering
-version: 2.04.11
-display_name: ArxivClusteringP2P.v2_cohere-embed-v3-multilingual_clustering
-description: cohere-embed-v3-multilingual run for ArxivClusteringP2P.v2 dataset
-dataset_name: ArxivClusteringP2P.v2
-dataset_family: ArxivClusteringP2P.v2
-
-model_name: cohere-embed-v3-multilingual
-model_version: "3"
-model_asset_id: azureml://registries/azureml-cohere/models/Cohere-embed-v3-multilingual/versions/3
-relationships:
-    - relationshipType: Source
-      assetId: azureml://registries/azureml-cohere/models/Cohere-embed-v3-multilingual/versions/3
-
-tags:
-    evaluation_type: text_embeddings
-    task: clustering
-    primary_metric: v_measure
-    azure_registry_name: azureml-cohere
-    azure_model_name: Cohere-embed-v3-multilingual
-    azure_latest_model_version: 1
-    azure_latest_model_asset_id: azureml://registries/azureml-cohere/models/Cohere-embed-v3-multilingual/versions/1
-
-metrics:
-    v_measure: 0.5029184573976476
-
-properties: {}
--- a/assets/evaluation_results/arxivclusteringp2p.v2_text-embedding-3-large_clustering/asset.yaml
+++ b/assets/evaluation_results/arxivclusteringp2p.v2_text-embedding-3-large_clustering/asset.yaml
@ -1,3 +0,0 @@
-type: evaluationresult
-spec: spec.yaml
-categories: ["EvaluationResult"]
--- a/assets/evaluation_results/arxivclusteringp2p.v2_text-embedding-3-large_clustering/spec.yaml
+++ b/assets/evaluation_results/arxivclusteringp2p.v2_text-embedding-3-large_clustering/spec.yaml
@ -1,28 +0,0 @@
-type: evaluationresult
-name: arxivclusteringp2p.v2_text-embedding-3-large_clustering
-version: 2.04.11
-display_name: ArxivClusteringP2P.v2_text-embedding-3-large_clustering
-description: text-embedding-3-large run for ArxivClusteringP2P.v2 dataset
-dataset_name: ArxivClusteringP2P.v2
-dataset_family: ArxivClusteringP2P.v2
-
-model_name: text-embedding-3-large
-model_version: "1"
-model_asset_id: azureml://registries/azure-openai/models/text-embedding-3-large/versions/1
-relationships:
-    - relationshipType: Source
-      assetId: azureml://registries/azure-openai/models/text-embedding-3-large/versions/1
-
-tags:
-    evaluation_type: text_embeddings
-    task: clustering
-    primary_metric: v_measure
-    azure_registry_name: azure-openai
-    azure_model_name: text-embedding-3-large
-    azure_latest_model_version: 1
-    azure_latest_model_asset_id: azureml://registries/azure-openai/models/text-embedding-3-large/versions/1
-
-metrics:
-    v_measure: 0.519053128352996
-
-properties: {}
--- a/assets/evaluation_results/arxivclusteringp2p.v2_text-embedding-3-small_clustering/asset.yaml
+++ b/assets/evaluation_results/arxivclusteringp2p.v2_text-embedding-3-small_clustering/asset.yaml
@ -1,3 +0,0 @@
-type: evaluationresult
-spec: spec.yaml
-categories: ["EvaluationResult"]
--- a/assets/evaluation_results/arxivclusteringp2p.v2_text-embedding-3-small_clustering/spec.yaml
+++ b/assets/evaluation_results/arxivclusteringp2p.v2_text-embedding-3-small_clustering/spec.yaml
@ -1,28 +0,0 @@
-type: evaluationresult
-name: arxivclusteringp2p.v2_text-embedding-3-small_clustering
-version: 2.04.11
-display_name: ArxivClusteringP2P.v2_text-embedding-3-small_clustering
-description: text-embedding-3-small run for ArxivClusteringP2P.v2 dataset
-dataset_name: ArxivClusteringP2P.v2
-dataset_family: ArxivClusteringP2P.v2
-
-model_name: text-embedding-3-small
-model_version: "1"
-model_asset_id: azureml://registries/azure-openai/models/text-embedding-3-small/versions/1
-relationships:
-    - relationshipType: Source
-      assetId: azureml://registries/azure-openai/models/text-embedding-3-small/versions/1
-
-tags:
-    evaluation_type: text_embeddings
-    task: clustering
-    primary_metric: v_measure
-    azure_registry_name: azure-openai
-    azure_model_name: text-embedding-3-small
-    azure_latest_model_version: 1
-    azure_latest_model_asset_id: azureml://registries/azure-openai/models/text-embedding-3-small/versions/1
-
-metrics:
-    v_measure: 0.496692276507199
-
-properties: {}
--- a/assets/evaluation_results/arxivclusteringp2p.v2_text-embedding-ada-002_clustering/asset.yaml
+++ b/assets/evaluation_results/arxivclusteringp2p.v2_text-embedding-ada-002_clustering/asset.yaml
@ -1,3 +0,0 @@
-type: evaluationresult
-spec: spec.yaml
-categories: ["EvaluationResult"]
--- a/assets/evaluation_results/arxivclusteringp2p.v2_text-embedding-ada-002_clustering/spec.yaml
+++ b/assets/evaluation_results/arxivclusteringp2p.v2_text-embedding-ada-002_clustering/spec.yaml
@ -1,28 +0,0 @@
-type: evaluationresult
-name: arxivclusteringp2p.v2_text-embedding-ada-002_clustering
-version: 2.04.11
-display_name: ArxivClusteringP2P.v2_text-embedding-ada-002_clustering
-description: text-embedding-ada-002 run for ArxivClusteringP2P.v2 dataset
-dataset_name: ArxivClusteringP2P.v2
-dataset_family: ArxivClusteringP2P.v2
-
-model_name: text-embedding-ada-002
-model_version: "2"
-model_asset_id: azureml://registries/azure-openai/models/text-embedding-ada-002/versions/2
-relationships:
-    - relationshipType: Source
-      assetId: azureml://registries/azure-openai/models/text-embedding-ada-002/versions/2
-
-tags:
-    evaluation_type: text_embeddings
-    task: clustering
-    primary_metric: v_measure
-    azure_registry_name: azure-openai
-    azure_model_name: text-embedding-ada-002
-    azure_latest_model_version: 2
-    azure_latest_model_asset_id: azureml://registries/azure-openai/models/text-embedding-ada-002/versions/2
-
-metrics:
-    v_measure: 0.4794210912494528
-
-properties: {}
--- a/assets/evaluation_results/arxivclusterings2s_cohere-embed-v3-english_clustering/asset.yaml
+++ b/assets/evaluation_results/arxivclusterings2s_cohere-embed-v3-english_clustering/asset.yaml
@ -1,3 +0,0 @@
-type: evaluationresult
-spec: spec.yaml
-categories: ["EvaluationResult"]
--- a/assets/evaluation_results/arxivclusterings2s_cohere-embed-v3-english_clustering/spec.yaml
+++ b/assets/evaluation_results/arxivclusterings2s_cohere-embed-v3-english_clustering/spec.yaml
@ -1,28 +0,0 @@
-type: evaluationresult
-name: arxivclusterings2s_cohere-embed-v3-english_clustering
-version: 2.04.11
-display_name: ArxivClusteringS2S_cohere-embed-v3-english_clustering
-description: cohere-embed-v3-english run for ArxivClusteringS2S dataset
-dataset_name: ArxivClusteringS2S
-dataset_family: ArxivClusteringS2S
-
-model_name: cohere-embed-v3-english
-model_version: "3"
-model_asset_id: azureml://registries/azureml-cohere/models/Cohere-embed-v3-english/versions/3
-relationships:
-    - relationshipType: Source
-      assetId: azureml://registries/azureml-cohere/models/Cohere-embed-v3-english/versions/3
-
-tags:
-    evaluation_type: text_embeddings
-    task: clustering
-    primary_metric: v_measure
-    azure_registry_name: azureml-cohere
-    azure_model_name: Cohere-embed-v3-english
-    azure_latest_model_version: 1
-    azure_latest_model_asset_id: azureml://registries/azureml-cohere/models/Cohere-embed-v3-english/versions/1
-
-metrics:
-    v_measure: 0.38872349524931893
-
-properties: {}
--- a/assets/evaluation_results/arxivclusterings2s_cohere-embed-v3-multilingual_clustering/asset.yaml
+++ b/assets/evaluation_results/arxivclusterings2s_cohere-embed-v3-multilingual_clustering/asset.yaml
@ -1,3 +0,0 @@
-type: evaluationresult
-spec: spec.yaml
-categories: ["EvaluationResult"]
--- a/assets/evaluation_results/arxivclusterings2s_cohere-embed-v3-multilingual_clustering/spec.yaml
+++ b/assets/evaluation_results/arxivclusterings2s_cohere-embed-v3-multilingual_clustering/spec.yaml
@ -1,28 +0,0 @@
-type: evaluationresult
-name: arxivclusterings2s_cohere-embed-v3-multilingual_clustering
-version: 2.04.11
-display_name: ArxivClusteringS2S_cohere-embed-v3-multilingual_clustering
-description: cohere-embed-v3-multilingual run for ArxivClusteringS2S dataset
-dataset_name: ArxivClusteringS2S
-dataset_family: ArxivClusteringS2S
-
-model_name: cohere-embed-v3-multilingual
-model_version: "3"
-model_asset_id: azureml://registries/azureml-cohere/models/Cohere-embed-v3-multilingual/versions/3
-relationships:
-    - relationshipType: Source
-      assetId: azureml://registries/azureml-cohere/models/Cohere-embed-v3-multilingual/versions/3
-
-tags:
-    evaluation_type: text_embeddings
-    task: clustering
-    primary_metric: v_measure
-    azure_registry_name: azureml-cohere
-    azure_model_name: Cohere-embed-v3-multilingual
-    azure_latest_model_version: 1
-    azure_latest_model_asset_id: azureml://registries/azureml-cohere/models/Cohere-embed-v3-multilingual/versions/1
-
-metrics:
-    v_measure: 0.3910885755785807
-
-properties: {}
--- a/assets/evaluation_results/arxivclusterings2s_text-embedding-3-large_clustering/asset.yaml
+++ b/assets/evaluation_results/arxivclusterings2s_text-embedding-3-large_clustering/asset.yaml
@ -1,3 +0,0 @@
-type: evaluationresult
-spec: spec.yaml
-categories: ["EvaluationResult"]
--- a/assets/evaluation_results/arxivclusterings2s_text-embedding-3-large_clustering/spec.yaml
+++ b/assets/evaluation_results/arxivclusterings2s_text-embedding-3-large_clustering/spec.yaml
@ -1,28 +0,0 @@
-type: evaluationresult
-name: arxivclusterings2s_text-embedding-3-large_clustering
-version: 2.04.11
-display_name: ArxivClusteringS2S_text-embedding-3-large_clustering
-description: text-embedding-3-large run for ArxivClusteringS2S dataset
-dataset_name: ArxivClusteringS2S
-dataset_family: ArxivClusteringS2S
-
-model_name: text-embedding-3-large
-model_version: "1"
-model_asset_id: azureml://registries/azure-openai/models/text-embedding-3-large/versions/1
-relationships:
-    - relationshipType: Source
-      assetId: azureml://registries/azure-openai/models/text-embedding-3-large/versions/1
-
-tags:
-    evaluation_type: text_embeddings
-    task: clustering
-    primary_metric: v_measure
-    azure_registry_name: azure-openai
-    azure_model_name: text-embedding-3-large
-    azure_latest_model_version: 1
-    azure_latest_model_asset_id: azureml://registries/azure-openai/models/text-embedding-3-large/versions/1
-
-metrics:
-    v_measure: 0.4429783426306228
-
-properties: {}
--- a/assets/evaluation_results/arxivclusterings2s_text-embedding-3-small_clustering/asset.yaml
+++ b/assets/evaluation_results/arxivclusterings2s_text-embedding-3-small_clustering/asset.yaml
@ -1,3 +0,0 @@
-type: evaluationresult
-spec: spec.yaml
-categories: ["EvaluationResult"]
--- a/assets/evaluation_results/arxivclusterings2s_text-embedding-3-small_clustering/spec.yaml
+++ b/assets/evaluation_results/arxivclusterings2s_text-embedding-3-small_clustering/spec.yaml
@ -1,28 +0,0 @@
-type: evaluationresult
-name: arxivclusterings2s_text-embedding-3-small_clustering
-version: 2.04.11
-display_name: ArxivClusteringS2S_text-embedding-3-small_clustering
-description: text-embedding-3-small run for ArxivClusteringS2S dataset
-dataset_name: ArxivClusteringS2S
-dataset_family: ArxivClusteringS2S
-
-model_name: text-embedding-3-small
-model_version: "1"
-model_asset_id: azureml://registries/azure-openai/models/text-embedding-3-small/versions/1
-relationships:
-    - relationshipType: Source
-      assetId: azureml://registries/azure-openai/models/text-embedding-3-small/versions/1
-
-tags:
-    evaluation_type: text_embeddings
-    task: clustering
-    primary_metric: v_measure
-    azure_registry_name: azure-openai
-    azure_model_name: text-embedding-3-small
-    azure_latest_model_version: 1
-    azure_latest_model_asset_id: azureml://registries/azure-openai/models/text-embedding-3-small/versions/1
-
-metrics:
-    v_measure: 0.3940951744128959
-
-properties: {}
--- a/assets/evaluation_results/arxivclusterings2s_text-embedding-ada-002_clustering/asset.yaml
+++ b/assets/evaluation_results/arxivclusterings2s_text-embedding-ada-002_clustering/asset.yaml
@ -1,3 +0,0 @@
-type: evaluationresult
-spec: spec.yaml
-categories: ["EvaluationResult"]
--- a/assets/evaluation_results/arxivclusterings2s_text-embedding-ada-002_clustering/spec.yaml
+++ b/assets/evaluation_results/arxivclusterings2s_text-embedding-ada-002_clustering/spec.yaml
@ -1,28 +0,0 @@
-type: evaluationresult
-name: arxivclusterings2s_text-embedding-ada-002_clustering
-version: 2.04.11
-display_name: ArxivClusteringS2S_text-embedding-ada-002_clustering
-description: text-embedding-ada-002 run for ArxivClusteringS2S dataset
-dataset_name: ArxivClusteringS2S
-dataset_family: ArxivClusteringS2S
-
-model_name: text-embedding-ada-002
-model_version: "2"
-model_asset_id: azureml://registries/azure-openai/models/text-embedding-ada-002/versions/2
-relationships:
-    - relationshipType: Source
-      assetId: azureml://registries/azure-openai/models/text-embedding-ada-002/versions/2
-
-tags:
-    evaluation_type: text_embeddings
-    task: clustering
-    primary_metric: v_measure
-    azure_registry_name: azure-openai
-    azure_model_name: text-embedding-ada-002
-    azure_latest_model_version: 2
-    azure_latest_model_asset_id: azureml://registries/azure-openai/models/text-embedding-ada-002/versions/2
-
-metrics:
-    v_measure: 0.3719179506563676
-
-properties: {}
--- a/assets/evaluation_results/banking77classification_cohere-embed-v3-english_classification/asset.yaml
+++ b/assets/evaluation_results/banking77classification_cohere-embed-v3-english_classification/asset.yaml
@ -1,3 +0,0 @@
-type: evaluationresult
-spec: spec.yaml
-categories: ["EvaluationResult"]
--- a/assets/evaluation_results/banking77classification_cohere-embed-v3-english_classification/spec.yaml
+++ b/assets/evaluation_results/banking77classification_cohere-embed-v3-english_classification/spec.yaml
@ -1,28 +0,0 @@
-type: evaluationresult
-name: banking77classification_cohere-embed-v3-english_classification
-version: 2.04.11
-display_name: Banking77Classification_cohere-embed-v3-english_classification
-description: cohere-embed-v3-english run for Banking77Classification dataset
-dataset_name: Banking77Classification
-dataset_family: Banking77Classification
-
-model_name: cohere-embed-v3-english
-model_version: "3"
-model_asset_id: azureml://registries/azureml-cohere/models/Cohere-embed-v3-english/versions/3
-relationships:
-    - relationshipType: Source
-      assetId: azureml://registries/azureml-cohere/models/Cohere-embed-v3-english/versions/3
-
-tags:
-    evaluation_type: text_embeddings
-    task: classification
-    primary_metric: accuracy
-    azure_registry_name: azureml-cohere
-    azure_model_name: Cohere-embed-v3-english
-    azure_latest_model_version: 1
-    azure_latest_model_asset_id: azureml://registries/azureml-cohere/models/Cohere-embed-v3-english/versions/1
-
-metrics:
-    accuracy: 0.7934415584415586
-
-properties: {}
--- a/assets/evaluation_results/banking77classification_cohere-embed-v3-multilingual_classification/asset.yaml
+++ b/assets/evaluation_results/banking77classification_cohere-embed-v3-multilingual_classification/asset.yaml
@ -1,3 +0,0 @@
-type: evaluationresult
-spec: spec.yaml
-categories: ["EvaluationResult"]
--- a/assets/evaluation_results/banking77classification_cohere-embed-v3-multilingual_classification/spec.yaml
+++ b/assets/evaluation_results/banking77classification_cohere-embed-v3-multilingual_classification/spec.yaml
@ -1,28 +0,0 @@
-type: evaluationresult
-name: banking77classification_cohere-embed-v3-multilingual_classification
-version: 2.04.11
-display_name: Banking77Classification_cohere-embed-v3-multilingual_classification
-description: cohere-embed-v3-multilingual run for Banking77Classification dataset
-dataset_name: Banking77Classification
-dataset_family: Banking77Classification
-
-model_name: cohere-embed-v3-multilingual
-model_version: "3"
-model_asset_id: azureml://registries/azureml-cohere/models/Cohere-embed-v3-multilingual/versions/3
-relationships:
-    - relationshipType: Source
-      assetId: azureml://registries/azureml-cohere/models/Cohere-embed-v3-multilingual/versions/3
-
-tags:
-    evaluation_type: text_embeddings
-    task: classification
-    primary_metric: accuracy
-    azure_registry_name: azureml-cohere
-    azure_model_name: Cohere-embed-v3-multilingual
-    azure_latest_model_version: 1
-    azure_latest_model_asset_id: azureml://registries/azureml-cohere/models/Cohere-embed-v3-multilingual/versions/1
-
-metrics:
-    accuracy: 0.7934415584415585
-
-properties: {}
--- a/assets/evaluation_results/banking77classification_text-embedding-3-large_classification/asset.yaml
+++ b/assets/evaluation_results/banking77classification_text-embedding-3-large_classification/asset.yaml
@ -1,3 +0,0 @@
-type: evaluationresult
-spec: spec.yaml
-categories: ["EvaluationResult"]
--- a/assets/evaluation_results/banking77classification_text-embedding-3-large_classification/spec.yaml
+++ b/assets/evaluation_results/banking77classification_text-embedding-3-large_classification/spec.yaml
@ -1,28 +0,0 @@
-type: evaluationresult
-name: banking77classification_text-embedding-3-large_classification
-version: 2.04.11
-display_name: Banking77Classification_text-embedding-3-large_classification
-description: text-embedding-3-large run for Banking77Classification dataset
-dataset_name: Banking77Classification
-dataset_family: Banking77Classification
-
-model_name: text-embedding-3-large
-model_version: "1"
-model_asset_id: azureml://registries/azure-openai/models/text-embedding-3-large/versions/1
-relationships:
-    - relationshipType: Source
-      assetId: azureml://registries/azure-openai/models/text-embedding-3-large/versions/1
-
-tags:
-    evaluation_type: text_embeddings
-    task: classification
-    primary_metric: accuracy
-    azure_registry_name: azure-openai
-    azure_model_name: text-embedding-3-large
-    azure_latest_model_version: 1
-    azure_latest_model_asset_id: azureml://registries/azure-openai/models/text-embedding-3-large/versions/1
-
-metrics:
-    accuracy: 0.8572402597402597
-
-properties: {}
--- a/assets/evaluation_results/banking77classification_text-embedding-3-small_classification/asset.yaml
+++ b/assets/evaluation_results/banking77classification_text-embedding-3-small_classification/asset.yaml
@ -1,3 +0,0 @@
-type: evaluationresult
-spec: spec.yaml
-categories: ["EvaluationResult"]
--- a/assets/evaluation_results/banking77classification_text-embedding-3-small_classification/spec.yaml
+++ b/assets/evaluation_results/banking77classification_text-embedding-3-small_classification/spec.yaml
@ -1,28 +0,0 @@
-type: evaluationresult
-name: banking77classification_text-embedding-3-small_classification
-version: 2.04.11
-display_name: Banking77Classification_text-embedding-3-small_classification
-description: text-embedding-3-small run for Banking77Classification dataset
-dataset_name: Banking77Classification
-dataset_family: Banking77Classification
-
-model_name: text-embedding-3-small
-model_version: "1"
-model_asset_id: azureml://registries/azure-openai/models/text-embedding-3-small/versions/1
-relationships:
-    - relationshipType: Source
-      assetId: azureml://registries/azure-openai/models/text-embedding-3-small/versions/1
-
-tags:
-    evaluation_type: text_embeddings
-    task: classification
-    primary_metric: accuracy
-    azure_registry_name: azure-openai
-    azure_model_name: text-embedding-3-small
-    azure_latest_model_version: 1
-    azure_latest_model_asset_id: azureml://registries/azure-openai/models/text-embedding-3-small/versions/1
-
-metrics:
-    accuracy: 0.8299025974025973
-
-properties: {}
--- a/assets/evaluation_results/banking77classification_text-embedding-ada-002_classification/asset.yaml
+++ b/assets/evaluation_results/banking77classification_text-embedding-ada-002_classification/asset.yaml
@ -1,3 +0,0 @@
-type: evaluationresult
-spec: spec.yaml
-categories: ["EvaluationResult"]
--- a/assets/evaluation_results/banking77classification_text-embedding-ada-002_classification/spec.yaml
+++ b/assets/evaluation_results/banking77classification_text-embedding-ada-002_classification/spec.yaml
@ -1,28 +0,0 @@
-type: evaluationresult
-name: banking77classification_text-embedding-ada-002_classification
-version: 2.04.11
-display_name: Banking77Classification_text-embedding-ada-002_classification
-description: text-embedding-ada-002 run for Banking77Classification dataset
-dataset_name: Banking77Classification
-dataset_family: Banking77Classification
-
-model_name: text-embedding-ada-002
-model_version: "2"
-model_asset_id: azureml://registries/azure-openai/models/text-embedding-ada-002/versions/2
-relationships:
-    - relationshipType: Source
-      assetId: azureml://registries/azure-openai/models/text-embedding-ada-002/versions/2
-
-tags:
-    evaluation_type: text_embeddings
-    task: classification
-    primary_metric: accuracy
-    azure_registry_name: azure-openai
-    azure_model_name: text-embedding-ada-002
-    azure_latest_model_version: 2
-    azure_latest_model_asset_id: azureml://registries/azure-openai/models/text-embedding-ada-002/versions/2
-
-metrics:
-    accuracy: 0.8053246753246753
-
-properties: {}
--- a/assets/evaluation_results/boolqgpt-4-0125-previewquestion_answering/asset.yaml
+++ b/assets/evaluation_results/boolqgpt-4-0125-previewquestion_answering/asset.yaml
@ -1,3 +0,0 @@
-type: evaluationresult
-spec: spec.yaml
-categories: ["EvaluationResult"]
--- a/assets/evaluation_results/boolqgpt-4-0125-previewquestion_answering/spec.yaml
+++ b/assets/evaluation_results/boolqgpt-4-0125-previewquestion_answering/spec.yaml
@ -1,35 +0,0 @@
-type: evaluationresult
-name: boolq__gpt-4-0125-preview__question_answering
-version: 2.04.11
-display_name: boolq__gpt-4-0125-Preview__chat_completion
-description: Benchmark__gpt40125__hf_boolq__chat_completion
-dataset_family: boolq
-dataset_name: boolq
-
-model_name: gpt-4-0125-Preview
-model_version: "0125-Preview"
-model_asset_id: azureml://registries/azure-openai/models/gpt-4/versions/0125-Preview
-
-relationships:
-  - relationshipType: Source
-    assetId: azureml://registries/azure-openai/models/gpt-4/versions/0125-Preview
-
-tags:
-  evaluation_type: text_generation
-  task: question-answering
-  accuracy_metric_name: exact_match
-  azure_registry_name: azure-openai
-  azure_model_name: gpt-4
-  azure_latest_model_version: turbo-2024-04-09
-  azure_latest_model_asset_id: azureml://registries/azure-openai/models/gpt-4/versions/turbo-2024-04-09
-
-metrics:
-  accuracy: 0.904892966
-
-
-properties:
-  n_shot: 5
-  evaluation_sampling_ratio: 1.0
-  evaluation_split: "validation"
-  fewshot_sampling_ratio: 1.0
-  fewshot_split: "train"
--- a/assets/evaluation_results/boolq__meta-llama-3_1-70b-instruct__question_answering/asset.yaml
+++ b/assets/evaluation_results/boolq__meta-llama-3_1-70b-instruct__question_answering/asset.yaml
@ -1,3 +0,0 @@
-type: evaluationresult
-spec: spec.yaml
-categories: ["EvaluationResult"]
--- a/assets/evaluation_results/boolq__meta-llama-3_1-70b-instruct__question_answering/spec.yaml
+++ b/assets/evaluation_results/boolq__meta-llama-3_1-70b-instruct__question_answering/spec.yaml
@ -1,35 +0,0 @@
-type: evaluationresult
-name: boolq__meta-llama-3_1-70b-instruct__question_answering
-version: 2.04.11
-display_name: boolq__Meta-Llama-3_1-70B-Instruct__chat_completion
-description: Benchmark__Llama-3-1-70B-Instruct-bench__hf_boolq__chat_completion
-dataset_family: boolq
-dataset_name: boolq
-
-model_name: Meta-Llama-3.1-70B-Instruct
-model_version: "1"
-model_asset_id: azureml://registries/azureml-meta/models/Meta-Llama-3.1-70B-Instruct/versions/1
-
-relationships:
-  - relationshipType: Source
-    assetId: azureml://registries/azureml-meta/models/Meta-Llama-3.1-70B-Instruct/versions/1
-
-tags:
-  evaluation_type: text_generation
-  task: question-answering
-  accuracy_metric_name: exact_match
-  azure_registry_name: azureml-meta
-  azure_model_name: Meta-Llama-3.1-70B-Instruct
-  azure_latest_model_version: 3
-  azure_latest_model_asset_id: azureml://registries/azureml-meta/models/Meta-Llama-3.1-70B-Instruct/versions/3
-
-metrics:
-  accuracy: 0.909785933
-
-
-properties:
-  n_shot: 5
-  evaluation_sampling_ratio: 1.0
-  evaluation_split: "validation"
-  fewshot_sampling_ratio: 1.0
-  fewshot_split: "train"
--- a/assets/evaluation_results/boolq__meta-llama-3_1-8b-instruct__question_answering/asset.yaml
+++ b/assets/evaluation_results/boolq__meta-llama-3_1-8b-instruct__question_answering/asset.yaml
@ -1,3 +0,0 @@
-type: evaluationresult
-spec: spec.yaml
-categories: ["EvaluationResult"]
--- a/assets/evaluation_results/boolq__meta-llama-3_1-8b-instruct__question_answering/spec.yaml
+++ b/assets/evaluation_results/boolq__meta-llama-3_1-8b-instruct__question_answering/spec.yaml
@ -1,35 +0,0 @@
-type: evaluationresult
-name: boolq__meta-llama-3_1-8b-instruct__question_answering
-version: 2.04.11
-display_name: boolq__Meta-Llama-3_1-8B-Instruct__chat_completion
-description: Benchmark__meta-llama-3-1-8b-instruct-1__hf_boolq__chat_completion
-dataset_family: boolq
-dataset_name: boolq
-
-model_name: Meta-Llama-3.1-8B-Instruct
-model_version: "1"
-model_asset_id: azureml://registries/azureml-meta/models/Meta-Llama-3.1-8B-Instruct/versions/1
-
-relationships:
-  - relationshipType: Source
-    assetId: azureml://registries/azureml-meta/models/Meta-Llama-3.1-8B-Instruct/versions/1
-
-tags:
-  evaluation_type: text_generation
-  task: question-answering
-  accuracy_metric_name: exact_match
-  azure_registry_name: azureml-meta
-  azure_model_name: Meta-Llama-3.1-8B-Instruct
-  azure_latest_model_version: 3
-  azure_latest_model_asset_id: azureml://registries/azureml-meta/models/Meta-Llama-3.1-8B-Instruct/versions/3
-
-metrics:
-  accuracy: 0.868501529
-
-
-properties:
-  n_shot: 5
-  evaluation_sampling_ratio: 1.0
-  evaluation_split: "validation"
-  fewshot_sampling_ratio: 1.0
-  fewshot_split: "train"
--- a/assets/evaluation_results/boolq_cohere_command_r_plus_question_answering/asset.yaml
+++ b/assets/evaluation_results/boolq_cohere_command_r_plus_question_answering/asset.yaml
@ -1,3 +0,0 @@
-type: evaluationresult
-spec: spec.yaml
-categories: ["EvaluationResult"]
--- a/assets/evaluation_results/boolq_cohere_command_r_plus_question_answering/spec.yaml
+++ b/assets/evaluation_results/boolq_cohere_command_r_plus_question_answering/spec.yaml
@ -1,35 +0,0 @@
-type: evaluationresult
-name: boolq_cohere_command_r_plus_question_answering
-version: 2.04.11
-display_name: boolq_Cohere_command_r_plus_question_answering
-description: Cohere-command-r-plus run for boolq dataset
-dataset_family: boolq
-dataset_name: boolq
-
-model_name: Cohere-command-r-plus
-model_version: "1"
-model_asset_id: azureml://registries/azureml-cohere/models/Cohere-command-r-plus/versions/1
-
-relationships:
-  - relationshipType: Source
-    assetId: azureml://registries/azureml-cohere/models/Cohere-command-r-plus/versions/1
-
-tags:
-  evaluation_type: text_generation
-  task: question-answering
-  accuracy_metric_name: exact_match
-  azure_registry_name: azureml-cohere
-  azure_model_name: Cohere-command-r-plus
-  azure_latest_model_version: 1
-  azure_latest_model_asset_id: azureml://registries/azureml-cohere/models/Cohere-command-r-plus/versions/1
-
-metrics:
-  accuracy: 0.909480122
-
-
-properties:
-  n_shot: 5
-  evaluation_sampling_ratio: 1.0
-  evaluation_split: "validation"
-  fewshot_sampling_ratio: 1.0
-  fewshot_split: "train"
--- a/assets/evaluation_results/boolq_cohere_command_r_question_answering/asset.yaml
+++ b/assets/evaluation_results/boolq_cohere_command_r_question_answering/asset.yaml
@ -1,3 +0,0 @@
-type: evaluationresult
-spec: spec.yaml
-categories: ["EvaluationResult"]
--- a/assets/evaluation_results/boolq_cohere_command_r_question_answering/spec.yaml
+++ b/assets/evaluation_results/boolq_cohere_command_r_question_answering/spec.yaml
@ -1,35 +0,0 @@
-type: evaluationresult
-name: boolq_cohere_command_r_question_answering
-version: 2.04.11
-display_name: boolq_Cohere_command_r_question_answering
-description: Cohere-command-r run for boolq dataset
-dataset_family: boolq
-dataset_name: boolq
-
-model_name: Cohere-command-r
-model_version: "1"
-model_asset_id: azureml://registries/azureml-cohere/models/Cohere-command-r/versions/1
-
-relationships:
-  - relationshipType: Source
-    assetId: azureml://registries/azureml-cohere/models/Cohere-command-r/versions/1
-
-tags:
-  evaluation_type: text_generation
-  task: question-answering
-  accuracy_metric_name: exact_match
-  azure_registry_name: azureml-cohere
-  azure_model_name: Cohere-command-r
-  azure_latest_model_version: 1
-  azure_latest_model_asset_id: azureml://registries/azureml-cohere/models/Cohere-command-r/versions/1
-
-metrics:
-  accuracy: 0.8819571865443425
-
-
-properties:
-  n_shot: 5
-  evaluation_sampling_ratio: 1.0
-  evaluation_split: "validation"
-  fewshot_sampling_ratio: 1.0
-  fewshot_split: "train"
--- a/assets/evaluation_results/boolq_databricks-dbrx-base_question_answering/asset.yaml
+++ b/assets/evaluation_results/boolq_databricks-dbrx-base_question_answering/asset.yaml
@ -1,3 +0,0 @@
-type: evaluationresult
-spec: spec.yaml
-categories: ["EvaluationResult"]
--- a/assets/evaluation_results/boolq_databricks-dbrx-base_question_answering/spec.yaml
+++ b/assets/evaluation_results/boolq_databricks-dbrx-base_question_answering/spec.yaml
@ -1,35 +0,0 @@
-type: evaluationresult
-name: boolq_databricks-dbrx-base_question_answering
-version: 2.04.11
-display_name: boolq_databricks-dbrx-base_question_answering
-description: databricks-dbrx-base run for boolq dataset
-dataset_family: boolq
-dataset_name: boolq
-
-model_name: databricks-dbrx-base
-model_version: "1"
-model_asset_id: azureml://registries/azureml-restricted/models/databricks-dbrx-base/versions/1
-
-relationships:
-  - relationshipType: Source
-    assetId: azureml://registries/azureml-restricted/models/databricks-dbrx-base/versions/1
-
-tags:
-  evaluation_type: text_generation
-  task: question-answering
-  accuracy_metric_name: exact_match
-  azure_registry_name: azureml-restricted
-  azure_model_name: databricks-dbrx-base
-  azure_latest_model_version: 3
-  azure_latest_model_asset_id: azureml://registries/azureml-restricted/models/databricks-dbrx-base/versions/3
-
-metrics:
-  accuracy: 0.9159021
-
-
-properties:
-  n_shot: 5
-  evaluation_sampling_ratio: 1.0
-  evaluation_split: "validation"
-  fewshot_sampling_ratio: 1.0
-  fewshot_split: "train"
--- a/assets/evaluation_results/boolq_databricks-dbrx-instruct_question_answering/asset.yaml
+++ b/assets/evaluation_results/boolq_databricks-dbrx-instruct_question_answering/asset.yaml
@ -1,3 +0,0 @@
-type: evaluationresult
-spec: spec.yaml
-categories: ["EvaluationResult"]
--- a/assets/evaluation_results/boolq_databricks-dbrx-instruct_question_answering/spec.yaml
+++ b/assets/evaluation_results/boolq_databricks-dbrx-instruct_question_answering/spec.yaml
@ -1,35 +0,0 @@
-type: evaluationresult
-name: boolq_databricks-dbrx-instruct_question_answering
-version: 2.04.11
-display_name: boolq_databricks-dbrx-instruct_question_answering
-description: databricks-dbrx-instruct run for boolq dataset
-dataset_family: boolq
-dataset_name: boolq
-
-model_name: databricks-dbrx-instruct
-model_version: "1"
-model_asset_id: azureml://registries/azureml-restricted/models/databricks-dbrx-instruct/versions/1
-
-relationships:
-  - relationshipType: Source
-    assetId: azureml://registries/azureml-restricted/models/databricks-dbrx-instruct/versions/1
-
-tags:
-  evaluation_type: text_generation
-  task: question-answering
-  accuracy_metric_name: exact_match
-  azure_registry_name: azureml-restricted
-  azure_model_name: databricks-dbrx-instruct
-  azure_latest_model_version: 3
-  azure_latest_model_asset_id: azureml://registries/azureml-restricted/models/databricks-dbrx-instruct/versions/3
-
-metrics:
-  accuracy: 0.9051988
-
-
-properties:
-  n_shot: 5
-  evaluation_sampling_ratio: 1.0
-  evaluation_split: "validation"
-  fewshot_sampling_ratio: 1.0
-  fewshot_split: "train"
--- a/assets/evaluation_results/boolq_gpt-4-turbo-2024-04-09_chat_completion/asset.yaml
+++ b/assets/evaluation_results/boolq_gpt-4-turbo-2024-04-09_chat_completion/asset.yaml
@ -1,3 +0,0 @@
-type: evaluationresult
-spec: spec.yaml
-categories: ["EvaluationResult"]
--- a/assets/evaluation_results/boolq_gpt-4-turbo-2024-04-09_chat_completion/spec.yaml
+++ b/assets/evaluation_results/boolq_gpt-4-turbo-2024-04-09_chat_completion/spec.yaml
@ -1,35 +0,0 @@
-type: evaluationresult
-name: boolq_gpt-4-turbo-2024-04-09_chat_completion
-version: 2.04.11
-display_name: boolq_gpt-4-turbo-2024-04-09_chat_completion
-description: boolq_gpt-4-turbo-2024-04-09_chat_completion
-dataset_family: boolq
-dataset_name: boolq
-
-model_name: gpt-4-turbo-2024-04-09
-model_version: "turbo-2024-04-09"
-model_asset_id: azureml://registries/azure-openai/models/gpt-4/versions/turbo-2024-04-09
-
-relationships:
-  - relationshipType: Source
-    assetId: azureml://registries/azure-openai/models/gpt-4/versions/turbo-2024-04-09
-
-tags:
-  evaluation_type: text_generation
-  task: question-answering
-  accuracy_metric_name: exact_match
-  azure_registry_name: azure-openai
-  azure_model_name: gpt-4
-  azure_latest_model_version: turbo-2024-04-09
-  azure_latest_model_asset_id: azureml://registries/azure-openai/models/gpt-4/versions/turbo-2024-04-09
-
-metrics:
-  accuracy: 0.9125382262996942
-
-
-properties:
-  n_shot: 5
-  evaluation_sampling_ratio: 1.0
-  evaluation_split: "validation"
-  fewshot_sampling_ratio: 1.0
-  fewshot_split: "train"
--- a/assets/evaluation_results/boolq_gpt-4o_question_answering/asset.yaml
+++ b/assets/evaluation_results/boolq_gpt-4o_question_answering/asset.yaml
@ -1,3 +0,0 @@
-type: evaluationresult
-spec: spec.yaml
-categories: ["EvaluationResult"]
--- a/assets/evaluation_results/boolq_gpt-4o_question_answering/spec.yaml
+++ b/assets/evaluation_results/boolq_gpt-4o_question_answering/spec.yaml
@ -1,35 +0,0 @@
-type: evaluationresult
-name: boolq_gpt-4o_question_answering
-version: 2.04.11
-display_name: boolq_gpt-4o_question_answering
-description: gpt-4o run for boolq
-dataset_family: boolq
-dataset_name: boolq
-
-model_name: gpt-4o-2024-05-13
-model_version: "2024-05-13"
-model_asset_id: azureml://registries/azure-openai/models/gpt-4o/versions/2024-05-13
-
-relationships:
-  - relationshipType: Source
-    assetId: azureml://registries/azure-openai/models/gpt-4o/versions/2024-05-13
-
-tags:
-  evaluation_type: text_generation
-  task: question-answering
-  accuracy_metric_name: exact_match
-  azure_registry_name: azure-openai
-  azure_model_name: gpt-4o
-  azure_latest_model_version: 2024-08-06
-  azure_latest_model_asset_id: azureml://registries/azure-openai/models/gpt-4o/versions/2024-08-06
-
-metrics:
-  accuracy: 0.908562691
-
-
-properties:
-  n_shot: 5
-  evaluation_sampling_ratio: 1.0
-  evaluation_split: "validation"
-  fewshot_sampling_ratio: 1.0
-  fewshot_split: "train"
--- a/assets/evaluation_results/boolq_gpt_35_turbo_0301_question_answering/asset.yaml
+++ b/assets/evaluation_results/boolq_gpt_35_turbo_0301_question_answering/asset.yaml
@ -1,3 +0,0 @@
-type: evaluationresult
-spec: spec.yaml
-categories: ["EvaluationResult"]
--- a/assets/evaluation_results/boolq_gpt_35_turbo_0301_question_answering/spec.yaml
+++ b/assets/evaluation_results/boolq_gpt_35_turbo_0301_question_answering/spec.yaml
@ -1,35 +0,0 @@
-type: evaluationresult
-name: boolq_gpt_35_turbo_0301_question_answering
-version: 2.04.11
-display_name: boolq_gpt_35_turbo_0301_question_answering
-description: gpt-35-turbo-0301 run for boolq dataset
-dataset_family: boolq
-dataset_name: boolq
-
-model_name: gpt-35-turbo-0301
-model_version: "0301"
-model_asset_id: azureml://registries/azure-openai/models/gpt-35-turbo/versions/0301
-
-relationships:
-  - relationshipType: Source
-    assetId: azureml://registries/azure-openai/models/gpt-35-turbo/versions/0301
-
-tags:
-  evaluation_type: text_generation
-  task: question-answering
-  accuracy_metric_name: exact_match
-  azure_registry_name: azure-openai
-  azure_model_name: gpt-35-turbo
-  azure_latest_model_version: 0125
-  azure_latest_model_asset_id: azureml://registries/azure-openai/models/gpt-35-turbo/versions/0125
-
-metrics:
-  accuracy: 0.867
-
-
-properties:
-  n_shot: 5
-  evaluation_sampling_ratio: 1.0
-  evaluation_split: "validation"
-  fewshot_sampling_ratio: 1.0
-  fewshot_split: "train"
--- a/assets/evaluation_results/boolq_gpt_35_turbo_0613_question_answering/asset.yaml
+++ b/assets/evaluation_results/boolq_gpt_35_turbo_0613_question_answering/asset.yaml
@ -1,3 +0,0 @@
-type: evaluationresult
-spec: spec.yaml
-categories: ["EvaluationResult"]
--- a/assets/evaluation_results/boolq_gpt_35_turbo_0613_question_answering/spec.yaml
+++ b/assets/evaluation_results/boolq_gpt_35_turbo_0613_question_answering/spec.yaml
@ -1,35 +0,0 @@
-type: evaluationresult
-name: boolq_gpt_35_turbo_0613_question_answering
-version: 2.04.11
-display_name: boolq_gpt_35_turbo_0613_question_answering
-description: gpt-35-turbo-0613 run for boolq dataset
-dataset_family: boolq
-dataset_name: boolq
-
-model_name: gpt-35-turbo-0613
-model_version: "0613"
-model_asset_id: azureml://registries/azure-openai/models/gpt-35-turbo/versions/0613
-
-relationships:
-  - relationshipType: Source
-    assetId: azureml://registries/azure-openai/models/gpt-35-turbo/versions/0613
-
-tags:
-  evaluation_type: text_generation
-  task: question-answering
-  accuracy_metric_name: exact_match
-  azure_registry_name: azure-openai
-  azure_model_name: gpt-35-turbo
-  azure_latest_model_version: 0125
-  azure_latest_model_asset_id: azureml://registries/azure-openai/models/gpt-35-turbo/versions/0125
-
-metrics:
-  accuracy: 0.864
-
-
-properties:
-  n_shot: 5
-  evaluation_sampling_ratio: 1.0
-  evaluation_split: "validation"
-  fewshot_sampling_ratio: 1.0
-  fewshot_split: "train"
--- a/assets/evaluation_results/boolq_gpt_4_0314_question_answering/asset.yaml
+++ b/assets/evaluation_results/boolq_gpt_4_0314_question_answering/asset.yaml
@ -1,3 +0,0 @@
-type: evaluationresult
-spec: spec.yaml
-categories: ["EvaluationResult"]
--- a/assets/evaluation_results/boolq_gpt_4_0314_question_answering/spec.yaml
+++ b/assets/evaluation_results/boolq_gpt_4_0314_question_answering/spec.yaml
@ -1,35 +0,0 @@
-type: evaluationresult
-name: boolq_gpt_4_0314_question_answering
-version: 2.04.11
-display_name: boolq_gpt_4_0314_question_answering
-description: gpt-4-0314 run for boolq dataset
-dataset_family: boolq
-dataset_name: boolq
-
-model_name: gpt-4-0314
-model_version: "0314"
-model_asset_id: azureml://registries/azure-openai/models/gpt-4/versions/4
-
-relationships:
-  - relationshipType: Source
-    assetId: azureml://registries/azure-openai/models/gpt-4/versions/4
-
-tags:
-  evaluation_type: text_generation
-  task: question-answering
-  accuracy_metric_name: exact_match
-  azure_registry_name: azure-openai
-  azure_model_name: gpt-4
-  azure_latest_model_version: turbo-2024-04-09
-  azure_latest_model_asset_id: azureml://registries/azure-openai/models/gpt-4/versions/turbo-2024-04-09
-
-metrics:
-  accuracy: 0.911
-
-
-properties:
-  n_shot: 5
-  evaluation_sampling_ratio: 1.0
-  evaluation_split: "validation"
-  fewshot_sampling_ratio: 1.0
-  fewshot_split: "train"
--- a/assets/evaluation_results/boolq_gpt_4_0613_question_answering/asset.yaml
+++ b/assets/evaluation_results/boolq_gpt_4_0613_question_answering/asset.yaml
@ -1,3 +0,0 @@
-type: evaluationresult
-spec: spec.yaml
-categories: ["EvaluationResult"]
--- a/assets/evaluation_results/boolq_gpt_4_0613_question_answering/spec.yaml
+++ b/assets/evaluation_results/boolq_gpt_4_0613_question_answering/spec.yaml
@ -1,35 +0,0 @@
-type: evaluationresult
-name: boolq_gpt_4_0613_question_answering
-version: 2.04.11
-display_name: boolq_gpt_4_0613_question_answering
-description: gpt-4-0613 run for boolq dataset
-dataset_family: boolq
-dataset_name: boolq
-
-model_name: gpt-4-0613
-model_version: "0613"
-model_asset_id: azureml://registries/azure-openai/models/gpt-4/versions/0613
-
-relationships:
-  - relationshipType: Source
-    assetId: azureml://registries/azure-openai/models/gpt-4/versions/0613
-
-tags:
-  evaluation_type: text_generation
-  task: question-answering
-  accuracy_metric_name: exact_match
-  azure_registry_name: azure-openai
-  azure_model_name: gpt-4
-  azure_latest_model_version: turbo-2024-04-09
-  azure_latest_model_asset_id: azureml://registries/azure-openai/models/gpt-4/versions/turbo-2024-04-09
-
-metrics:
-  accuracy: 0.912
-
-
-properties:
-  n_shot: 5
-  evaluation_sampling_ratio: 1.0
-  evaluation_split: "validation"
-  fewshot_sampling_ratio: 1.0
-  fewshot_split: "train"
--- a/assets/evaluation_results/boolq_gpt_4_32k_0314_question_answering/asset.yaml
+++ b/assets/evaluation_results/boolq_gpt_4_32k_0314_question_answering/asset.yaml
@ -1,3 +0,0 @@
-type: evaluationresult
-spec: spec.yaml
-categories: ["EvaluationResult"]
--- a/assets/evaluation_results/boolq_gpt_4_32k_0314_question_answering/spec.yaml
+++ b/assets/evaluation_results/boolq_gpt_4_32k_0314_question_answering/spec.yaml
@ -1,35 +0,0 @@
-type: evaluationresult
-name: boolq_gpt_4_32k_0314_question_answering
-version: 2.04.11
-display_name: boolq_gpt_4_32k_0314_question_answering
-description: gpt-4-32k-0314 run for boolq dataset
-dataset_family: boolq
-dataset_name: boolq
-
-model_name: gpt-4-32k-0314
-model_version: "0314"
-model_asset_id: azureml://registries/azure-openai/models/gpt-4-32k/versions/0314
-
-relationships:
-  - relationshipType: Source
-    assetId: azureml://registries/azure-openai/models/gpt-4-32k/versions/0314
-
-tags:
-  evaluation_type: text_generation
-  task: question-answering
-  accuracy_metric_name: exact_match
-  azure_registry_name: azure-openai
-  azure_model_name: gpt-4-32k
-  azure_latest_model_version: 0613
-  azure_latest_model_asset_id: azureml://registries/azure-openai/models/gpt-4-32k/versions/0613
-
-metrics:
-  accuracy: 0.913
-
-
-properties:
-  n_shot: 5
-  evaluation_sampling_ratio: 1.0
-  evaluation_split: "validation"
-  fewshot_sampling_ratio: 1.0
-  fewshot_split: "train"
--- a/assets/evaluation_results/boolq_gpt_4_32k_0613_question_answering/asset.yaml
+++ b/assets/evaluation_results/boolq_gpt_4_32k_0613_question_answering/asset.yaml
@ -1,3 +0,0 @@
-type: evaluationresult
-spec: spec.yaml
-categories: ["EvaluationResult"]
--- a/assets/evaluation_results/boolq_gpt_4_32k_0613_question_answering/spec.yaml
+++ b/assets/evaluation_results/boolq_gpt_4_32k_0613_question_answering/spec.yaml
@ -1,35 +0,0 @@
-type: evaluationresult
-name: boolq_gpt_4_32k_0613_question_answering
-version: 2.04.11
-display_name: boolq_gpt_4_32k_0613_question_answering
-description: gpt-4-32k-0613 run for boolq dataset
-dataset_family: boolq
-dataset_name: boolq
-
-model_name: gpt-4-32k-0613
-model_version: "0613"
-model_asset_id: azureml://registries/azure-openai/models/gpt-4-32k/versions/0613
-
-relationships:
-  - relationshipType: Source
-    assetId: azureml://registries/azure-openai/models/gpt-4-32k/versions/0613
-
-tags:
-  evaluation_type: text_generation
-  task: question-answering
-  accuracy_metric_name: exact_match
-  azure_registry_name: azure-openai
-  azure_model_name: gpt-4-32k
-  azure_latest_model_version: 0613
-  azure_latest_model_asset_id: azureml://registries/azure-openai/models/gpt-4-32k/versions/0613
-
-metrics:
-  accuracy: 0.911
-
-
-properties:
-  n_shot: 5
-  evaluation_sampling_ratio: 1.0
-  evaluation_split: "validation"
-  fewshot_sampling_ratio: 1.0
-  fewshot_split: "train"
--- a/assets/evaluation_results/boolq_llama_2_13b_chat_question_answering/asset.yaml
+++ b/assets/evaluation_results/boolq_llama_2_13b_chat_question_answering/asset.yaml
@ -1,3 +0,0 @@
-type: evaluationresult
-spec: spec.yaml
-categories: ["EvaluationResult"]
--- a/assets/evaluation_results/boolq_llama_2_13b_chat_question_answering/spec.yaml
+++ b/assets/evaluation_results/boolq_llama_2_13b_chat_question_answering/spec.yaml
@ -1,35 +0,0 @@
-type: evaluationresult
-name: boolq_llama_2_13b_chat_question_answering
-version: 2.04.11
-display_name: boolq_llama_2_13b_chat_question_answering
-description: llama-2-13b-chat run for boolq dataset
-dataset_family: boolq
-dataset_name: boolq
-
-model_name: llama-2-13b-chat
-model_version: "12"
-model_asset_id: azureml://registries/azureml-meta/models/Llama-2-13b-chat/versions/12
-
-relationships:
-  - relationshipType: Source
-    assetId: azureml://registries/azureml-meta/models/Llama-2-13b-chat/versions/12
-
-tags:
-  evaluation_type: text_generation
-  task: question-answering
-  accuracy_metric_name: exact_match
-  azure_registry_name: azureml-meta
-  azure_model_name: Llama-2-13b-chat
-  azure_latest_model_version: 20
-  azure_latest_model_asset_id: azureml://registries/azureml-meta/models/Llama-2-13b-chat/versions/20
-
-metrics:
-  accuracy: 0.801
-
-
-properties:
-  n_shot: 5
-  evaluation_sampling_ratio: 1.0
-  evaluation_split: "validation"
-  fewshot_sampling_ratio: 1.0
-  fewshot_split: "train"
--- a/assets/evaluation_results/boolq_llama_2_13b_question_answering/asset.yaml
+++ b/assets/evaluation_results/boolq_llama_2_13b_question_answering/asset.yaml
@ -1,3 +0,0 @@
-type: evaluationresult
-spec: spec.yaml
-categories: ["EvaluationResult"]
--- a/assets/evaluation_results/boolq_llama_2_13b_question_answering/spec.yaml
+++ b/assets/evaluation_results/boolq_llama_2_13b_question_answering/spec.yaml
@ -1,35 +0,0 @@
-type: evaluationresult
-name: boolq_llama_2_13b_question_answering
-version: 2.04.11
-display_name: boolq_llama_2_13b_question_answering
-description: llama-2-13b run for boolq dataset
-dataset_family: boolq
-dataset_name: boolq
-
-model_name: llama-2-13b
-model_version: "12"
-model_asset_id: azureml://registries/azureml-meta/models/Llama-2-13b/versions/12
-
-relationships:
-  - relationshipType: Source
-    assetId: azureml://registries/azureml-meta/models/Llama-2-13b/versions/12
-
-tags:
-  evaluation_type: text_generation
-  task: question-answering
-  accuracy_metric_name: exact_match
-  azure_registry_name: azureml-meta
-  azure_model_name: Llama-2-13b
-  azure_latest_model_version: 23
-  azure_latest_model_asset_id: azureml://registries/azureml-meta/models/Llama-2-13b/versions/23
-
-metrics:
-  accuracy: 0.723
-
-
-properties:
-  n_shot: 5
-  evaluation_sampling_ratio: 1.0
-  evaluation_split: "validation"
-  fewshot_sampling_ratio: 1.0
-  fewshot_split: "train"
--- a/assets/evaluation_results/boolq_llama_2_70b_chat_question_answering/asset.yaml
+++ b/assets/evaluation_results/boolq_llama_2_70b_chat_question_answering/asset.yaml
@ -1,3 +0,0 @@
-type: evaluationresult
-spec: spec.yaml
-categories: ["EvaluationResult"]
--- a/assets/evaluation_results/boolq_llama_2_70b_chat_question_answering/spec.yaml
+++ b/assets/evaluation_results/boolq_llama_2_70b_chat_question_answering/spec.yaml
@ -1,35 +0,0 @@
-type: evaluationresult
-name: boolq_llama_2_70b_chat_question_answering
-version: 2.04.11
-display_name: boolq_llama_2_70b_chat_question_answering
-description: llama-2-70b-chat run for boolq dataset
-dataset_family: boolq
-dataset_name: boolq
-
-model_name: llama-2-70b-chat
-model_version: "12"
-model_asset_id: azureml://registries/azureml-meta/models/Llama-2-70b-chat/versions/12
-
-relationships:
-  - relationshipType: Source
-    assetId: azureml://registries/azureml-meta/models/Llama-2-70b-chat/versions/12
-
-tags:
-  evaluation_type: text_generation
-  task: question-answering
-  accuracy_metric_name: exact_match
-  azure_registry_name: azureml-meta
-  azure_model_name: Llama-2-70b-chat
-  azure_latest_model_version: 20
-  azure_latest_model_asset_id: azureml://registries/azureml-meta/models/Llama-2-70b-chat/versions/20
-
-metrics:
-  accuracy: 0.826
-
-
-properties:
-  n_shot: 5
-  evaluation_sampling_ratio: 1.0
-  evaluation_split: "validation"
-  fewshot_sampling_ratio: 1.0
-  fewshot_split: "train"
--- a/assets/evaluation_results/boolq_llama_2_70b_question_answering/asset.yaml
+++ b/assets/evaluation_results/boolq_llama_2_70b_question_answering/asset.yaml
@ -1,3 +0,0 @@
-type: evaluationresult
-spec: spec.yaml
-categories: ["EvaluationResult"]
--- a/assets/evaluation_results/boolq_llama_2_70b_question_answering/spec.yaml
+++ b/assets/evaluation_results/boolq_llama_2_70b_question_answering/spec.yaml
@ -1,35 +0,0 @@
-type: evaluationresult
-name: boolq_llama_2_70b_question_answering
-version: 2.04.11
-display_name: boolq_llama_2_70b_question_answering
-description: llama-2-70b run for boolq dataset
-dataset_family: boolq
-dataset_name: boolq
-
-model_name: llama-2-70b
-model_version: "13"
-model_asset_id: azureml://registries/azureml-meta/models/Llama-2-70b/versions/13
-
-relationships:
-  - relationshipType: Source
-    assetId: azureml://registries/azureml-meta/models/Llama-2-70b/versions/13
-
-tags:
-  evaluation_type: text_generation
-  task: question-answering
-  accuracy_metric_name: exact_match
-  azure_registry_name: azureml-meta
-  azure_model_name: Llama-2-70b
-  azure_latest_model_version: 24
-  azure_latest_model_asset_id: azureml://registries/azureml-meta/models/Llama-2-70b/versions/24
-
-metrics:
-  accuracy: 0.853
-
-
-properties:
-  n_shot: 5
-  evaluation_sampling_ratio: 1.0
-  evaluation_split: "validation"
-  fewshot_sampling_ratio: 1.0
-  fewshot_split: "train"
--- a/assets/evaluation_results/boolq_llama_2_7b_chat_question_answering/asset.yaml
+++ b/assets/evaluation_results/boolq_llama_2_7b_chat_question_answering/asset.yaml
@ -1,3 +0,0 @@
-type: evaluationresult
-spec: spec.yaml
-categories: ["EvaluationResult"]
--- a/assets/evaluation_results/boolq_llama_2_7b_chat_question_answering/spec.yaml
+++ b/assets/evaluation_results/boolq_llama_2_7b_chat_question_answering/spec.yaml
@ -1,35 +0,0 @@
-type: evaluationresult
-name: boolq_llama_2_7b_chat_question_answering
-version: 2.04.11
-display_name: boolq_llama_2_7b_chat_question_answering
-description: llama-2-7b-chat run for boolq dataset
-dataset_family: boolq
-dataset_name: boolq
-
-model_name: llama-2-7b-chat
-model_version: "14"
-model_asset_id: azureml://registries/azureml-meta/models/Llama-2-7b-chat/versions/14
-
-relationships:
-  - relationshipType: Source
-    assetId: azureml://registries/azureml-meta/models/Llama-2-7b-chat/versions/14
-
-tags:
-  evaluation_type: text_generation
-  task: question-answering
-  accuracy_metric_name: exact_match
-  azure_registry_name: azureml-meta
-  azure_model_name: Llama-2-7b-chat
-  azure_latest_model_version: 24
-  azure_latest_model_asset_id: azureml://registries/azureml-meta/models/Llama-2-7b-chat/versions/24
-
-metrics:
-  accuracy: 0.771
-
-
-properties:
-  n_shot: 5
-  evaluation_sampling_ratio: 1.0
-  evaluation_split: "validation"
-  fewshot_sampling_ratio: 1.0
-  fewshot_split: "train"
--- a/assets/evaluation_results/boolq_llama_2_7b_question_answering/asset.yaml
+++ b/assets/evaluation_results/boolq_llama_2_7b_question_answering/asset.yaml
@ -1,3 +0,0 @@
-type: evaluationresult
-spec: spec.yaml
-categories: ["EvaluationResult"]
--- a/assets/evaluation_results/boolq_llama_2_7b_question_answering/spec.yaml
+++ b/assets/evaluation_results/boolq_llama_2_7b_question_answering/spec.yaml
@ -1,35 +0,0 @@
-type: evaluationresult
-name: boolq_llama_2_7b_question_answering
-version: 2.04.11
-display_name: boolq_llama_2_7b_question_answering
-description: llama-2-7b run for boolq dataset
-dataset_family: boolq
-dataset_name: boolq
-
-model_name: llama-2-7b
-model_version: "12"
-model_asset_id: azureml://registries/azureml-meta/models/Llama-2-7b/versions/12
-
-relationships:
-  - relationshipType: Source
-    assetId: azureml://registries/azureml-meta/models/Llama-2-7b/versions/12
-
-tags:
-  evaluation_type: text_generation
-  task: question-answering
-  accuracy_metric_name: exact_match
-  azure_registry_name: azureml-meta
-  azure_model_name: Llama-2-7b
-  azure_latest_model_version: 22
-  azure_latest_model_asset_id: azureml://registries/azureml-meta/models/Llama-2-7b/versions/22
-
-metrics:
-  accuracy: 0.628
-
-
-properties:
-  n_shot: 5
-  evaluation_sampling_ratio: 1.0
-  evaluation_split: "validation"
-  fewshot_sampling_ratio: 1.0
-  fewshot_split: "train"
--- a/assets/evaluation_results/boolq_meta-llama-3-70b-instruct_question_answering/asset.yaml
+++ b/assets/evaluation_results/boolq_meta-llama-3-70b-instruct_question_answering/asset.yaml
@ -1,3 +0,0 @@
-type: evaluationresult
-spec: spec.yaml
-categories: ["EvaluationResult"]
--- a/assets/evaluation_results/boolq_meta-llama-3-70b-instruct_question_answering/spec.yaml
+++ b/assets/evaluation_results/boolq_meta-llama-3-70b-instruct_question_answering/spec.yaml
@ -1,35 +0,0 @@
-type: evaluationresult
-name: boolq_meta-llama-3-70b-instruct_question_answering
-version: 2.04.11
-display_name: boolq_Meta-Llama-3-70B-Instruct_question_answering
-description: Meta-Llama-3-70B-Instruct run for boolq dataset
-dataset_family: boolq
-dataset_name: boolq
-
-model_name: Meta-Llama-3-70B-Instruct
-model_version: "2"
-model_asset_id: azureml://registries/azureml-meta/models/Meta-Llama-3-70B-Instruct/versions/2
-
-relationships:
-  - relationshipType: Source
-    assetId: azureml://registries/azureml-meta/models/Meta-Llama-3-70B-Instruct/versions/2
-
-tags:
-  evaluation_type: text_generation
-  task: question-answering
-  accuracy_metric_name: exact_match
-  azure_registry_name: azureml-meta
-  azure_model_name: Meta-Llama-3-70B-Instruct
-  azure_latest_model_version: 8
-  azure_latest_model_asset_id: azureml://registries/azureml-meta/models/Meta-Llama-3-70B-Instruct/versions/8
-
-metrics:
-  accuracy: 0.9027522935779817
-
-
-properties:
-  n_shot: 5
-  evaluation_sampling_ratio: 1.0
-  evaluation_split: "validation"
-  fewshot_sampling_ratio: 1.0
-  fewshot_split: "train"
--- a/assets/evaluation_results/boolq_meta-llama-3-70b_question_answering/asset.yaml
+++ b/assets/evaluation_results/boolq_meta-llama-3-70b_question_answering/asset.yaml
@ -1,3 +0,0 @@
-type: evaluationresult
-spec: spec.yaml
-categories: ["EvaluationResult"]
--- a/assets/evaluation_results/boolq_meta-llama-3-70b_question_answering/spec.yaml
+++ b/assets/evaluation_results/boolq_meta-llama-3-70b_question_answering/spec.yaml
@ -1,35 +0,0 @@
-type: evaluationresult
-name: boolq_meta-llama-3-70b_question_answering
-version: 2.04.11
-display_name: boolq_Meta-Llama-3-70B_question_answering
-description: Meta-Llama-3-70B run for boolq dataset
-dataset_family: boolq
-dataset_name: boolq
-
-model_name: Meta-Llama-3-70B
-model_version: "1"
-model_asset_id: azureml://registries/azureml-meta/models/Meta-Llama-3-70B/versions/1
-
-relationships:
-  - relationshipType: Source
-    assetId: azureml://registries/azureml-meta/models/Meta-Llama-3-70B/versions/1
-
-tags:
-  evaluation_type: text_generation
-  task: question-answering
-  accuracy_metric_name: exact_match
-  azure_registry_name: azureml-meta
-  azure_model_name: Meta-Llama-3-70B
-  azure_latest_model_version: 6
-  azure_latest_model_asset_id: azureml://registries/azureml-meta/models/Meta-Llama-3-70B/versions/6
-
-metrics:
-  accuracy: 0.8917431192660551
-
-
-properties:
-  n_shot: 5
-  evaluation_sampling_ratio: 1.0
-  evaluation_split: "validation"
-  fewshot_sampling_ratio: 1.0
-  fewshot_split: "train"
--- a/assets/evaluation_results/boolq_meta-llama-3-8b-instruct_question_answering/asset.yaml
+++ b/assets/evaluation_results/boolq_meta-llama-3-8b-instruct_question_answering/asset.yaml
@ -1,3 +0,0 @@
-type: evaluationresult
-spec: spec.yaml
-categories: ["EvaluationResult"]
--- a/assets/evaluation_results/boolq_meta-llama-3-8b-instruct_question_answering/spec.yaml
+++ b/assets/evaluation_results/boolq_meta-llama-3-8b-instruct_question_answering/spec.yaml
@ -1,35 +0,0 @@
-type: evaluationresult
-name: boolq_meta-llama-3-8b-instruct_question_answering
-version: 2.04.11
-display_name: boolq_Meta-Llama-3-8B-Instruct_question_answering
-description: Meta-Llama-3-8B-Instruct run for boolq dataset
-dataset_family: boolq
-dataset_name: boolq
-
-model_name: Meta-Llama-3-8B-Instruct
-model_version: "2"
-model_asset_id: azureml://registries/azureml-meta/models/Meta-Llama-3-8B-Instruct/versions/2
-
-relationships:
-  - relationshipType: Source
-    assetId: azureml://registries/azureml-meta/models/Meta-Llama-3-8B-Instruct/versions/2
-
-tags:
-  evaluation_type: text_generation
-  task: question-answering
-  accuracy_metric_name: exact_match
-  azure_registry_name: azureml-meta
-  azure_model_name: Meta-Llama-3-8B-Instruct
-  azure_latest_model_version: 8
-  azure_latest_model_asset_id: azureml://registries/azureml-meta/models/Meta-Llama-3-8B-Instruct/versions/8
-
-metrics:
-  accuracy: 0.863302752
-
-
-properties:
-  n_shot: 5
-  evaluation_sampling_ratio: 1.0
-  evaluation_split: "validation"
-  fewshot_sampling_ratio: 1.0
-  fewshot_split: "train"
--- a/assets/evaluation_results/boolq_meta-llama-3-8b_question_answering/asset.yaml
+++ b/assets/evaluation_results/boolq_meta-llama-3-8b_question_answering/asset.yaml
@ -1,3 +0,0 @@
-type: evaluationresult
-spec: spec.yaml
-categories: ["EvaluationResult"]
--- a/assets/evaluation_results/boolq_meta-llama-3-8b_question_answering/spec.yaml
+++ b/assets/evaluation_results/boolq_meta-llama-3-8b_question_answering/spec.yaml
@ -1,35 +0,0 @@
-type: evaluationresult
-name: boolq_meta-llama-3-8b_question_answering
-version: 2.04.11
-display_name: boolq_Meta-Llama-3-8B_question_answering
-description: Meta-Llama-3-8B run for boolq dataset
-dataset_family: boolq
-dataset_name: boolq
-
-model_name: Meta-Llama-3-8B
-model_version: "1"
-model_asset_id: azureml://registries/azureml-meta/models/Meta-Llama-3-8B/versions/1
-
-relationships:
-  - relationshipType: Source
-    assetId: azureml://registries/azureml-meta/models/Meta-Llama-3-8B/versions/1
-
-tags:
-  evaluation_type: text_generation
-  task: question-answering
-  accuracy_metric_name: exact_match
-  azure_registry_name: azureml-meta
-  azure_model_name: Meta-Llama-3-8B
-  azure_latest_model_version: 7
-  azure_latest_model_asset_id: azureml://registries/azureml-meta/models/Meta-Llama-3-8B/versions/7
-
-metrics:
-  accuracy: 0.8198776758409786
-
-
-properties:
-  n_shot: 5
-  evaluation_sampling_ratio: 1.0
-  evaluation_split: "validation"
-  fewshot_sampling_ratio: 1.0
-  fewshot_split: "train"
--- a/Показать больше
+++ b/Показать больше