Mirror of https://github.com/microsoft/archai.git

chore(research): Renames harness to lm_eval_harness research project.

Parent: 81ffdd907b
Commit: b0a3436e04
@@ -0,0 +1,3 @@
+# HELM-Harness
+
+## Installation
@@ -6,10 +6,10 @@ from setuptools import find_packages, setup
 install_requires = [r.rstrip() for r in open("requirements.txt", "r").readlines()]
 
 setup(
-    name="harness",
-    version="0.1",
+    name="helm_harness",
+    version="0.0.1",
     author="Microsoft",
-    url="https://github.com/microsoft/archai/research/harness",
+    url="https://github.com/microsoft/archai/research/helm_harness",
     license="MIT",
     install_requires=install_requires,
     packages=find_packages(),
@@ -1,19 +1,19 @@
-# Harness
+# LM-Eval-Harness
 
 ## Installation
 
-To install Harness, run the following commands in your command line:
+To install `lm_eval_harness`, run the following commands in your command line:
 
 ```shell
-conda create -n harness python=3.8
-conda activate harness
+conda create -n lm_eval_harness python=3.8
+conda activate lm_eval_harness
 
 pip install -e .
 ```
 
-## Evaluating with LM-Evaluation-Harness (lm-eval)
+## Evaluating with `lm_eval_harness`
 
-To evaluate your model with LM-Evaluation-Harness, run the following command:
+To evaluate your model with `lm_eval_harness`, run the following command:
 
 ```shell
 python evaluate_with_lm_eval.py --help
@@ -4,14 +4,13 @@
 import argparse
 import json
 
-from transformers import AutoModelForCausalLM, AutoTokenizer
-
-from harness.lm_eval_evaluator import evaluate_wrapper
-from harness.lm_eval_hf_model import HFEvalModel
-from harness.tasks.human_eval import HumanEval
-from harness.utils.regex import MultiChoice, pattern_match
 from lm_eval.evaluator import make_table
 from lm_eval.tasks import ALL_TASKS, TASK_REGISTRY
+from lm_eval_harness.lm_eval_evaluator import evaluate_wrapper
+from lm_eval_harness.lm_eval_hf_model import HFEvalModel
+from lm_eval_harness.tasks.human_eval import HumanEval
+from lm_eval_harness.utils.regex import MultiChoice, pattern_match
+from transformers import AutoModelForCausalLM, AutoTokenizer
 
 # Ensures additional tasks are loaded and registered
 ALL_TASKS.append("human_eval")
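The two trailing lines of this hunk are what make the custom `HumanEval` task visible to `lm_eval`. A minimal, hedged sketch of the full registration pattern the script relies on; the explicit `TASK_REGISTRY` entry is an assumption about what importing the task module does, since it is not shown in this diff:

```python
# Hedged sketch of custom-task registration; the TASK_REGISTRY line is an
# assumption -- this diff only shows ALL_TASKS.append("human_eval").
from lm_eval.tasks import ALL_TASKS, TASK_REGISTRY, get_task_dict
from lm_eval_harness.tasks.human_eval import HumanEval

TASK_REGISTRY["human_eval"] = HumanEval  # assumed side effect of the task module
if "human_eval" not in ALL_TASKS:
    ALL_TASKS.append("human_eval")

task_dict = get_task_dict(["human_eval"])  # now resolvable like any built-in task
```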
@@ -112,7 +111,7 @@ if __name__ == "__main__":
     model = AutoModelForCausalLM.from_pretrained(args.pre_trained_model_path)
     tokenizer = AutoTokenizer.from_pretrained(args.hub_tokenizer_path)
     hf_model = HFEvalModel(model, tokenizer)
-
+
     outputs = evaluate_wrapper(
         hf_model,
         task_names,
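Pieced together from the call site above and the notebook cells later in this diff, a minimal end-to-end sketch of running an evaluation; `"gpt2"` and the single-task list are illustrative choices, not mandated by the script:

```python
# Assembled from imports and call sites visible in this diff; "gpt2" and the
# task list are illustrative, and evaluate_wrapper's extra kwargs are omitted.
from transformers import AutoModelForCausalLM, AutoTokenizer

from lm_eval.evaluator import make_table
from lm_eval_harness.lm_eval_evaluator import evaluate_wrapper
from lm_eval_harness.lm_eval_hf_model import HFEvalModel

model = AutoModelForCausalLM.from_pretrained("gpt2")
tokenizer = AutoTokenizer.from_pretrained("gpt2")
hf_model = HFEvalModel(model, tokenizer)

outputs = evaluate_wrapper(hf_model, ["human_eval"])
print(make_table(outputs))  # same reporting helper the script imports
```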
@@ -2,15 +2,15 @@
 # Licensed under the MIT license.
 
 import random
-from typing import List, Optional
 from hashlib import sha1
+from typing import List, Optional
 
 import numpy as np
-from harness.lm_eval_hf_model import HFEvalModel
 from lm_eval.base import CachingLM
 from lm_eval.evaluator import evaluate
 from lm_eval.tasks import get_task_dict
 from lm_eval.utils import run_task_tests
+from lm_eval_harness.lm_eval_hf_model import HFEvalModel
 
 
 def evaluate_wrapper(
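Only the opening of `evaluate_wrapper`'s signature appears in this hunk. Based on the imports above, a hedged sketch of what such a wrapper plausibly does; keyword names follow `lm_eval` 0.x, and the optional caching path is an assumption:

```python
# Hedged sketch, not the project's implementation: wire together the pieces
# this module imports, the way lm-eval's own CLI does.
from lm_eval.base import CachingLM
from lm_eval.evaluator import evaluate
from lm_eval.tasks import get_task_dict


def evaluate_wrapper_sketch(lm, task_names, num_fewshot=0, cache_path=None):
    if cache_path is not None:
        lm = CachingLM(lm, cache_path)  # assumption: optional request caching
    task_dict = get_task_dict(task_names)
    return evaluate(lm=lm, task_dict=task_dict, num_fewshot=num_fewshot)
```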
@@ -4,16 +4,20 @@
 from typing import List, Optional
 
 import torch
-from harness.utils.multiple_token_stopping_criteria import MultipleTokenStoppingCriteria
-from harness.utils.request_factory import Request
 from lm_eval.base import BaseLM
+from lm_eval_harness.utils.multiple_token_stopping_criteria import (
+    MultipleTokenStoppingCriteria,
+)
+from lm_eval_harness.utils.request_factory import Request
 from tqdm import tqdm
 from transformers.generation.stopping_criteria import StoppingCriteriaList
 from transformers.tokenization_utils import PreTrainedTokenizer
 
 
 class HFEvalModel(BaseLM):
-    def __init__(self, model: torch.nn.Module, tokenizer: PreTrainedTokenizer, force_attention_mask: Optional[bool] = False) -> None:
+    def __init__(
+        self, model: torch.nn.Module, tokenizer: PreTrainedTokenizer, force_attention_mask: Optional[bool] = False
+    ) -> None:
         super().__init__()
 
         self._device = torch.device("cpu")
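For background on the `MultipleTokenStoppingCriteria` import above: `transformers` lets generation halt on custom conditions through the `StoppingCriteria` protocol. An illustrative criterion in that style, not the project's implementation:

```python
# Illustrative only -- shows the StoppingCriteria protocol that
# MultipleTokenStoppingCriteria plugs into via StoppingCriteriaList.
import torch
from transformers import StoppingCriteria, StoppingCriteriaList


class StopOnAnyTokenId(StoppingCriteria):  # hypothetical helper, for illustration
    def __init__(self, stop_ids):
        self.stop_ids = set(stop_ids)

    def __call__(self, input_ids: torch.LongTensor, scores, **kwargs) -> bool:
        # Halt as soon as the most recently generated token is a stop id.
        return int(input_ids[0, -1]) in self.stop_ids


stopping_criteria = StoppingCriteriaList([StopOnAnyTokenId([50256])])  # 50256: GPT-2 EOS
```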
@@ -0,0 +1,2 @@
+# Copyright (c) Microsoft Corporation.
+# Licensed under the MIT license.
@@ -6,9 +6,9 @@ from typing import Any, Dict, List
 
 from datasets.arrow_dataset import Dataset
 from evaluate import load
-from harness.utils.request_factory import Request, rf
 from lm_eval.base import Task
 from lm_eval.metrics import mean
+from lm_eval_harness.utils.request_factory import Request, rf
 
 # Allow code evaluation
 os.environ["HF_ALLOW_CODE_EVAL"] = "1"
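The environment flag set above is the opt-in required by the HuggingFace `evaluate` library before it will execute model-generated code. A self-contained example of the `code_eval` metric that flag unlocks; the toy reference/prediction pair is illustrative:

```python
# Standalone demo of the code_eval metric gated by HF_ALLOW_CODE_EVAL.
import os

os.environ["HF_ALLOW_CODE_EVAL"] = "1"  # same opt-in the diffed module sets

from evaluate import load

code_eval = load("code_eval")
pass_at_k, _ = code_eval.compute(
    references=["assert add(1, 2) == 3"],
    predictions=[["def add(a, b):\n    return a + b"]],
    k=[1],
)
print(pass_at_k)  # -> {'pass@1': 1.0}
```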
@@ -0,0 +1,2 @@
+# Copyright (c) Microsoft Corporation.
+# Licensed under the MIT license.
@@ -0,0 +1,17 @@
+# Copyright (c) Microsoft Corporation.
+# Licensed under the MIT license.
+
+from setuptools import find_packages, setup
+
+install_requires = [r.rstrip() for r in open("requirements.txt", "r").readlines()]
+
+setup(
+    name="lm_eval_harness",
+    version="0.0.1",
+    author="Microsoft",
+    url="https://github.com/microsoft/archai/research/lm_eval_harness",
+    license="MIT",
+    install_requires=install_requires,
+    packages=find_packages(),
+    include_package_data=True,
+)
@@ -5,9 +5,9 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "# How-To Evaluate a Custom Task with LM-Eval\n",
+    "# How-To Evaluate a Custom Task with LM-Eval Harness\n",
     "\n",
-    "Even though `lm-eval` framework supports more than 200 tasks, one might want to implement an additional one. With that in mind, this tutorial walks through the process of creating a custom task, including it in the registry and evaluating models with it."
+    "Even though `lm_eval` framework supports more than 200 tasks, one might want to implement an additional one. With that in mind, this tutorial walks through the process of creating a custom task, including it in the registry and evaluating models with it."
    ]
   },
   {
@@ -17,7 +17,7 @@
    "source": [
     "## Installation\n",
     "\n",
-    "The `harness` project is designed to be an installable module, which allow users to call it from outside its package. Thus, one can install it as follows:"
+    "The `lm_eval_harness` project is designed to be an installable module, which allow users to call it from outside its package. Thus, one can install it as follows:"
    ]
   },
   {
@@ -27,9 +27,9 @@
    "outputs": [],
    "source": [
     "try:\n",
-    "    import harness\n",
+    "    import lm_eval_harness\n",
     "except ModuleNotFoundError:\n",
-    "    !pip install git+https://github.com/microsoft/archai.git@pre-release#subdirectory=research/harness"
+    "    !pip install git+https://github.com/microsoft/archai.git@pre-release#subdirectory=research/lm_eval_harness"
    ]
   },
   {
@@ -75,7 +75,7 @@
     "from typing import Any, Dict, List\n",
     "\n",
     "from datasets.arrow_dataset import Dataset\n",
-    "from harness.utils.request_factory import Request, rf\n",
+    "from lm_eval_harness.utils.request_factory import Request, rf\n",
     "from lm_eval.base import Task\n",
     "from lm_eval.metrics import mean\n",
     "\n",
@@ -222,8 +222,8 @@
     "\n",
     "from lm_eval.evaluator import make_table\n",
     "\n",
-    "from harness.lm_eval_evaluator import evaluate_wrapper\n",
-    "from harness.lm_eval_hf_model import HFEvalModel\n",
+    "from lm_eval_harness.lm_eval_evaluator import evaluate_wrapper\n",
+    "from lm_eval_harness.lm_eval_hf_model import HFEvalModel\n",
     "\n",
     "model = AutoModelForCausalLM.from_pretrained(\"gpt2\")\n",
     "tokenizer = AutoTokenizer.from_pretrained(\"gpt2\")\n",
@@ -5,9 +5,9 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "# How-To Evaluate Models with LM-Eval\n",
+    "# How-To Evaluate Models with LM-Eval Harness\n",
     "\n",
-    "The `harness` research project implements a wrapper over the `lm-eval` framework, provided by EleutherAI. It is designed to make it easy to evaluate NLP models and compare their performance. In this tutorial, we will walk through the process of evaluating NLP models with `harness`, including how to set up the framework, how to use it to evaluate models, and how to interpret the results."
+    "The `lm_eval_harness` research project implements a wrapper over the `lm_eval` framework, provided by EleutherAI. It is designed to make it easy to evaluate NLP models and compare their performance. In this tutorial, we will walk through the process of evaluating NLP models with `lm_eval_harness`, including how to set up the framework, how to use it to evaluate models, and how to interpret the results."
    ]
   },
   {
@@ -17,19 +17,40 @@
    "source": [
     "## Installation\n",
     "\n",
-    "The `harness` project is designed to be an installable module, which allow users to call it from outside its package. Thus, one can install it as follows:"
+    "The `lm_eval_harness` project is designed to be an installable module, which allow users to call it from outside its package. Thus, one can install it as follows:"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 3,
+   "execution_count": 1,
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Collecting git+https://github.com/microsoft/archai.git@pre-release#subdirectory=research/lm_eval_harness\n",
+      " Cloning https://github.com/microsoft/archai.git (to revision pre-release) to c:\\users\\gderosa\\appdata\\local\\temp\\pip-req-build-2q9113pq\n",
+      " Resolved https://github.com/microsoft/archai.git to commit 81ffdd907b9485e3663f1ddbf32e2f862a65f4fe\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      " Running command git clone --filter=blob:none --quiet https://github.com/microsoft/archai.git 'C:\\Users\\gderosa\\AppData\\Local\\Temp\\pip-req-build-2q9113pq'\n",
+      " Running command git checkout -b pre-release --track origin/pre-release\n",
+      " Branch 'pre-release' set up to track remote branch 'pre-release' from 'origin'.\n",
+      " Switched to a new branch 'pre-release'\n",
+      "ERROR: git+https://github.com/microsoft/archai.git@pre-release#subdirectory=research/lm_eval_harness does not appear to be a Python project: neither 'setup.py' nor 'pyproject.toml' found.\n"
+     ]
+    }
+   ],
    "source": [
     "try:\n",
-    "    import harness\n",
+    "    import lm_eval_harness\n",
     "except ModuleNotFoundError:\n",
-    "    !pip install git+https://github.com/microsoft/archai.git@pre-release#subdirectory=research/harness"
+    "    !pip install git+https://github.com/microsoft/archai.git@pre-release#subdirectory=research/lm_eval_harness"
    ]
   },
   {
@@ -46,12 +67,24 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 4,
+   "execution_count": 2,
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "ename": "ModuleNotFoundError",
+     "evalue": "No module named 'lm_eval_harness'",
+     "output_type": "error",
+     "traceback": [
+      "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
+      "\u001b[1;31mModuleNotFoundError\u001b[0m Traceback (most recent call last)",
+      "\u001b[1;32m~\\AppData\\Local\\Temp\\ipykernel_388\\1302793399.py\u001b[0m in \u001b[0;36m<module>\u001b[1;34m\u001b[0m\n\u001b[0;32m 1\u001b[0m \u001b[1;32mfrom\u001b[0m \u001b[0mtransformers\u001b[0m \u001b[1;32mimport\u001b[0m \u001b[0mAutoModelForCausalLM\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mAutoTokenizer\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m----> 2\u001b[1;33m \u001b[1;32mfrom\u001b[0m \u001b[0mlm_eval_harness\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mlm_eval_hf_model\u001b[0m \u001b[1;32mimport\u001b[0m \u001b[0mHFEvalModel\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 3\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 4\u001b[0m \u001b[0mmodel\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mAutoModelForCausalLM\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mfrom_pretrained\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m\"gpt2\"\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 5\u001b[0m \u001b[0mtokenizer\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mAutoTokenizer\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mfrom_pretrained\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m\"gpt2\"\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
+      "\u001b[1;31mModuleNotFoundError\u001b[0m: No module named 'lm_eval_harness'"
+     ]
+    }
+   ],
    "source": [
     "from transformers import AutoModelForCausalLM, AutoTokenizer\n",
-    "from harness.lm_eval_hf_model import HFEvalModel\n",
+    "from lm_eval_harness.lm_eval_hf_model import HFEvalModel\n",
     "\n",
     "model = AutoModelForCausalLM.from_pretrained(\"gpt2\")\n",
     "tokenizer = AutoTokenizer.from_pretrained(\"gpt2\")\n",
@@ -83,7 +116,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 5,
+   "execution_count": null,
    "metadata": {},
    "outputs": [
     {
@@ -138,7 +171,7 @@
    ],
    "source": [
     "from lm_eval.tasks import ALL_TASKS\n",
-    "from harness.lm_eval_evaluator import evaluate_wrapper\n",
+    "from lm_eval_harness.lm_eval_evaluator import evaluate_wrapper\n",
     "\n",
     "print(f\"List of tasks: {ALL_TASKS}\")\n",
     "\n",
@@ -162,7 +195,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 6,
+   "execution_count": null,
    "metadata": {},
    "outputs": [
     {