From 388c84834fca87465aff8bb8f6d85be88fa82ba6 Mon Sep 17 00:00:00 2001
From: Yejing-Lai <55339926+Yejing-Lai@users.noreply.github.com>
Date: Thu, 28 Sep 2023 06:39:24 +0800
Subject: [PATCH] add CPU autotp UT (#4263)

---
 .github/workflows/cpu-inference.yml       |  3 ++-
 tests/unit/hybrid_engine/test_he_all.py   |  4 ++++
 tests/unit/hybrid_engine/test_he_llama.py |  4 ++++
 tests/unit/hybrid_engine/test_he_lora.py  |  4 ++++
 tests/unit/inference/test_inference.py    | 18 ++++++++++++++----
 5 files changed, 28 insertions(+), 5 deletions(-)

diff --git a/.github/workflows/cpu-inference.yml b/.github/workflows/cpu-inference.yml
index 2c555203e..8bba51dab 100644
--- a/.github/workflows/cpu-inference.yml
+++ b/.github/workflows/cpu-inference.yml
@@ -76,4 +76,5 @@ jobs:
           source oneCCL/build/_install/env/setvars.sh
           unset TORCH_CUDA_ARCH_LIST # only jit compile for current arch
           cd tests
-          TRANSFORMERS_CACHE=~/tmp/transformers_cache/ TORCH_EXTENSIONS_DIR=./torch-extensions pytest -m 'seq_inference' -m 'inference_ops' -m 'inference' unit/
+          TRANSFORMERS_CACHE=~/tmp/transformers_cache/ TORCH_EXTENSIONS_DIR=./torch-extensions pytest -m 'seq_inference' unit/
+          TRANSFORMERS_CACHE=~/tmp/transformers_cache/ TORCH_EXTENSIONS_DIR=./torch-extensions pytest -m 'inference_ops' -m 'inference' unit/
diff --git a/tests/unit/hybrid_engine/test_he_all.py b/tests/unit/hybrid_engine/test_he_all.py
index 86eabb1ad..aa1f12064 100644
--- a/tests/unit/hybrid_engine/test_he_all.py
+++ b/tests/unit/hybrid_engine/test_he_all.py
@@ -12,6 +12,10 @@ from unit.common import DistributedTest
 from deepspeed.accelerator import get_accelerator
 from transformers import (AutoConfig, AutoTokenizer, AutoModelForCausalLM)
 
+from deepspeed.ops.op_builder import InferenceBuilder
+
+if not deepspeed.ops.__compatible_ops__[InferenceBuilder.NAME]:
+    pytest.skip("This op had not been implemented on this system.", allow_module_level=True)
 
 rocm_version = OpBuilder.installed_rocm_version()
 if rocm_version != (0, 0):
diff --git a/tests/unit/hybrid_engine/test_he_llama.py b/tests/unit/hybrid_engine/test_he_llama.py
index 5f992f69b..fcf5b8ffb 100644
--- a/tests/unit/hybrid_engine/test_he_llama.py
+++ b/tests/unit/hybrid_engine/test_he_llama.py
@@ -12,6 +12,10 @@ from unit.common import DistributedTest
 from deepspeed.accelerator import get_accelerator
 from transformers import (AutoConfig, AutoTokenizer, AutoModelForCausalLM)
 
+from deepspeed.ops.op_builder import InferenceBuilder
+
+if not deepspeed.ops.__compatible_ops__[InferenceBuilder.NAME]:
+    pytest.skip("This op had not been implemented on this system.", allow_module_level=True)
 
 rocm_version = OpBuilder.installed_rocm_version()
 if rocm_version != (0, 0):
diff --git a/tests/unit/hybrid_engine/test_he_lora.py b/tests/unit/hybrid_engine/test_he_lora.py
index f61fdeb3a..ea27239ed 100644
--- a/tests/unit/hybrid_engine/test_he_lora.py
+++ b/tests/unit/hybrid_engine/test_he_lora.py
@@ -14,6 +14,10 @@ from deepspeed.ops.op_builder import OpBuilder
 from deepspeed.utils import safe_get_full_grad
 import numpy.testing as npt
 from unit.common import DistributedTest
+from deepspeed.ops.op_builder import InferenceBuilder
+
+if not deepspeed.ops.__compatible_ops__[InferenceBuilder.NAME]:
+    pytest.skip("This op had not been implemented on this system.", allow_module_level=True)
 
 from transformers import (AutoConfig, AutoTokenizer, AutoModelForCausalLM)
 
diff --git a/tests/unit/inference/test_inference.py b/tests/unit/inference/test_inference.py
index 4ee3cd73c..894f040be 100644
--- a/tests/unit/inference/test_inference.py
+++ b/tests/unit/inference/test_inference.py
@@ -22,9 +22,6 @@ from torch import nn
 from deepspeed.accelerator import get_accelerator
 from deepspeed.ops.op_builder import InferenceBuilder
 
-if not deepspeed.ops.__compatible_ops__[InferenceBuilder.NAME]:
-    pytest.skip("This op had not been implemented on this system.", allow_module_level=True)
-
 rocm_version = OpBuilder.installed_rocm_version()
 if rocm_version != (0, 0):
     pytest.skip("skip inference tests on rocm for now", allow_module_level=True)
@@ -365,6 +362,9 @@ class TestMPSize(DistributedTest):
         if invalid_test_msg:
             pytest.skip(invalid_test_msg)
 
+        if not deepspeed.ops.__compatible_ops__[InferenceBuilder.NAME]:
+            pytest.skip("This op had not been implemented on this system.", allow_module_level=True)
+
         model, task = model_w_task
         local_rank = int(os.getenv("LOCAL_RANK", "0"))
 
@@ -401,6 +401,9 @@ class TestLowCpuMemUsage(DistributedTest):
     ):
         model, task = model_w_task
         dtype = torch.float16
+        if dtype not in get_accelerator().supported_dtypes():
+            pytest.skip(f"Accelerator {get_accelerator().device_name()} does not support {dtype}.")
+
         local_rank = int(os.getenv("LOCAL_RANK", "0"))
 
         pipe = pipeline(task, model=model, model_kwargs={"low_cpu_mem_usage": True}, device=local_rank, framework="pt")
@@ -514,7 +517,7 @@ class TestInjectionPolicy(DistributedTest):
     [("Helsinki-NLP/opus-mt-en-de", "translation"), ("Salesforce/codegen-350M-mono", "text-generation")],
     ids=["marian", "codegen"],  #codegen has fusedqkv weight.
 )
-@pytest.mark.parametrize("dtype", [torch.float16], ids=["fp16"])
+@pytest.mark.parametrize("dtype", [torch.float16, torch.bfloat16], ids=["fp16", "bf16"])
 class TestAutoTensorParallelism(DistributedTest):
     world_size = [2]
 
@@ -530,6 +533,13 @@ class TestAutoTensorParallelism(DistributedTest):
         if invalid_test_msg:
             pytest.skip(invalid_test_msg)
 
+        if dtype not in get_accelerator().supported_dtypes():
+            pytest.skip(f"Accelerator {get_accelerator().device_name()} does not support {dtype}.")
+
+        # TODO: enable this test after torch 2.1 stable release
+        if dtype == torch.bfloat16 and model_w_task[0] == "Salesforce/codegen-350M-mono":
+            pytest.skip("Codegen model (bf16) needs torch version > 2.0.")
+
         model, task = model_w_task
         local_rank = int(os.getenv("LOCAL_RANK", "0"))
         world_size = int(os.getenv("WORLD_SIZE", "2"))