Mirror of https://github.com/microsoft/DeepSpeed.git

add CPU autotp UT (#4263)

This commit is contained in:
Parent: 28b9d5c231
Commit: 388c84834f
@@ -76,4 +76,5 @@ jobs:
           source oneCCL/build/_install/env/setvars.sh
           unset TORCH_CUDA_ARCH_LIST # only jit compile for current arch
           cd tests
-          TRANSFORMERS_CACHE=~/tmp/transformers_cache/ TORCH_EXTENSIONS_DIR=./torch-extensions pytest -m 'seq_inference' -m 'inference_ops' -m 'inference' unit/
+          TRANSFORMERS_CACHE=~/tmp/transformers_cache/ TORCH_EXTENSIONS_DIR=./torch-extensions pytest -m 'seq_inference' unit/
+          TRANSFORMERS_CACHE=~/tmp/transformers_cache/ TORCH_EXTENSIONS_DIR=./torch-extensions pytest -m 'inference_ops' -m 'inference' unit/
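The single invocation that this hunk removes stacked three -m flags; pytest's -m option takes one marker expression, and when the flag is repeated only the last one takes effect, which is the likely motivation for splitting the run into two commands. A minimal sketch of how such markers attach to tests (the test names and bodies here are hypothetical, only the marker names come from the workflow):

    import pytest

    @pytest.mark.seq_inference
    def test_autotp_generation():  # hypothetical test name
        assert True

    @pytest.mark.inference_ops
    def test_fused_kernel_op():  # hypothetical test name
        assert True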
@@ -12,6 +12,10 @@ from unit.common import DistributedTest
 from deepspeed.accelerator import get_accelerator
 
 from transformers import (AutoConfig, AutoTokenizer, AutoModelForCausalLM)
+from deepspeed.ops.op_builder import InferenceBuilder
+
+if not deepspeed.ops.__compatible_ops__[InferenceBuilder.NAME]:
+    pytest.skip("This op had not been implemented on this system.", allow_module_level=True)
 
 rocm_version = OpBuilder.installed_rocm_version()
 if rocm_version != (0, 0):
@@ -12,6 +12,10 @@ from unit.common import DistributedTest
 from deepspeed.accelerator import get_accelerator
 
 from transformers import (AutoConfig, AutoTokenizer, AutoModelForCausalLM)
+from deepspeed.ops.op_builder import InferenceBuilder
+
+if not deepspeed.ops.__compatible_ops__[InferenceBuilder.NAME]:
+    pytest.skip("This op had not been implemented on this system.", allow_module_level=True)
 
 rocm_version = OpBuilder.installed_rocm_version()
 if rocm_version != (0, 0):
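The identical hunk appears twice because the commit applies the same guard to more than one test module (file names are not preserved in this view). deepspeed.ops.__compatible_ops__ maps each op-builder's NAME to a boolean saying whether that op can be built and loaded on the current machine, and allow_module_level=True lets pytest.skip be called at import time to skip the whole file. The pattern as a standalone sketch:

    import pytest
    import deepspeed
    from deepspeed.ops.op_builder import InferenceBuilder

    # Skip every test in this module when the custom inference kernels
    # cannot be compiled or loaded here (e.g. a CPU-only host).
    if not deepspeed.ops.__compatible_ops__[InferenceBuilder.NAME]:
        pytest.skip("This op had not been implemented on this system.", allow_module_level=True)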
@@ -14,6 +14,10 @@ from deepspeed.ops.op_builder import OpBuilder
 from deepspeed.utils import safe_get_full_grad
 import numpy.testing as npt
 from unit.common import DistributedTest
+from deepspeed.ops.op_builder import InferenceBuilder
+
+if not deepspeed.ops.__compatible_ops__[InferenceBuilder.NAME]:
+    pytest.skip("This op had not been implemented on this system.", allow_module_level=True)
 
 from transformers import (AutoConfig, AutoTokenizer, AutoModelForCausalLM)
 
@@ -22,9 +22,6 @@ from torch import nn
 from deepspeed.accelerator import get_accelerator
 from deepspeed.ops.op_builder import InferenceBuilder
 
-if not deepspeed.ops.__compatible_ops__[InferenceBuilder.NAME]:
-    pytest.skip("This op had not been implemented on this system.", allow_module_level=True)
-
 rocm_version = OpBuilder.installed_rocm_version()
 if rocm_version != (0, 0):
     pytest.skip("skip inference tests on rocm for now", allow_module_level=True)
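This removal is the crux of the test-side change: the module-level skip disabled the whole file on hosts without the inference kernels, including the new CPU autotp cases, which need no custom kernels at all. The guard instead moves into the kernel-dependent tests (see the TestMPSize hunk below). A sketch of the per-test form, with a hypothetical test name:

    import pytest
    import deepspeed
    from deepspeed.ops.op_builder import InferenceBuilder

    def test_kernel_injection_path():  # hypothetical test name
        # Guard only this test, so kernel-free tests in the same
        # module still run on CPU-only hosts.
        if not deepspeed.ops.__compatible_ops__[InferenceBuilder.NAME]:
            pytest.skip("This op had not been implemented on this system.")
        ...  # kernel-dependent assertions would follow here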
@@ -365,6 +362,9 @@ class TestMPSize(DistributedTest):
         if invalid_test_msg:
             pytest.skip(invalid_test_msg)
 
+        if not deepspeed.ops.__compatible_ops__[InferenceBuilder.NAME]:
+            pytest.skip("This op had not been implemented on this system.", allow_module_level=True)
+
         model, task = model_w_task
         local_rank = int(os.getenv("LOCAL_RANK", "0"))
 
@@ -401,6 +401,9 @@ class TestLowCpuMemUsage(DistributedTest):
     ):
         model, task = model_w_task
+        dtype = torch.float16
+        if dtype not in get_accelerator().supported_dtypes():
+            pytest.skip(f"Acceleraor {get_accelerator().device_name()} does not support {dtype}.")
 
         local_rank = int(os.getenv("LOCAL_RANK", "0"))
 
         pipe = pipeline(task, model=model, model_kwargs={"low_cpu_mem_usage": True}, device=local_rank, framework="pt")
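get_accelerator() returns DeepSpeed's abstraction over the active device backend, and supported_dtypes() reports which torch dtypes it can execute, so fp16-only logic degrades to a clean skip on backends (such as CPU) that lack fp16. A quick check, runnable wherever DeepSpeed is installed:

    import torch
    from deepspeed.accelerator import get_accelerator

    acc = get_accelerator()
    print(acc.device_name())       # e.g. "cuda" or "cpu"
    print(acc.supported_dtypes())  # e.g. [torch.float, torch.half, torch.bfloat16]
    if torch.float16 not in acc.supported_dtypes():
        print("fp16 unsupported here; fp16 tests should skip")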
@@ -514,7 +517,7 @@ class TestInjectionPolicy(DistributedTest):
     [("Helsinki-NLP/opus-mt-en-de", "translation"), ("Salesforce/codegen-350M-mono", "text-generation")],
     ids=["marian", "codegen"],  #codegen has fusedqkv weight.
 )
-@pytest.mark.parametrize("dtype", [torch.float16], ids=["fp16"])
+@pytest.mark.parametrize("dtype", [torch.float16, torch.bfloat16], ids=["fp16", "bf16"])
 class TestAutoTensorParallelism(DistributedTest):
     world_size = [2]
 
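Widening the dtype parametrization doubles the matrix: each model id now runs under both fp16 and bf16. A self-contained sketch of how the ids compose (the real class takes more parameters than shown here):

    import pytest
    import torch

    @pytest.mark.parametrize("dtype", [torch.float16, torch.bfloat16], ids=["fp16", "bf16"])
    @pytest.mark.parametrize("model_id", ["marian", "codegen"])
    def test_dtype_matrix(model_id, dtype):
        # Four cases are collected: [marian-fp16], [marian-bf16],
        # [codegen-fp16], [codegen-bf16].
        assert dtype in (torch.float16, torch.bfloat16)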
@@ -530,6 +533,13 @@ class TestAutoTensorParallelism(DistributedTest):
         if invalid_test_msg:
             pytest.skip(invalid_test_msg)
 
+        if dtype not in get_accelerator().supported_dtypes():
+            pytest.skip(f"Acceleraor {get_accelerator().device_name()} does not support {dtype}.")
+
+        # TODO: enable this test after torch 2.1 stable release
+        if dtype == torch.bfloat16 and model_w_task[0] == "Salesforce/codegen-350M-mono":
+            pytest.skip("Codegen model(bf16) need to use torch version > 2.0.")
+
         model, task = model_w_task
         local_rank = int(os.getenv("LOCAL_RANK", "0"))
         world_size = int(os.getenv("WORLD_SIZE", "2"))
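Downstream of this setup, an autotp test typically shards the model with deepspeed.init_inference without kernel injection, which is why these cases can run on a CPU-only host at all. A hedged sketch; the model loading and exact keyword set are assumptions based on the API of this era, not code from the commit:

    import os
    import torch
    import deepspeed
    from transformers import AutoModelForCausalLM

    world_size = int(os.getenv("WORLD_SIZE", "2"))
    model = AutoModelForCausalLM.from_pretrained("Salesforce/codegen-350M-mono")
    # replace_with_kernel_inject=False selects automatic tensor parallelism,
    # which shards linear layers across ranks without custom CUDA kernels.
    model = deepspeed.init_inference(model,
                                     mp_size=world_size,
                                     dtype=torch.bfloat16,
                                     replace_with_kernel_inject=False)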