{
    "input_model": { "type": "HfModel", "model_path": "meta-llama/Llama-2-7b-hf" },
    "systems": {
        "local_system": {
            "type": "LocalSystem",
            "accelerators": [ { "device": "gpu", "execution_providers": [ "CUDAExecutionProvider" ] } ]
        }
    },
    "passes": {
        "tensor_parallel": {
            "type": "PyTorchTensorParallel",
            "user_script": "llama2_tensor_parallel.py",
            "class_name": "LlamaPyTorchTensorParallel",
            "world_size": 4
        },
        "conversion": {
            "type": "OnnxConversion",
            "target_opset": 17,
            "save_as_external_data": true,
            "all_tensors_to_one_file": true
        },
        "transformers_optimization_fp16": {
            "type": "OrtTransformersOptimization",
            "save_as_external_data": true,
            "all_tensors_to_one_file": true,
            "model_type": "gpt2",
            "opt_level": 0,
            "only_onnxruntime": false,
            "keep_io_types": false,
            "float16": true,
            "use_gqa": true
        }
    },
    "host": "local_system",
    "target": "local_system",
    "cache_dir": "cache",
    "output_dir": "models/tensor_parallel"
}