зеркало из https://github.com/microsoft/hat.git
119 строки
6.1 KiB
Plaintext
119 строки
6.1 KiB
Plaintext
#ifdef TOML
# This file is a TOML document and a C header at once: the TOML metadata sits
# in regions the C preprocessor skips, and the C declarations sit in the TOML
# string stored under [declaration].code.

# Human-readable package metadata.
[description]
comment = "John Doe's GEMM Library"
author = "John Doe"
version = "1.2.3.5"
license_url = "https://www.apache.org/licenses/LICENSE-2.0.html"

# One sub-table per exported function, keyed by the function name.
[functions]
[functions.GEMM_B94D27B9934D3E08]
name = "GEMM_B94D27B9934D3E08"
description = "CPU Implementation of the GEMM algorithm: C = alpha * (A * B) + beta * C with specific sizes."
calling_convention = "stdcall"
# Argument order matches the C prototype in [declaration].code.
# NOTE(review): affine_map looks like per-dimension element strides for a dense
# row-major layout (e.g. [ 256, 1 ] for shape [ 512, 256 ]) - confirm against
# the HAT schema.
arguments = [
    { name = "A", description = "left-hand input matrix", logical_type = "affine_array", declared_type = "const float*", element_type = "float", usage = "input", shape = [ 512, 256 ], affine_map = [ 256, 1 ], affine_offset = 0 },
    { name = "B", description = "right-hand input matrix", logical_type = "affine_array", declared_type = "const float*", element_type = "float", usage = "input", shape = [ 256, 512 ], affine_map = [ 512, 1 ], affine_offset = 0 },
    { name = "C", description = "result accumulation matrix", logical_type = "affine_array", declared_type = "float*", element_type = "float", usage = "input_output", shape = [ 512, 512 ], affine_map = [ 512, 1 ], affine_offset = 0 },
    { name = "alpha", description = "Scalar to apply to A*B matrix product", logical_type = "element", declared_type = "float", element_type = "float", usage = "input" },
    { name = "beta", description = "Scalar to apply to the input C matrix before accumulating", logical_type = "element", declared_type = "float", element_type = "float", usage = "input" }
]
return = { name = "return value", description = "GEMM_B94D27B9934D3E08 return value", logical_type = "void", declared_type = "void", element_type = "void", usage = "output" }

# NOTE(review): presumably free-form, per-tool metadata, with each tool keeping
# its keys under a sub-table named after itself - confirm against the HAT schema.
[functions.GEMM_B94D27B9934D3E08.auxiliary]
[functions.GEMM_B94D27B9934D3E08.auxiliary.FooToolchain]
node = "gemm"

[functions.GEMM_B94D27B9934D3E08.auxiliary.Contoso]
implementation = "Contoso_GEMM_123"
contoso_tool_version = "1.0"
node = "GEMM"
[functions.GEMM_B94D27B9934D3E08.auxiliary.Contoso.implementation_config]
PackBMatrix = true

[functions.blas_sgemm_row_major]
name = "blas_sgemm_row_major"
description = "CPU Implementation of the GEMM algorithm: C = alpha * (A * B) + beta * C with runtime-known sizes and non-transposed row-major matrices."
calling_convention = "stdcall"
# Argument order matches the C prototype in [declaration].code.
# Storage is row-major and non-transposed, so each leading dimension is a row
# stride and a buffer spans (number of rows) * (leading dimension) elements:
# A is M x K, B is K x N, C is M x N.
arguments = [
    { name = "M", description = "Number of rows in the A and C matrices", logical_type = "element", declared_type = "int", element_type = "int", usage = "input" },
    { name = "N", description = "Number of columns in the B and C matrices", logical_type = "element", declared_type = "int", element_type = "int", usage = "input" },
    { name = "K", description = "Number of columns in the A matrix and rows in the B matrix", logical_type = "element", declared_type = "int", element_type = "int", usage = "input" },
    { name = "alpha", description = "Scalar to apply to A*B matrix product", logical_type = "element", declared_type = "float", element_type = "float", usage = "input" },
    { name = "A", description = "left-hand input matrix", logical_type = "runtime_array", declared_type = "const float*", element_type = "float", usage = "input", size = "lda * M" },
    { name = "lda", description = "Leading dimension step size of A", logical_type = "element", declared_type = "int", element_type = "int", usage = "input" },
    { name = "B", description = "right-hand input matrix", logical_type = "runtime_array", declared_type = "const float*", element_type = "float", usage = "input", size = "ldb * K" },
    { name = "ldb", description = "Leading dimension step size of B", logical_type = "element", declared_type = "int", element_type = "int", usage = "input" },
    { name = "beta", description = "Scalar to apply to the input C matrix before accumulating", logical_type = "element", declared_type = "float", element_type = "float", usage = "input" },
    # The result is accumulated into this buffer, so it is writable and serves
    # as both input and output; the C prototype takes a non-const pointer.
    { name = "C", description = "result accumulation matrix", logical_type = "runtime_array", declared_type = "float*", element_type = "float", usage = "input_output", size = "ldc * M" },
    { name = "ldc", description = "Leading dimension step size of C", logical_type = "element", declared_type = "int", element_type = "int", usage = "input" }
]
return = { name = "return value", description = "blas_sgemm_row_major return value", logical_type = "void", declared_type = "void", element_type = "void", usage = "output" }

# NOTE(review): going by the key names, target.required lists what the consuming
# machine must provide and target.optimized_for describes the hardware the
# code was tuned on - confirm against the HAT schema.
[target]
[target.required]
os = "windows"

[target.required.CPU]
architecture = "x86_64"
extensions = ["AVX2"]

[target.optimized_for.CPU]
name = "Intel Xeon E5-4669 v4"
family = "Broadwell"
# NOTE(review): unit is not stated here; presumably GHz - confirm.
clock_frequency = 2.2
cores = 22
threads = 44

[target.optimized_for.CPU.cache]
instruction_KB = 32
# NOTE(review): presumably one entry per cache level, innermost first, with
# line_sizes in bytes - confirm.
sizes_KB = [ 32, 256, 56320 ]
line_sizes = [ 64, 64, 64 ]

# The dependencies table provides information that a consumer of this .hat file
# will need to act on in order to properly consume the package, such as library
# files to link and dynamic libraries to make available at runtime.
[dependencies]
link_target = "sample_gemm.lib"
deploy_files = [ "sample_gemm.dll" ]
dynamic = [ { name = "Windows Universal C Runtime", version = "141", target_file = "ucrtbase.dll" } ]

# The compiled_with table provides information that a consumer of this .hat file
# may find useful but may not necessarily need to act on in order to successfully
# consume this package.
[compiled_with]
compiler = "MSVC141"
# NOTE(review): these are GCC/Clang-style switches while the compiler above is
# MSVC - confirm which toolchain actually produced the binaries.
flags = "-std=c++14 -ffast-math -fno-exceptions -fno-rtti"
crt = "ucrt"
libraries = [ { name = "LLVM OpenMP", version = "5.1", target_file = "libomp.lib" } ]

# The C declarations live inside a TOML multi-line literal string. The
# preprocessor guards placed right after the opening quotes and right before
# the closing quotes keep the surrounding TOML syntax out of sight of a C or
# C++ compiler, so the same file can be included as a header.
[declaration]
code = '''
#endif // TOML

#pragma once

#include <stdint.h>

#if defined(__cplusplus)
extern "C"
{
#endif // defined(__cplusplus)

//
// Functions
//

void GEMM_B94D27B9934D3E08(const float* A, const float* B, float* C, float alpha, float beta);

void blas_sgemm_row_major(int M, int N, int K, float alpha, const float *A, int lda, const float *B, int ldb, float beta, float *C, int ldc);

#if defined(__cplusplus)
} // extern "C"
#endif // defined(__cplusplus)

#ifdef TOML
'''
#endif // TOML