hat/samples/sample_gemm_library.hat

119 строки
6.1 KiB
Plaintext

#ifdef TOML
# Descriptive metadata for the package: author, free-form comment, version
# string and a link to the license text.
[description]
author = "John Doe"
comment = "John Doe's GEMM Library"
version = "1.2.3.5"
license_url = "https://www.apache.org/licenses/LICENSE-2.0.html"
# Table of exported functions. Each sub-table key matches the name field of
# the function it describes.
[functions]

# GEMM variant with fixed sizes: every matrix argument carries an explicit
# shape together with an affine_map and affine_offset.
[functions.GEMM_B94D27B9934D3E08]
name = "GEMM_B94D27B9934D3E08"
description = "CPU Implementation of the GEMM algorithm: C = alpha * (A * B) + beta * C with specific sizes."
calling_convention = "stdcall"
arguments = [
    { name = "A", description = "left-hand input matrix", logical_type = "affine_array", declared_type = "const float*", element_type = "float", usage = "input", shape = [512, 256], affine_map = [256, 1], affine_offset = 0 },
    { name = "B", description = "right-hand input matrix", logical_type = "affine_array", declared_type = "const float*", element_type = "float", usage = "input", shape = [256, 512], affine_map = [512, 1], affine_offset = 0 },
    { name = "C", description = "result accumulation matrix", logical_type = "affine_array", declared_type = "float*", element_type = "float", usage = "input_output", shape = [512, 512], affine_map = [512, 1], affine_offset = 0 },
    { name = "alpha", description = "Scalar to apply to A*B matrix product", logical_type = "element", declared_type = "float", element_type = "float", usage = "input" },
    { name = "beta", description = "Scalar to apply to the input C matrix before accumulating", logical_type = "element", declared_type = "float", element_type = "float", usage = "input" },
]
return = { name = "return value", description = "GEMM_B94D27B9934D3E08 return value", logical_type = "void", declared_type = "void", element_type = "void", usage = "output" }

# Auxiliary data attached to this function, grouped into one sub-table per
# named tool.
[functions.GEMM_B94D27B9934D3E08.auxiliary]

[functions.GEMM_B94D27B9934D3E08.auxiliary.FooToolchain]
node = "gemm"

[functions.GEMM_B94D27B9934D3E08.auxiliary.Contoso]
implementation = "Contoso_GEMM_123"
contoso_tool_version = "1.0"
node = "GEMM"

[functions.GEMM_B94D27B9934D3E08.auxiliary.Contoso.implementation_config]
PackBMatrix = true
# GEMM variant with runtime-known sizes. Matrix arguments are runtime_array
# entries whose extent is given by a size expression over other arguments.
[functions.blas_sgemm_row_major]
name = "blas_sgemm_row_major"
description = "CPU Implementation of the GEMM algorithm: C = alpha * (A * B) + beta * C with runtime-known sizes and non-transposed row-major matrices."
calling_convention = "stdcall"
arguments = [
    { name = "M", description = "Number of rows in the A and C matrices", logical_type = "element", declared_type = "int", element_type = "int", usage = "input" },
    { name = "N", description = "Number of columns in the B and C matrices", logical_type = "element", declared_type = "int", element_type = "int", usage = "input" },
    { name = "K", description = "Number of columns in the A matrix and rows in the B matrix", logical_type = "element", declared_type = "int", element_type = "int", usage = "input" },
    { name = "alpha", description = "Scalar to apply to A*B matrix product", logical_type = "element", declared_type = "float", element_type = "float", usage = "input" },
    # Row-major, non-transposed storage: the leading dimension is the step
    # between consecutive rows, so each buffer spans
    # (leading dimension) * (number of rows) elements.
    # A has M rows, B has K rows and C has M rows.
    { name = "A", description = "left-hand input matrix", logical_type = "runtime_array", declared_type = "const float*", element_type = "float", usage = "input", size = "lda * M" },
    { name = "lda", description = "Leading dimension step size of A", logical_type = "element", declared_type = "int", element_type = "int", usage = "input" },
    { name = "B", description = "right-hand input matrix", logical_type = "runtime_array", declared_type = "const float*", element_type = "float", usage = "input", size = "ldb * K" },
    { name = "ldb", description = "Leading dimension step size of B", logical_type = "element", declared_type = "int", element_type = "int", usage = "input" },
    { name = "beta", description = "Scalar to apply to the input C matrix before accumulating", logical_type = "element", declared_type = "float", element_type = "float", usage = "input" },
    # C is read (scaled by beta) and then overwritten with the result, so it
    # is a mutable input_output buffer. This matches the float *C parameter
    # in the C declaration and the C argument of GEMM_B94D27B9934D3E08.
    { name = "C", description = "result accumulation matrix", logical_type = "runtime_array", declared_type = "float*", element_type = "float", usage = "input_output", size = "ldc * M" },
    { name = "ldc", description = "Leading dimension step size of C", logical_type = "element", declared_type = "int", element_type = "int", usage = "input" }
]
return = { name = "return value", description = "blas_sgemm_row_major return value", logical_type = "void", declared_type = "void", element_type = "void", usage = "output" }
# Target description: the required sub-tables list the OS, CPU architecture
# and extensions; the optimized_for sub-tables describe one specific CPU
# model and its cache.
[target]

[target.required]
os = "windows"

[target.required.CPU]
architecture = "x86_64"
extensions = ["AVX2"]

[target.optimized_for.CPU]
name = "Intel Xeon E5-4669 v4"
family = "Broadwell"
clock_frequency = 2.2
cores = 22
threads = 44

[target.optimized_for.CPU.cache]
instruction_KB = 32
sizes_KB = [32, 256, 56320]
line_sizes = [64, 64, 64]
# Entries under dependencies are things a consumer of this .hat file has to
# act on in order to consume the package properly, such as the library
# files to link and the dynamic libraries to make available at runtime.
[dependencies]
link_target = "sample_gemm.lib"
deploy_files = ["sample_gemm.dll"]
dynamic = [
    { name = "Windows Universal C Runtime", version = "141", target_file = "ucrtbase.dll" },
]
# Entries under compiled_with are informational: a consumer of this .hat
# file may find them useful, but does not necessarily have to act on them
# to consume the package successfully.
# NOTE(review): compiler names MSVC141 while flags uses the -std and -f
# option spelling. Confirm these sample values are intentional.
[compiled_with]
compiler = "MSVC141"
flags = "-std=c++14 -ffast-math -fno-exceptions -fno-rtti"
crt = "ucrt"
libraries = [
    { name = "LLVM OpenMP", version = "5.1", target_file = "libomp.lib" },
]
# The code value is a multi-line literal string holding the C declarations
# of the two functions listed under the functions table. The first and last
# lines of the string close and reopen the TOML preprocessor guard, so the
# text between them lies outside the guard when the file is run through a
# C preprocessor. The string content must stay byte-identical because it is
# both TOML data and the C code itself.
[declaration]
code = '''
#endif // TOML
#pragma once
#include <stdint.h>
#if defined(__cplusplus)
extern "C"
{
#endif // defined(__cplusplus)
//
// Functions
//
void GEMM_B94D27B9934D3E08(const float* A, const float* B, float* C, float alpha, float beta);
void blas_sgemm_row_major(int M, int N, int K, float alpha, const float *A, int lda, const float *B, int ldb, float beta, float *C, int ldc);
#if defined(__cplusplus)
} // extern "C"
#endif // defined(__cplusplus)
#ifdef TOML
'''
#endif // TOML