зеркало из https://github.com/microsoft/hat.git
Incorporating review feedback from teams discussions
This commit is contained in:
Родитель
0897f81f72
Коммит
fc8a87ad8b
|
@ -5,15 +5,19 @@ comment = "Automatically generated by the RoboCode compiler"
|
|||
author = "John Doe"
|
||||
version = "1.2.3.5"
|
||||
license_url = "https://www.apache.org/licenses/LICENSE-2.0.html"
|
||||
library_name = "sample_gemm.lib"
|
||||
library_md5 = "ABCD1234..."
|
||||
|
||||
[functions]
|
||||
[functions.GEMM_B94D27B9934D3E08]
|
||||
name = "GEMM_B94D27B9934D3E08"
|
||||
description = "CPU Implementation of the GEMM algorithm: C = alpha * (A * B) + beta * C with specific sizes."
|
||||
calling_convention = "__stdcall"
|
||||
can_assert = false
|
||||
arguments = [
|
||||
{ name = "A", description = "left-hand input matrix", logical_type = "affine_array", declared_type = "const float*", element_type = "float", usage = "input", layout = { affine_array = { shape = [ 512, 256 ], affine_map = [ 256, 1 ], affine_offset = 0 } } },
|
||||
{ name = "B", description = "right-hand input matrix", logical_type = "affine_array", declared_type = "const float*", element_type = "float", usage = "input", layout = { affine_array = { shape = [ 256, 512 ], affine_map = [ 512, 1 ], affine_offset = 0 } } },
|
||||
{ name = "C", description = "result accumulation matrix", logical_type = "affine_array", declared_type = "float*", element_type = "float", usage = "input_output", layout = { affine_array = { shape = [ 512, 512 ], affine_map = [ 512, 1 ], affine_offset = 0 } } },
|
||||
{ name = "A", description = "left-hand input matrix", logical_type = "affine_array", declared_type = "const float*", element_type = "float", usage = "input", shape = [ 512, 256 ], affine_map = [ 256, 1 ], affine_offset = 0 },
|
||||
{ name = "B", description = "right-hand input matrix", logical_type = "affine_array", declared_type = "const float*", element_type = "float", usage = "input", shape = [ 256, 512 ], affine_map = [ 512, 1 ], affine_offset = 0 },
|
||||
{ name = "C", description = "result accumulation matrix", logical_type = "affine_array", declared_type = "float*", element_type = "float", usage = "input_output", shape = [ 512, 512 ], affine_map = [ 512, 1 ], affine_offset = 0 },
|
||||
{ name = "alpha", description = "Scalar to apply to A*B matrix product", logical_type = "element", declared_type = "float", element_type = "float", usage = "input" },
|
||||
{ name = "beta", description = "Scalar to apply to the input C matrix before accumulating", logical_type = "element", declared_type = "float", element_type = "float", usage = "input" }
|
||||
]
|
||||
|
@ -28,6 +32,7 @@ license_url = "https://www.apache.org/licenses/LICENSE-2.0.html"
|
|||
|
||||
[functions.GEMM_B94D27B9934D3E08.auxiliary.RoboCode]
|
||||
implementation = "MLAS_Value_GEMM"
|
||||
generator_version = "1.0"
|
||||
node = "GEMM"
|
||||
[functions.GEMM_B94D27B9934D3E08.auxiliary.RoboCode.implementation_config]
|
||||
MLAS_Value_CacheBMatrix = true
|
||||
|
@ -36,17 +41,19 @@ license_url = "https://www.apache.org/licenses/LICENSE-2.0.html"
|
|||
[functions.blas_sgemm_row_major]
|
||||
name = "blas_sgemm_row_major"
|
||||
description = "CPU Implementation of the GEMM algorithm: C = alpha * (A * B) + beta * C with runtime-known sizes and non-transposed row-major matrices."
|
||||
calling_convention = "__stdcall"
|
||||
can_assert = true
|
||||
arguments = [
|
||||
{ name = "M", description = "Number of rows in the A and C matrices", logical_type = "element", declared_type = "int", element_type = "int", usage = "input" },
|
||||
{ name = "N", description = "Number of columns in the B and C matrices", logical_type = "element", declared_type = "int", element_type = "int", usage = "input" },
|
||||
{ name = "K", description = "Number of columns in the A matrix and rows in the B matrix", logical_type = "element", declared_type = "int", element_type = "int", usage = "input" },
|
||||
{ name = "alpha", description = "Scalar to apply to A*B matrix product", logical_type = "element", declared_type = "float", element_type = "float", usage = "input" },
|
||||
{ name = "A", description = "left-hand input matrix", logical_type = "runtime_array", declared_type = "const float*", element_type = "float", usage = "input", layout = { runtime_array = { size = "lda * K" } } },
|
||||
{ name = "A", description = "left-hand input matrix", logical_type = "runtime_array", declared_type = "const float*", element_type = "float", usage = "input", size = "lda * K" },
|
||||
{ name = "lda", description = "Leading dimension step size of A", logical_type = "element", declared_type = "int", element_type = "int", usage = "input" },
|
||||
{ name = "B", description = "right-hand input matrix", logical_type = "runtime_array", declared_type = "const float*", element_type = "float", usage = "input", layout = { runtime_array = { size = "ldb * N" } } },
|
||||
{ name = "B", description = "right-hand input matrix", logical_type = "runtime_array", declared_type = "const float*", element_type = "float", usage = "input", size = "ldb * N" },
|
||||
{ name = "ldb", description = "Leading dimension step size of B", logical_type = "element", declared_type = "int", element_type = "int", usage = "input" },
|
||||
{ name = "beta", description = "Scalar to apply to the input C matrix before accumulating", logical_type = "element", declared_type = "float", element_type = "float", usage = "input" },
|
||||
{ name = "C", description = "result accumulation matrix", logical_type = "runtime_array", declared_type = "const float*", element_type = "float", usage = "input", layout = { runtime_array = { size = "ldc * N" } } },
|
||||
# C is read (scaled by beta) and then overwritten with the accumulated result, so it is a mutable input_output buffer.
{ name = "C", description = "result accumulation matrix", logical_type = "runtime_array", declared_type = "float*", element_type = "float", usage = "input_output", size = "ldc * N" },
|
||||
{ name = "ldc", description = "Leading dimension step size of C", logical_type = "element", declared_type = "int", element_type = "int", usage = "input" }
|
||||
]
|
||||
return = { name = "return value", description = "blas_sgemm_row_major return value", logical_type = "void", declared_type = "void", element_type = "void", usage = "output" }
|
||||
|
@ -65,13 +72,11 @@ license_url = "https://www.apache.org/licenses/LICENSE-2.0.html"
|
|||
clock_frequency = 2.2
|
||||
cores = 22
|
||||
threads = 44
|
||||
|
||||
|
||||
[target.optimized_for.CPU.cache]
|
||||
cache_line = 64
|
||||
instruction_KB = 32
|
||||
l1_KB = 32
|
||||
l2_KB = 256
|
||||
l3_KB = 56320
|
||||
sizes_KB = [ 32, 256, 56320 ]
|
||||
line_sizes = [ 64, 64, 64 ]
|
||||
|
||||
[dependencies]
|
||||
dynamic = [ { name = "Windows Universal C Runtime", version = "141", link_lib = "ucrtbase.dll" } ]
|
||||
|
@ -80,7 +85,7 @@ dynamic = [ { name = "Windows Universal C Runtime", version = "141", link_lib =
|
|||
compiler = "MSVC141"
|
||||
flags = "-std=c++14 -ffast-math -fno-exceptions -fno-rtti"
|
||||
crt = "ucrt"
|
||||
libraries = [ { name = "OpenMP", version = "5.1", link_lib = "/openmp" } ]
|
||||
libraries = [ { name = "LLVM OpenMP", version = "5.1", link_lib = "libomp.lib" } ]
|
||||
|
||||
[declaration]
|
||||
code = '''
|
||||
|
|
109
schema/hat.tosd
109
schema/hat.tosd
|
@ -1,41 +1,10 @@
|
|||
# HAT TOML Schema
|
||||
[toml-schema]
|
||||
version = "0.0.0.1"
|
||||
version = "0.0.0.2"
|
||||
|
||||
# Types to be used elsewhere in this schema
|
||||
[types]
|
||||
|
||||
# Layout type associated with "affine_array" logical parameter types
|
||||
# "affine_array" logical parameters have fully compile-time-known sizes that are encoded in this metadata
|
||||
[types.affineArrayLayoutType]
|
||||
type = "table"
|
||||
|
||||
# Array giving the logical shape of the buffer, e.g. [256, 256]
|
||||
[types.affineArrayLayoutType.shape]
|
||||
type = "array"
|
||||
arraytype = "integer"
|
||||
|
||||
# Array giving the affine map coefficients used to map from multi-dimensional coordinates to an offset in the C-style array buffer.
|
||||
# e.g. [256, 1] would indicate that for index (i, j), the position in the buffer is (256 * i) + (1 * j)
|
||||
# Should have the same number of elements as the shape array
|
||||
[types.affineArrayLayoutType.affine_map]
|
||||
type = "array"
|
||||
arraytype = "integer"
|
||||
|
||||
# Offset from the buffer pointer where the logical affine array begins.
|
||||
[types.affineArrayLayoutType.affine_offset]
|
||||
type = "integer"
|
||||
|
||||
# Layout type associated with "runtime_array" logical parameter types
|
||||
# "runtime_array" logical parameters have a runtime-determined size that is given by other parameters to the function.
|
||||
[types.runtimeArrayLayoutType]
|
||||
type = "table"
|
||||
|
||||
# A string describing the number of elements in the buffer, typically expected to reference other parameters in the function.
|
||||
# e.g. "N", "lda * K"
|
||||
[types.runtimeArrayLayoutType.size]
|
||||
type = "string"
|
||||
|
||||
# Parameter type of arguments and return values
|
||||
[types.paramType]
|
||||
type = "table"
|
||||
|
@ -66,13 +35,31 @@ version = "0.0.0.1"
|
|||
type = "string"
|
||||
allowedvalues = [ "input_output", "input", "output" ]
|
||||
|
||||
# The layout of the parameter, if applicable. Only array logical types have a layout.
|
||||
# TOML schema only supports 'oneof' constraints on collections, so we use a collection and have a single dummy key in it
|
||||
[types.paramType.layout]
|
||||
type = "collection"
|
||||
oneof = [ "types.affineArrayLayoutType", "types.runtimeArrayLayoutType" ]
|
||||
minlength = 1
|
||||
maxlength = 1
|
||||
# Optional array giving the logical shape of the buffer for an affine_array logical type, e.g. [256, 256]
|
||||
[types.paramType.shape]
|
||||
type = "array"
|
||||
arraytype = "integer"
|
||||
optional = true
|
||||
|
||||
# Optional array giving the affine map coefficients used to map from multi-dimensional coordinates to an
|
||||
# offset in the C-style array buffer for an affine_array logical type.
|
||||
# e.g. [256, 1] would indicate that for index (i, j), the position in the buffer is (256 * i) + (1 * j)
|
||||
# Should have the same number of elements as the shape array
|
||||
[types.paramType.affine_map]
|
||||
type = "array"
|
||||
arraytype = "integer"
|
||||
optional = true
|
||||
|
||||
# Offset from the buffer pointer where the array data begins for an affine_array logical type.
|
||||
[types.paramType.affine_offset]
|
||||
type = "integer"
|
||||
optional = true
|
||||
|
||||
# A string describing the number of elements in the buffer for a runtime_array logical type.
|
||||
# Typically expected to reference other parameters in the function.
|
||||
# e.g. "N", "lda * K"
|
||||
[types.paramType.size]
|
||||
type = "string"
|
||||
optional = true
|
||||
|
||||
# Type for a function described by TOML data and declared in the C declaration later in the HAT file
|
||||
|
@ -86,6 +73,15 @@ version = "0.0.0.1"
|
|||
# A friendly description of what the function does
|
||||
[types.functionType.description]
|
||||
type = "string"
|
||||
|
||||
# The calling convention for this function
|
||||
[types.functionType.calling_convention]
|
||||
type = "string"
|
||||
allowedvalues = [ "__stdcall", "__cdecl", "__fastcall", "__vectorcall" ]
|
||||
|
||||
# Whether this function may raise an assertion based on the arguments it is given
|
||||
[types.functionType.can_assert]
|
||||
type = "boolean"
|
||||
|
||||
# An array of arguments to the function
|
||||
[types.functionType.arguments]
|
||||
|
@ -144,6 +140,14 @@ version = "0.0.0.1"
|
|||
[elements.description.license_url]
|
||||
type = "string"
|
||||
|
||||
# Library file name
|
||||
[elements.description.library_name]
|
||||
type = "string"
|
||||
|
||||
# MD5 hash of the library file this .hat file is associated with
|
||||
[elements.description.library_md5]
|
||||
type = "string"
|
||||
|
||||
# Collection of functions declared within the HAT file and their metadata
|
||||
# The keys in a collection are not prescribed by the schema. Here each key is a function name; names are unique because the HAT format does not support function overloading.
|
||||
[elements.functions]
|
||||
|
@ -247,22 +251,19 @@ version = "0.0.0.1"
|
|||
type = "table"
|
||||
optional = true
|
||||
|
||||
# Cache line size in bytes
|
||||
[elements.target.optimized_for.CPU.cache.cache_line]
|
||||
type = "integer"
|
||||
|
||||
# Instruction cache size in KB
|
||||
[elements.target.optimized_for.CPU.cache.instruction_KB]
|
||||
type = "integer"
|
||||
|
||||
# TODO : discussion - do we instead want this to be an array of cache sizes so 'l1', 'l2', 'l3' aren't baked into the schema?
|
||||
# Cache sizes in KB
|
||||
[elements.target.optimized_for.CPU.cache.l1_KB]
|
||||
type = "integer"
|
||||
[elements.target.optimized_for.CPU.cache.l2_KB]
|
||||
type = "integer"
|
||||
[elements.target.optimized_for.CPU.cache.l3_KB]
|
||||
type = "integer"
|
||||
# Ordered cache sizes in KB
|
||||
[elements.target.optimized_for.CPU.cache.sizes_KB]
|
||||
type = "array"
|
||||
arraytype = "integer"
|
||||
|
||||
# Ordered cache line sizes in bytes
|
||||
[elements.target.optimized_for.CPU.cache.line_sizes]
|
||||
type = "array"
|
||||
arraytype = "integer"
|
||||
|
||||
# Optional additional optimized target characteristics not specified in this schema
|
||||
[elements.target.optimized_for.CPU.auxiliary]
|
||||
|
@ -273,6 +274,7 @@ version = "0.0.0.1"
|
|||
[elements.target.optimized_for.GPU]
|
||||
type = "table"
|
||||
optional = true
|
||||
|
||||
# Best optimized GPU core count
|
||||
[elements.target.optimized_for.GPU.cores]
|
||||
type = "integer"
|
||||
|
@ -290,9 +292,10 @@ version = "0.0.0.1"
|
|||
type = "table"
|
||||
optional = true
|
||||
|
||||
# Table describing dependencies that projects using this HAT library must supply
|
||||
# Table describing dependencies that projects using this HAT library must supply at runtime
|
||||
[elements.dependencies]
|
||||
type = "table"
|
||||
|
||||
# Dynamic libraries that must be linked
|
||||
[elements.dependencies.dynamic]
|
||||
type = "array"
|
||||
|
@ -306,6 +309,7 @@ version = "0.0.0.1"
|
|||
# Table giving information about how this HAT library was built
|
||||
[elements.compiled_with]
|
||||
type = "table"
|
||||
|
||||
# Compiler name and version, e.g. "MSVC141"
|
||||
[elements.compiled_with.compiler]
|
||||
type = "string"
|
||||
|
@ -331,6 +335,7 @@ version = "0.0.0.1"
|
|||
# Table containing the C header code in this HAT file
|
||||
[elements.declaration]
|
||||
type = "table"
|
||||
|
||||
# String containing the entirety of the C header code
|
||||
[elements.declaration.code]
|
||||
type = "string"
|
||||
|
|
Загрузка…
Ссылка в новой задаче