Incorporating review feedback from teams discussions

This commit is contained in:
Mason Remy 2021-01-26 22:04:14 -08:00
Родитель 0897f81f72
Коммит fc8a87ad8b
2 изменённых файлов: 74 добавлений и 64 удалений

Просмотреть файл

@ -5,15 +5,19 @@ comment = "Automatically generated by the RoboCode compiler"
author = "John Doe"
version = "1.2.3.5"
license_url = "https://www.apache.org/licenses/LICENSE-2.0.html"
library_name = "sample_gemm.lib"
library_md5 = "ABCD1234..."
[functions]
# Fixed-size GEMM entry point: per the shapes below, A is 512x256, B is 256x512 and C is 512x512.
[functions.GEMM_B94D27B9934D3E08]
name = "GEMM_B94D27B9934D3E08"
description = "CPU Implementation of the GEMM algorithm: C = alpha * (A * B) + beta * C with specific sizes."
calling_convention = "__stdcall"
can_assert = false
arguments = [
# A, B and C are listed twice: first with the affine description nested under `layout.affine_array`,
# then with `shape` / `affine_map` / `affine_offset` as direct keys of the argument.
# An affine_map of [ 256, 1 ] places element (i, j) at buffer position (256 * i) + (1 * j).
{ name = "A", description = "left-hand input matrix", logical_type = "affine_array", declared_type = "const float*", element_type = "float", usage = "input", layout = { affine_array = { shape = [ 512, 256 ], affine_map = [ 256, 1 ], affine_offset = 0 } } },
{ name = "B", description = "right-hand input matrix", logical_type = "affine_array", declared_type = "const float*", element_type = "float", usage = "input", layout = { affine_array = { shape = [ 256, 512 ], affine_map = [ 512, 1 ], affine_offset = 0 } } },
{ name = "C", description = "result accumulation matrix", logical_type = "affine_array", declared_type = "float*", element_type = "float", usage = "input_output", layout = { affine_array = { shape = [ 512, 512 ], affine_map = [ 512, 1 ], affine_offset = 0 } } },
{ name = "A", description = "left-hand input matrix", logical_type = "affine_array", declared_type = "const float*", element_type = "float", usage = "input", shape = [ 512, 256 ], affine_map = [ 256, 1 ], affine_offset = 0 },
{ name = "B", description = "right-hand input matrix", logical_type = "affine_array", declared_type = "const float*", element_type = "float", usage = "input", shape = [ 256, 512 ], affine_map = [ 512, 1 ], affine_offset = 0 },
{ name = "C", description = "result accumulation matrix", logical_type = "affine_array", declared_type = "float*", element_type = "float", usage = "input_output", shape = [ 512, 512 ], affine_map = [ 512, 1 ], affine_offset = 0 },
# Scalar coefficients of C = alpha * (A * B) + beta * C.
{ name = "alpha", description = "Scalar to apply to A*B matrix product", logical_type = "element", declared_type = "float", element_type = "float", usage = "input" },
{ name = "beta", description = "Scalar to apply to the input C matrix before accumulating", logical_type = "element", declared_type = "float", element_type = "float", usage = "input" }
]
@ -28,6 +32,7 @@ license_url = "https://www.apache.org/licenses/LICENSE-2.0.html"
[functions.GEMM_B94D27B9934D3E08.auxiliary.RoboCode]
implementation = "MLAS_Value_GEMM"
generator_version = "1.0"
node = "GEMM"
[functions.GEMM_B94D27B9934D3E08.auxiliary.RoboCode.implementation_config]
MLAS_Value_CacheBMatrix = true
@ -36,17 +41,19 @@ license_url = "https://www.apache.org/licenses/LICENSE-2.0.html"
# Runtime-sized GEMM: the dimensions M, N, K and the leading dimensions lda, ldb, ldc are passed as arguments.
[functions.blas_sgemm_row_major]
name = "blas_sgemm_row_major"
description = "CPU Implementation of the GEMM algorithm: C = alpha * (A * B) + beta * C with runtime-known sizes and non-transposed row-major matrices."
calling_convention = "__stdcall"
can_assert = true
arguments = [
{ name = "M", description = "Number of rows in the A and C matrices", logical_type = "element", declared_type = "int", element_type = "int", usage = "input" },
{ name = "N", description = "Number of columns in the B and C matrices", logical_type = "element", declared_type = "int", element_type = "int", usage = "input" },
{ name = "K", description = "Number of columns in the A matrix and rows in the B matrix", logical_type = "element", declared_type = "int", element_type = "int", usage = "input" },
{ name = "alpha", description = "Scalar to apply to A*B matrix product", logical_type = "element", declared_type = "float", element_type = "float", usage = "input" },
# A, B and C are each listed twice: first with the size nested under `layout.runtime_array`,
# then with `size` as a direct key of the argument.
# NOTE(review): for row-major storage, a matrix with R rows and leading dimension ld normally spans
# ld * R elements (lda * M, ldb * K, ldc * M); the size strings below differ from that - confirm.
{ name = "A", description = "left-hand input matrix", logical_type = "runtime_array", declared_type = "const float*", element_type = "float", usage = "input", layout = { runtime_array = { size = "lda * K" } } },
{ name = "A", description = "left-hand input matrix", logical_type = "runtime_array", declared_type = "const float*", element_type = "float", usage = "input", size = "lda * K" },
{ name = "lda", description = "Leading dimension step size of A", logical_type = "element", declared_type = "int", element_type = "int", usage = "input" },
{ name = "B", description = "right-hand input matrix", logical_type = "runtime_array", declared_type = "const float*", element_type = "float", usage = "input", layout = { runtime_array = { size = "ldb * N" } } },
{ name = "B", description = "right-hand input matrix", logical_type = "runtime_array", declared_type = "const float*", element_type = "float", usage = "input", size = "ldb * N" },
{ name = "ldb", description = "Leading dimension step size of B", logical_type = "element", declared_type = "int", element_type = "int", usage = "input" },
{ name = "beta", description = "Scalar to apply to the input C matrix before accumulating", logical_type = "element", declared_type = "float", element_type = "float", usage = "input" },
# C is read (beta * C) and then receives the result, and the function returns void, so C must be a
# mutable input_output buffer - the same declaration the fixed-size GEMM entry uses for its C argument.
{ name = "C", description = "result accumulation matrix", logical_type = "runtime_array", declared_type = "float*", element_type = "float", usage = "input_output", layout = { runtime_array = { size = "ldc * N" } } },
{ name = "C", description = "result accumulation matrix", logical_type = "runtime_array", declared_type = "float*", element_type = "float", usage = "input_output", size = "ldc * N" },
{ name = "ldc", description = "Leading dimension step size of C", logical_type = "element", declared_type = "int", element_type = "int", usage = "input" }
]
return = { name = "return value", description = "blas_sgemm_row_major return value", logical_type = "void", declared_type = "void", element_type = "void", usage = "output" }
@ -65,13 +72,11 @@ license_url = "https://www.apache.org/licenses/LICENSE-2.0.html"
clock_frequency = 2.2
cores = 22
threads = 44
# Cache characteristics of the CPU this library was optimized for.
[target.optimized_for.CPU.cache]
# Scalar form: cache line size in bytes, instruction cache and per-level cache sizes in KB.
cache_line = 64
instruction_KB = 32
l1_KB = 32
l2_KB = 256
l3_KB = 56320
# Array form: sizes_KB carries the l1, l2, l3 sizes (KB) in that order; line_sizes carries line sizes in bytes,
# here one entry per listed cache. The instruction cache size has no counterpart in the array form.
sizes_KB = [ 32, 256, 56320 ]
line_sizes = [ 64, 64, 64 ]
[dependencies]
dynamic = [ { name = "Windows Universal C Runtime", version = "141", link_lib = "ucrtbase.dll" } ]
@ -80,7 +85,7 @@ dynamic = [ { name = "Windows Universal C Runtime", version = "141", link_lib =
compiler = "MSVC141"
flags = "-std=c++14 -ffast-math -fno-exceptions -fno-rtti"
crt = "ucrt"
libraries = [ { name = "OpenMP", version = "5.1", link_lib = "/openmp" } ]
libraries = [ { name = "LLVM OpenMP", version = "5.1", link_lib = "libomp.lib" } ]
[declaration]
code = '''

Просмотреть файл

@ -1,41 +1,10 @@
# HAT TOML Schema
[toml-schema]
# Version of this schema.
# NOTE(review): `version` is assigned twice below ("0.0.0.1", then "0.0.0.2"). TOML does not allow a key to be
# defined more than once in a table, so only one assignment can remain in a parseable file. The hunk headers
# quote "0.0.0.1" as pre-existing context, which suggests "0.0.0.2" is the newer value - confirm.
version = "0.0.0.1"
version = "0.0.0.2"
# Types to be used elsewhere in this schema
[types]
# Layout type associated with "affine_array" logical parameter types
# "affine_array" logical parameters have fully compile-time-known sizes that are encoded in this metadata
[types.affineArrayLayoutType]
type = "table"
# Array giving the logical shape of the buffer, e.g. [256, 256]
[types.affineArrayLayoutType.shape]
type = "array"
arraytype = "integer"
# Array giving the affine map coefficients used to map from multi-dimensional coordinates to an offset in the C-style array buffer.
# e.g. [256, 1] would indicate that for index (i, j), the position in the buffer is (256 * i) + (1 * j)
# Should have the same number of elements as the shape array
[types.affineArrayLayoutType.affine_map]
type = "array"
arraytype = "integer"
# Offset from the buffer pointer where the logical affine array begins.
[types.affineArrayLayoutType.affine_offset]
type = "integer"
# Layout type associated with "runtime_array" logical parameter types
# "runtime_array" logical parameters have a runtime-determined size that is given by other parameters to the function.
[types.runtimeArrayLayoutType]
type = "table"
# A string describing the number of elements in the buffer, typically expected to reference other parameters in the function.
# e.g. "N", "lda * K"
[types.runtimeArrayLayoutType.size]
type = "string"
# Parameter type of arguments and return values
[types.paramType]
type = "table"
@ -66,13 +35,31 @@ version = "0.0.0.1"
type = "string"
allowedvalues = [ "input_output", "input", "output" ]
# The layout of the parameter, if applicable. Only array logical types have a layout.
# TOML schema only supports 'oneof' constraints on collections, so we use a collection and have a single dummy key in it
[types.paramType.layout]
type = "collection"
oneof = [ "types.affineArrayLayoutType", "types.runtimeArrayLayoutType" ]
minlength = 1
maxlength = 1
# Optional array giving the logical shape of the buffer for an affine_array logical type, e.g. [256, 256]
[types.paramType.shape]
type = "array"
arraytype = "integer"
optional = true
# Optional array giving the affine map coefficients used to map from multi-dimensional coordinates to an
# offset in the C-style array buffer for an affine_array logical type.
# e.g. [256, 1] would indicate that for index (i, j), the position in the buffer is (256 * i) + (1 * j)
# Should have the same number of elements as the shape array
[types.paramType.affine_map]
type = "array"
arraytype = "integer"
optional = true
# Optional offset from the buffer pointer where the array data begins for an affine_array logical type.
# NOTE(review): the unit of this offset (elements vs. bytes) is not stated anywhere in the schema - confirm.
[types.paramType.affine_offset]
type = "integer"
optional = true
# Optional string describing the number of elements in the buffer for a runtime_array logical type.
# Typically expected to reference other parameters in the function.
# e.g. "N", "lda * K"
[types.paramType.size]
type = "string"
optional = true
# Type for a function described by TOML data and declared in the C declaration later in the HAT file
@ -86,6 +73,15 @@ version = "0.0.0.1"
# A friendly description of what the function does
[types.functionType.description]
type = "string"
# The calling convention for this function; must be one of the allowed values listed below
[types.functionType.calling_convention]
type = "string"
allowedvalues = [ "__stdcall", "__cdecl", "__fastcall", "__vectorcall" ]
# Whether this function may trigger an assertion failure depending on the arguments it is given
[types.functionType.can_assert]
type = "boolean"
# An array of arguments to the function
[types.functionType.arguments]
@ -144,6 +140,14 @@ version = "0.0.0.1"
[elements.description.license_url]
type = "string"
# Library file name
[elements.description.library_name]
type = "string"
# MD5 hash of the library file this .hat file is associated with
[elements.description.library_md5]
type = "string"
# Collection of functions declared within the HAT file and their metadata
# The keys of a collection are not prescribed by the schema; here each key is the name of a function, which is unique because the HAT format does not support function overloading.
[elements.functions]
@ -247,22 +251,19 @@ version = "0.0.0.1"
type = "table"
optional = true
# Cache line size in bytes
[elements.target.optimized_for.CPU.cache.cache_line]
type = "integer"
# Instruction cache size in KB
[elements.target.optimized_for.CPU.cache.instruction_KB]
type = "integer"
# TODO : discussion - do we instead want this to be an array of cache sizes so 'l1', 'l2', 'l3' aren't baked into the schema?
# Per-level cache sizes in KB
[elements.target.optimized_for.CPU.cache.l1_KB]
type = "integer"
[elements.target.optimized_for.CPU.cache.l2_KB]
type = "integer"
[elements.target.optimized_for.CPU.cache.l3_KB]
type = "integer"
# Cache sizes in KB as an ordered array, one entry per cache level starting at level 1, e.g. [ 32, 256, 56320 ]
[elements.target.optimized_for.CPU.cache.sizes_KB]
type = "array"
arraytype = "integer"
# Cache line sizes in bytes as an ordered array, e.g. [ 64, 64, 64 ]
# NOTE(review): presumably one entry per entry of sizes_KB, in the same order - confirm.
[elements.target.optimized_for.CPU.cache.line_sizes]
type = "array"
arraytype = "integer"
# Optional additional optimized target characteristics not specified in this schema
[elements.target.optimized_for.CPU.auxiliary]
@ -273,6 +274,7 @@ version = "0.0.0.1"
[elements.target.optimized_for.GPU]
type = "table"
optional = true
# Number of GPU cores this library is best optimized for
[elements.target.optimized_for.GPU.cores]
type = "integer"
@ -290,9 +292,10 @@ version = "0.0.0.1"
type = "table"
optional = true
# Table describing dependencies that projects using this HAT library must supply
# Table describing dependencies that projects using this HAT library must supply at runtime
[elements.dependencies]
type = "table"
# Dynamic libraries that must be linked
[elements.dependencies.dynamic]
type = "array"
@ -306,6 +309,7 @@ version = "0.0.0.1"
# Table giving information about how this HAT library was built
[elements.compiled_with]
type = "table"
# Compiler name and version, e.g. "MSVC141"
[elements.compiled_with.compiler]
type = "string"
@ -331,6 +335,7 @@ version = "0.0.0.1"
# Table containing the C header code in this HAT file
[elements.declaration]
type = "table"
# String containing the entirety of the C header code
[elements.declaration.code]
type = "string"