Adding initial schema and sample

This commit is contained in:
Mason Remy 2021-01-14 18:27:14 -08:00
Родитель 5bd82cb77f
Коммит 6c588dded7
2 изменённых файлов: 448 добавлений и 0 удалений

Просмотреть файл

@ -0,0 +1,112 @@
#ifdef TOML
# Library-level metadata describing this HAT file.
[description]
version = "1.2.3.5"
author = "John Doe"
license_url = "https://www.apache.org/licenses/LICENSE-2.0.html"
comment = "Automatically generated by the RoboCode compiler"
[functions]

# Fixed-size GEMM entry point. Arguments appear in the same order as in the C declaration.
[functions.GEMM_B94D27B9934D3E08]
name = "GEMM_B94D27B9934D3E08"
description = "CPU Implementation of the GEMM algorithm: C = alpha * (A * B) + beta * C with specific sizes."
arguments = [
    { name = "A", description = "left-hand input matrix", logical_type = "affine_array", declared_type = "const float*", element_type = "float", usage = "input", layout = { affine_array = { shape = [ 512, 256 ], affine_map = [ 256, 1 ], affine_offset = 0 } } },
    { name = "B", description = "right-hand input matrix", logical_type = "affine_array", declared_type = "const float*", element_type = "float", usage = "input", layout = { affine_array = { shape = [ 256, 512 ], affine_map = [ 512, 1 ], affine_offset = 0 } } },
    { name = "C", description = "result accumulation matrix", logical_type = "affine_array", declared_type = "float*", element_type = "float", usage = "input_output", layout = { affine_array = { shape = [ 512, 512 ], affine_map = [ 512, 1 ], affine_offset = 0 } } },
    { name = "alpha", description = "Scalar to apply to A*B matrix product", logical_type = "element", declared_type = "float", element_type = "float", usage = "input" },
    { name = "beta", description = "Scalar to apply to the input C matrix before accumulating", logical_type = "element", declared_type = "float", element_type = "float", usage = "input" },
]
return = { name = "return value", description = "GEMM_B94D27B9934D3E08 return value", logical_type = "void", declared_type = "void", element_type = "void", usage = "output" }

# Tool-specific annotations, keyed by the name of the consuming tool.
[functions.GEMM_B94D27B9934D3E08.auxiliary]
ORT.node = "gemm"
MAIC.node = "gemm"
RoboCode.implementation = "MLAS_Value_GEMM"
RoboCode.node = "GEMM"
RoboCode.implementation_config.MLAS_Value_CacheBMatrix = true
# Runtime-sized GEMM entry point. Arguments appear in the same order as in the C declaration.
[functions.blas_sgemm_row_major]
name = "blas_sgemm_row_major"
description = "CPU Implementation of the GEMM algorithm: C = alpha * (A * B) + beta * C with runtime-known sizes and non-transposed row-major matrices."
# Buffer sizes follow row-major storage: a matrix with R rows and leading
# dimension ld spans ld * R elements (A has M rows, B has K rows, C has M rows).
# The C matrix is accumulated into, so it is a mutable input_output buffer,
# matching the float* parameter in the C declaration.
arguments = [
{ name = "M", description = "Number of rows in the A and C matrices", logical_type = "element", declared_type = "int", element_type = "int", usage = "input" },
{ name = "N", description = "Number of columns in the B and C matrices", logical_type = "element", declared_type = "int", element_type = "int", usage = "input" },
{ name = "K", description = "Number of columns in the A matrix and rows in the B matrix", logical_type = "element", declared_type = "int", element_type = "int", usage = "input" },
{ name = "alpha", description = "Scalar to apply to A*B matrix product", logical_type = "element", declared_type = "float", element_type = "float", usage = "input" },
{ name = "A", description = "left-hand input matrix", logical_type = "runtime_array", declared_type = "const float*", element_type = "float", usage = "input", layout = { runtime_array = { size = "lda * M" } } },
{ name = "lda", description = "Leading dimension step size of A", logical_type = "element", declared_type = "int", element_type = "int", usage = "input" },
{ name = "B", description = "right-hand input matrix", logical_type = "runtime_array", declared_type = "const float*", element_type = "float", usage = "input", layout = { runtime_array = { size = "ldb * K" } } },
{ name = "ldb", description = "Leading dimension step size of B", logical_type = "element", declared_type = "int", element_type = "int", usage = "input" },
{ name = "beta", description = "Scalar to apply to the input C matrix before accumulating", logical_type = "element", declared_type = "float", element_type = "float", usage = "input" },
{ name = "C", description = "result accumulation matrix", logical_type = "runtime_array", declared_type = "float*", element_type = "float", usage = "input_output", layout = { runtime_array = { size = "ldc * M" } } },
{ name = "ldc", description = "Leading dimension step size of C", logical_type = "element", declared_type = "int", element_type = "int", usage = "input" }
]
return = { name = "return value", description = "blas_sgemm_row_major return value", logical_type = "void", declared_type = "void", element_type = "void", usage = "output" }
[target]

# Features the target must provide for the functions to run.
[target.required]
os = "windows"
CPU.architecture = "x86_64"
CPU.extensions = ["AVX2"]

# Hardware the functions were tuned for.
[target.optimized_for.CPU]
name = "Intel Xeon E5-4669 v4"
family = "Broadwell"
clock_frequency = 2.2
cores = 22
threads = 44
cache.cache_line = 64
cache.instruction_KB = 32
cache.l1_KB = 32
cache.l2_KB = 256
cache.l3_KB = 56320
# Dynamic libraries that a project using this HAT library must link.
[[dependencies.dynamic]]
name = "Windows Universal C Runtime"
version = "141"
link_lib = "ucrtbase.dll"
# Build provenance of this HAT library.
# NOTE(review): the flags look like GCC/Clang-style options although the compiler is MSVC141 - confirm the sample values.
[compiled_with]
compiler = "MSVC141"
crt = "ucrt"
flags = "-std=c++14 -ffast-math -fno-exceptions -fno-rtti"

# Libraries statically linked into this HAT library.
[[compiled_with.libraries]]
name = "OpenMP"
version = "5.1"
link_lib = "/openmp"
# The code string below holds the whole C header. The string opens with an
# #endif and closes with an #ifdef TOML, pairing with the preprocessor lines
# outside it, so a C compiler without the TOML macro defined skips the
# metadata and still sees the declarations in between. To a TOML parser the
# preprocessor lines outside the string are plain comments.
[declaration]
code = '''
#endif // TOML
#pragma once
#include <stdint.h>
#if defined(__cplusplus)
extern "C"
{
#endif // defined(__cplusplus)
//
// Functions
//
void GEMM_B94D27B9934D3E08(const float* A, const float* B, float* C, float alpha, float beta);
void blas_sgemm_row_major(int M, int N, int K, float alpha, const float *A, int lda, const float *B, int ldb, float beta, float *C, int ldc);
#if defined(__cplusplus)
} // extern "C"
#endif // defined(__cplusplus)
#ifdef TOML
'''
#endif // TOML

336
schema/hat.tosd Normal file
Просмотреть файл

@ -0,0 +1,336 @@
# HAT TOML Schema
# NOTE(review): it is not clear from this file whether the version below refers to the
# schema language or to the HAT schema itself - confirm.
[toml-schema]
version = "0.0.0.1"
# Reusable types referenced from other parts of this schema
[types]

# Layout metadata for "affine_array" logical parameters.
# Such parameters have fully compile-time-known sizes, which are encoded here.
[types.affineArrayLayoutType]
type = "table"
# Logical shape of the buffer, e.g. [256, 256]
shape = { type = "array", arraytype = "integer" }
# Affine map coefficients that turn multi-dimensional coordinates into an offset in the C-style array buffer.
# e.g. [256, 1] means that index (i, j) lives at position (256 * i) + (1 * j) in the buffer.
# Should have as many elements as the shape array.
affine_map = { type = "array", arraytype = "integer" }
# Offset from the buffer pointer at which the logical affine array begins
affine_offset = { type = "integer" }
# Layout metadata for "runtime_array" logical parameters.
# Their size is only known at runtime and is given by other parameters of the function.
[types.runtimeArrayLayoutType]
type = "table"
# String describing the number of elements in the buffer, typically in terms of other parameters of the function.
# e.g. "N", "lda * K"
size = { type = "string" }
# Describes one function argument or return value
[types.paramType]
type = "table"
# Name of the parameter
name = { type = "string" }
# Friendly string describing the parameter
description = { type = "string" }
# Logical type of the parameter, e.g. whether it is really a multi-dimensional array
logical_type = { type = "string", allowedvalues = ["affine_array", "runtime_array", "void", "element"] }
# Declared type of the parameter as a valid C type declaration
declared_type = { type = "string" }
# Type of the elements in the parameter, e.g. "float" when the declared type is "const float*", "float*", or "float"
element_type = { type = "string" }
# How the function uses the parameter
usage = { type = "string", allowedvalues = [ "input_output", "input", "output" ] }

# Layout of the parameter, if applicable. Only array logical types have a layout.
# TOML schema only supports 'oneof' constraints on collections, so the layout is a collection holding a single dummy key.
[types.paramType.layout]
type = "collection"
oneof = [ "types.affineArrayLayoutType", "types.runtimeArrayLayoutType" ]
minlength = 1
maxlength = 1
optional = true
# A function described by TOML data and declared in the C declaration later in the HAT file
[types.functionType]
type = "table"
# Name of the function
name = { type = "string" }
# Friendly description of what the function does
description = { type = "string" }
# Arguments of the function, as an array
arguments = { type = "array", arraytype = "types.paramType" }
# Description of the return type of the function
# NOTE(review): this reference has no "types." prefix, unlike the arraytype/oneof references in this schema - confirm which form the schema language expects.
return = { typeof = "paramType" }
# Optional usage-specific information about the function that is outside this schema
auxiliary = { type = "table", optional = true }
# An external library referenced by this metadata: either a library that was linked into this one
# or a dependency that a user must link
[types.referencedLibraryType]
type = "table"
# Friendly name of the library
name = { type = "string" }
# Friendly version string of the library
version = { type = "string" }
# Name of the library to link, or a flag to link it with, as a build system could use it.
# E.g. "ucrtbase.dll" or "/openmp"
link_lib = { type = "string" }
# Well-known tables, keys, etc. of HAT TOML data
[elements]

# Description of the HAT contents
[elements.description]
type = "table"
# User-specified comment about the HAT file or library (optional)
comment = { type = "string", optional = true }
# User-specified author name (optional)
author = { type = "string", optional = true }
# Version number of the library that this HAT data represents
version = { type = "string" }
# URL of the full license text for this library
license_url = { type = "string" }
# Functions declared within the HAT file, together with their metadata.
# A collection does not prescribe its keys; here each key is a function name,
# which is unambiguous because the HAT format does not support function overloading.
[elements.functions]
typeof = "functionType"
type = "collection"
# Information about the target device that the functions in this HAT file are intended for
[elements.target]
type = "table"

# Target features required to run these functions without error
[elements.target.required]
type = "table"
# OS that this HAT library is built for
os = { type = "string", allowedvalues = [ "windows", "macos", "linux" ] }

# Required CPU characteristics
[elements.target.required.CPU]
type = "table"
# Instruction set architecture, e.g. "x86_64"
architecture = { type = "string" }
# Instruction set extensions used by these functions, e.g. "AVX2", "AVX512", etc
extensions = { type = "array", arraytype = "string" }
# Optional additional information not defined in this schema
auxiliary = { type = "table", optional = true }

# Required GPU characteristics, for HAT libraries that contain GPU functions
[elements.target.required.GPU]
type = "table"
optional = true
# Required GPU runtime library
runtime = { type = "string", allowedvalues = [ "cuda", "rocm", "vulkan" ] }
# Minimum GPU instruction set version
instruction_set_version = { type = "string" }
# Minimum number of GPU threads this library will attempt to use
min_threads = { type = "integer" }
# Minimum global memory in KB that will be allocated
min_global_memory_KB = { type = "integer" }
# Minimum shared memory in KB that will be allocated
min_shared_memory_KB = { type = "integer" }
# Optional additional requirements not specified in this schema
auxiliary = { type = "table", optional = true }
# Target characteristics that these functions are optimized for, but are not required in order to function
[elements.target.optimized_for]
type = "table"
optional = true
# Optimized CPU target information
[elements.target.optimized_for.CPU]
type = "table"
optional = true
# Full name and version of the CPU, e.g. "Intel Xeon E5-4669 v4"
[elements.target.optimized_for.CPU.name]
type = "string"
# Processor family, e.g. "Broadwell"
[elements.target.optimized_for.CPU.family]
type = "string"
# Base processor clock speed. Declared as a float so that fractional values such as 2.2 validate.
# NOTE(review): the unit is presumably GHz - confirm and document it.
[elements.target.optimized_for.CPU.clock_frequency]
type = "float"
# Number of CPU cores
[elements.target.optimized_for.CPU.cores]
type = "integer"
# Number of CPU threads
[elements.target.optimized_for.CPU.threads]
type = "integer"
# Optimized cache characteristics
[elements.target.optimized_for.CPU.cache]
type = "table"
optional = true
# Cache line size in bytes
[elements.target.optimized_for.CPU.cache.cache_line]
type = "integer"
# Instruction cache size in KB
[elements.target.optimized_for.CPU.cache.instruction_KB]
type = "integer"
# TODO : discussion - do we instead want this to be an array of cache sizes so 'l1', 'l2', 'l3' aren't baked into the schema?
# Cache sizes in KB
[elements.target.optimized_for.CPU.cache.l1_KB]
type = "integer"
[elements.target.optimized_for.CPU.cache.l2_KB]
type = "integer"
[elements.target.optimized_for.CPU.cache.l3_KB]
type = "integer"
# Optional additional optimized target characteristics not specified in this schema
[elements.target.optimized_for.CPU.auxiliary]
type = "table"
optional = true
# GPU target that the functions are optimized for
[elements.target.optimized_for.GPU]
type = "table"
optional = true
# Best optimized GPU core count
cores = { type = "integer" }
# Best optimized GPU thread count
threads = { type = "integer" }
# Optimized instruction set version
instruction_set_version = { type = "string" }
# Optional additional optimized target characteristics not specified in this schema
auxiliary = { type = "table", optional = true }
# Dependencies that projects using this HAT library must supply
[elements.dependencies]
type = "table"
# Dynamic libraries that must be linked
dynamic = { type = "array", arraytype = "types.referencedLibraryType" }
# Optional additional dependency information not specified in this schema
auxiliary = { type = "table", optional = true }
# Information about how this HAT library was built
[elements.compiled_with]
type = "table"
# Compiler name and version, e.g. "MSVC141"
compiler = { type = "string" }
# Compilation flags, e.g. "-std=c++14 -ffast-math"
flags = { type = "string" }
# C Runtime linked against, e.g. "ucrt"
crt = { type = "string" }
# Libraries statically linked into this HAT library
libraries = { type = "array", arraytype = "types.referencedLibraryType" }
# Optional additional compilation information not specified in this schema
auxiliary = { type = "table", optional = true }
# Holds the C header code of this HAT file
[elements.declaration]
type = "table"
# The entire C header code, as one string
code = { type = "string" }