Adding initial schema and sample

2021-01-14 18:27:14 -08:00 · 2021-01-14 18:27:14 -08:00 · 6c588dded7
--- a/samples/sample_gemm_library.hat
+++ b/samples/sample_gemm_library.hat
@ -0,0 +1,112 @@
+#ifdef TOML
+
+[description]
+comment = "Automatically generated by the RoboCode compiler"  # optional free-form note about this HAT file or library
+author = "John Doe"  # optional
+version = "1.2.3.5"  # version of the library this HAT data represents
+license_url = "https://www.apache.org/licenses/LICENSE-2.0.html"  # URL of the full license text for this library
+
+[functions]
+    [functions.GEMM_B94D27B9934D3E08]  # table key is the function name; HAT has no overloading, so each name appears once
+    name = "GEMM_B94D27B9934D3E08"
+    description = "CPU Implementation of the GEMM algorithm: C = alpha * (A * B) + beta * C with specific sizes."
+    arguments = [  # same order as the parameters of the C declaration in [declaration].code
+        { name = "A", description = "left-hand input matrix", logical_type = "affine_array", declared_type = "const float*", element_type = "float", usage = "input", layout = { affine_array = { shape = [ 512, 256 ], affine_map = [ 256, 1 ], affine_offset = 0 } } },
+        { name = "B", description = "right-hand input matrix", logical_type = "affine_array", declared_type = "const float*", element_type = "float", usage = "input", layout = { affine_array = { shape = [ 256, 512 ], affine_map = [ 512, 1 ], affine_offset = 0 } } },
+        { name = "C", description = "result accumulation matrix", logical_type = "affine_array", declared_type = "float*", element_type = "float", usage = "input_output", layout = { affine_array = { shape = [ 512, 512 ], affine_map = [ 512, 1 ], affine_offset = 0 } } },
+        { name = "alpha", description = "Scalar to apply to A*B matrix product", logical_type = "element", declared_type = "float", element_type = "float", usage = "input" },
+        { name = "beta", description = "Scalar to apply to the input C matrix before accumulating", logical_type = "element", declared_type = "float", element_type = "float", usage = "input" }
+    ]
+    return = { name = "return value", description = "GEMM_B94D27B9934D3E08 return value", logical_type = "void", declared_type = "void", element_type = "void", usage = "output" }
+
+        [functions.GEMM_B94D27B9934D3E08.auxiliary]  # optional usage-specific data; its contents are not part of the HAT schema
+            [functions.GEMM_B94D27B9934D3E08.auxiliary.ORT]
+            node = "gemm"
+
+            [functions.GEMM_B94D27B9934D3E08.auxiliary.MAIC]
+            node = "gemm"
+
+            [functions.GEMM_B94D27B9934D3E08.auxiliary.RoboCode]
+            implementation = "MLAS_Value_GEMM"
+            node = "GEMM"
+                [functions.GEMM_B94D27B9934D3E08.auxiliary.RoboCode.implementation_config]
+                MLAS_Value_CacheBMatrix = true
+
+
+    [functions.blas_sgemm_row_major]  # row-major, non-transposed: A is MxK, B is KxN, C is MxN; each buffer spans (rows * leading dimension) elements
+    name = "blas_sgemm_row_major"
+    description = "CPU Implementation of the GEMM algorithm: C = alpha * (A * B) + beta * C with runtime-known sizes and non-transposed row-major matrices."
+    arguments = [  # same order as the parameters of the C declaration in [declaration].code
+        { name = "M", description = "Number of rows in the A and C matrices", logical_type = "element", declared_type = "int", element_type = "int", usage = "input" },
+        { name = "N", description = "Number of columns in the B and C matrices", logical_type = "element", declared_type = "int", element_type = "int", usage = "input" },
+        { name = "K", description = "Number of columns in the A matrix and rows in the B matrix", logical_type = "element", declared_type = "int", element_type = "int", usage = "input" },
+        { name = "alpha", description = "Scalar to apply to A*B matrix product", logical_type = "element", declared_type = "float", element_type = "float", usage = "input" },
+        { name = "A", description = "left-hand input matrix", logical_type = "runtime_array", declared_type = "const float*", element_type = "float", usage = "input", layout = { runtime_array = { size = "lda * M" } } },
+        { name = "lda", description = "Leading dimension step size of A", logical_type = "element", declared_type = "int", element_type = "int", usage = "input" },
+        { name = "B", description = "right-hand input matrix", logical_type = "runtime_array", declared_type = "const float*", element_type = "float", usage = "input", layout = { runtime_array = { size = "ldb * K" } } },
+        { name = "ldb", description = "Leading dimension step size of B", logical_type = "element", declared_type = "int", element_type = "int", usage = "input" },
+        { name = "beta", description = "Scalar to apply to the input C matrix before accumulating", logical_type = "element", declared_type = "float", element_type = "float", usage = "input" },
+        { name = "C", description = "result accumulation matrix", logical_type = "runtime_array", declared_type = "float*", element_type = "float", usage = "input_output", layout = { runtime_array = { size = "ldc * M" } } },
+        { name = "ldc", description = "Leading dimension step size of C", logical_type = "element", declared_type = "int", element_type = "int", usage = "input" }
+    ]
+    return = { name = "return value", description = "blas_sgemm_row_major return value", logical_type = "void", declared_type = "void", element_type = "void", usage = "output" }
+
+[target]
+    [target.required]  # features the target must provide to run these functions without error
+    os = "windows"
+
+    [target.required.CPU]
+    architecture = "x86_64"
+    extensions = ["AVX2"]
+
+    [target.optimized_for.CPU]  # tuning target only; not required in order to run
+    name = "Intel Xeon E5-4669 v4"
+    family = "Broadwell"
+    clock_frequency = 2.2  # base clock speed; the schema states no unit — presumably GHz, TODO confirm
+    cores = 22
+    threads = 44
+    
+        [target.optimized_for.CPU.cache]
+        cache_line = 64  # bytes
+        instruction_KB = 32
+        l1_KB = 32
+        l2_KB = 256
+        l3_KB = 56320
+
+[dependencies]
+dynamic = [ { name = "Windows Universal C Runtime", version = "141", link_lib = "ucrtbase.dll" } ]  # dynamic libraries that projects using this HAT library must link
+
+[compiled_with]
+compiler = "MSVC141"
+flags = "-std=c++14 -ffast-math -fno-exceptions -fno-rtti"  # NOTE(review): GCC/Clang-style flags next to compiler = "MSVC141" — confirm this sample pairing is intended
+crt = "ucrt"
+libraries = [ { name = "OpenMP", version = "5.1", link_lib = "/openmp" } ]  # libraries statically linked into this HAT library
+
+[declaration]  # `code` holds the C header. Its first and last lines close and reopen the "#ifdef TOML" guard, so a C preprocessor (assuming TOML is not defined as a macro) keeps only the header text, while a TOML parser treats the guard lines outside the string as comments.
+code = '''
+#endif // TOML
+
+#pragma once
+
+#include <stdint.h>
+
+#if defined(__cplusplus)
+    extern "C"
+{
+#endif // defined(__cplusplus)
+
+//
+// Functions
+//
+
+void GEMM_B94D27B9934D3E08(const float* A, const float* B, float* C, float alpha, float beta);
+
+void blas_sgemm_row_major(int M, int N, int K, float alpha, const float *A, int lda, const float *B, int ldb, float beta, float *C, int ldc);
+
+#if defined(__cplusplus)
+} // extern "C"
+#endif // defined(__cplusplus)
+
+#ifdef TOML
+'''
+#endif // TOML
--- a/schema/hat.tosd
+++ b/schema/hat.tosd
@ -0,0 +1,336 @@
+# HAT TOML Schema
+[toml-schema]
+version = "0.0.0.1"
+
+# Types to be used elsewhere in this schema
+[types]
+
+    # Layout type associated with "affine_array" logical parameter types.
+    # "affine_array" logical parameters have fully compile-time-known sizes that are encoded in this metadata (shape, affine_map, affine_offset).
+    [types.affineArrayLayoutType]
+    type = "table"
+
+        # Array giving the logical shape of the buffer, one entry per dimension, e.g. [256, 256]
+        [types.affineArrayLayoutType.shape]
+        type = "array"
+        arraytype = "integer"
+
+        # Array giving the affine map coefficients used to map from multi-dimensional coordinates to an offset in the C-style array buffer.
+        # e.g. [256, 1] would indicate that for index (i, j), the position in the buffer is (256 * i) + (1 * j), i.e. a row-major 2-D layout with 256 elements per row.
+        # Should have the same number of elements as the shape array (one coefficient per dimension)
+        [types.affineArrayLayoutType.affine_map]
+        type = "array"
+        arraytype = "integer"
+
+        # Offset from the buffer pointer where the logical affine array begins (presumably counted in elements, like affine_map positions — TODO confirm).
+        [types.affineArrayLayoutType.affine_offset]
+        type = "integer"
+    
+    # Layout type associated with "runtime_array" logical parameter types
+    # "runtime_array" logical parameters have a runtime-determined size that is given by other parameters to the function.
+    [types.runtimeArrayLayoutType]
+    type = "table"
+
+        # A string describing the number of elements in the buffer, typically expected to reference other parameters in the function.
+        # e.g. "N", "lda * K"
+        [types.runtimeArrayLayoutType.size]
+        type = "string"
+
+    # Parameter type shared by function arguments and return values
+    [types.paramType]
+    type = "table"
+
+        # Name of the parameter
+        [types.paramType.name]
+        type = "string"
+
+        # Friendly string describing the parameter
+        [types.paramType.description]
+        type = "string"
+
+        # The logical type of the parameter, i.e. what the declared C type really represents (for example a multi-dimensional array behind a pointer)
+        [types.paramType.logical_type]
+        type = "string"
+        allowedvalues = ["affine_array", "runtime_array", "void", "element"]
+
+        # The declared type of the parameter as a valid C type declaration
+        [types.paramType.declared_type]
+        type = "string"
+
+        # The type of elements in the parameter. E.g. "float" if the declared type is "const float*", "float*", or "float"
+        [types.paramType.element_type]
+        type = "string"
+
+        # The usage of the parameter in the context of the function
+        [types.paramType.usage]
+        type = "string"
+        allowedvalues = [ "input_output", "input", "output" ]
+
+        # The layout of the parameter, if applicable. Only array logical types ("affine_array", "runtime_array") have a layout.
+        # TOML schema only supports 'oneof' constraints on collections, so layout is a collection holding exactly one entry under a dummy key, e.g. layout = { affine_array = { ... } }
+        [types.paramType.layout]
+        type = "collection"
+        oneof = [ "types.affineArrayLayoutType", "types.runtimeArrayLayoutType" ]
+        minlength = 1
+        maxlength = 1
+        optional = true
+
+    # Type for a function described by TOML data and declared in the C declaration later in the HAT file
+    [types.functionType]
+    type = "table"
+
+        # The name of the function (the sample uses the same string as the function's key in the [functions] collection)
+        [types.functionType.name]
+        type = "string"
+
+        # A friendly description of what the function does
+        [types.functionType.description]
+        type = "string"
+    
+        # An array of arguments to the function, each described by a paramType table
+        [types.functionType.arguments]
+        type = "array"
+        arraytype = "types.paramType"
+
+        # The return type description of the function, a single paramType (the sample uses logical_type = "void" for functions declared void)
+        [types.functionType.return]
+        typeof = "paramType"
+
+        # Optional additional usage-specific information about the function that isn't part of this schema
+        [types.functionType.auxiliary]
+        type = "table"
+        optional = true
+
+    # Type of an external library referenced by this metadata, such as a library that was linked into this one or a dependency a user must link
+    [types.referencedLibraryType]
+    type = "table"
+
+        # Friendly name of the library
+        [types.referencedLibraryType.name]
+        type = "string"
+
+        # Friendly version string of the library
+        [types.referencedLibraryType.version]
+        type = "string"
+
+        # The name of the library that is linked or a flag to link it with, as could be used by a build system.
+        # E.g. "ucrtbase.dll" or "/openmp"
+        [types.referencedLibraryType.link_lib]
+        type = "string"
+
+
+# Definition of the well-known tables/keys/etc in HAT TOML data
+[elements]
+
+    # Description of the HAT contents
+    [elements.description]
+    type = "table"
+
+        # Optional user-specified comment about the HAT file or library
+        [elements.description.comment]
+        type = "string"
+        optional = true
+
+        # Optional user-specified author name
+        [elements.description.author]
+        type = "string"
+        optional = true
+
+        # Version number of the library this HAT data represents
+        [elements.description.version]
+        type = "string"
+
+        # Url to the full license text for this library
+        [elements.description.license_url]
+        type = "string"
+
+    # Collection of functions declared within the HAT file and their metadata
+    # The keys in a collection are not prescribed by the schema, and in this case are the names of the functions as the HAT format does not support function overloading.
+    [elements.functions]
+    type = "collection"
+    typeof = "functionType"
+
+    # Table of information about the target device the functions described in this HAT file are intended to be used with
+    [elements.target]
+    type = "table"
+
+        # Required target features in order to run these functions without error
+        [elements.target.required]
+        type = "table"
+
+            # The OS that this HAT library is built for
+            [elements.target.required.os]
+            type = "string"
+            allowedvalues = [ "windows", "macos", "linux" ]
+
+            # Required CPU characteristics
+            [elements.target.required.CPU]
+            type = "table"
+
+                # Instruction set architecture, e.g. "x86_64"
+                [elements.target.required.CPU.architecture]
+                type = "string"
+
+                # Instruction set extensions used by these functions, e.g. "AVX2", "AVX512", etc
+                [elements.target.required.CPU.extensions]
+                type = "array"
+                arraytype = "string"
+
+                # Optional additional information not defined in this schema
+                [elements.target.required.CPU.auxiliary]
+                type = "table"
+                optional = true
+            
+            # Required GPU characteristics if there are GPU functions in this HAT library
+            [elements.target.required.GPU]
+            type = "table"
+            optional = true
+
+                # Required GPU runtime library
+                [elements.target.required.GPU.runtime]
+                type = "string"
+                allowedvalues = [ "cuda", "rocm", "vulkan" ]
+
+                # Minimum GPU instruction set version
+                [elements.target.required.GPU.instruction_set_version]
+                type = "string"
+
+                # Minimum number of GPU threads this library will attempt to use
+                [elements.target.required.GPU.min_threads]
+                type = "integer"
+
+                # Minimum global memory in KB that will be allocated
+                [elements.target.required.GPU.min_global_memory_KB]
+                type = "integer"
+                
+                # Minimum shared memory in KB that will be allocated
+                [elements.target.required.GPU.min_shared_memory_KB]
+                type = "integer"
+
+                # Optional additional requirements not specified in this schema
+                [elements.target.required.GPU.auxiliary]
+                type = "table"
+                optional = true
+
+        # Target characteristics that these functions are optimized for, but are not required in order to function
+        [elements.target.optimized_for]
+        type = "table"
+        optional = true
+
+            # Optimized CPU target information
+            [elements.target.optimized_for.CPU]
+            type = "table"
+            optional = true
+
+                # Full name and version of the CPU, e.g. "Intel Xeon E5-4669 v4"
+                [elements.target.optimized_for.CPU.name]
+                type = "string"
+
+                # Processor family, e.g. "Broadwell"
+                [elements.target.optimized_for.CPU.family]
+                type = "string"
+
+                # Base processor clock speed; a float because fractional values such as 2.2 are expected (unit presumably GHz — TODO confirm)
+                [elements.target.optimized_for.CPU.clock_frequency]
+                type = "float"
+
+                # Number of CPU cores
+                [elements.target.optimized_for.CPU.cores]
+                type = "integer"
+
+                # Number of CPU threads
+                [elements.target.optimized_for.CPU.threads]
+                type = "integer"
+
+                # Optimized cache characteristics
+                [elements.target.optimized_for.CPU.cache]
+                type = "table"
+                optional = true
+
+                    # Cache line size in bytes
+                    [elements.target.optimized_for.CPU.cache.cache_line]
+                    type = "integer"
+
+                    # Instruction cache size in KB
+                    [elements.target.optimized_for.CPU.cache.instruction_KB]
+                    type = "integer"
+
+                    # TODO : discussion - do we instead want this to be an array of cache sizes so 'l1', 'l2', 'l3' aren't baked into the schema?
+                    # Cache sizes in KB
+                    [elements.target.optimized_for.CPU.cache.l1_KB]
+                    type = "integer"
+                    [elements.target.optimized_for.CPU.cache.l2_KB]
+                    type = "integer"
+                    [elements.target.optimized_for.CPU.cache.l3_KB]
+                    type = "integer"
+                
+                # Optional additional optimized target characteristics not specified in this schema
+                [elements.target.optimized_for.CPU.auxiliary]
+                type = "table"
+                optional = true
+
+            # Optimized GPU target information
+            [elements.target.optimized_for.GPU]
+            type = "table"
+            optional = true
+                # Best optimized GPU core count
+                [elements.target.optimized_for.GPU.cores]
+                type = "integer"
+
+                # Best optimized GPU thread count
+                [elements.target.optimized_for.GPU.threads]
+                type = "integer"
+
+                # Optimized instruction set version
+                [elements.target.optimized_for.GPU.instruction_set_version]
+                type = "string"
+
+                # Optional additional optimized target characteristics not specified in this schema
+                [elements.target.optimized_for.GPU.auxiliary]
+                type = "table"
+                optional = true
+
+    # Table describing dependencies that projects using this HAT library must supply
+    [elements.dependencies]
+    type = "table"
+        # Dynamic libraries that must be linked
+        [elements.dependencies.dynamic]
+        type = "array"
+        arraytype = "types.referencedLibraryType"
+
+        # Optional additional dependency information not specified in this schema
+        [elements.dependencies.auxiliary]
+        type = "table"
+        optional = true
+
+    # Table giving information about how this HAT library was built
+    [elements.compiled_with]
+    type = "table"
+        # Compiler name and version, e.g. "MSVC141"
+        [elements.compiled_with.compiler]
+        type = "string"
+
+        # Compilation flags, e.g. "-std=c++14 -ffast-math"
+        [elements.compiled_with.flags]
+        type = "string"
+
+        # C Runtime linked against, e.g. "ucrt"
+        [elements.compiled_with.crt]
+        type = "string"
+
+        # Statically linked libraries in this HAT library
+        [elements.compiled_with.libraries]
+        type = "array"
+        arraytype = "types.referencedLibraryType"
+
+        # Optional additional compilation information not specified in this schema
+        [elements.compiled_with.auxiliary]
+        type = "table"
+        optional = true
+
+    # Table containing the C header code in this HAT file
+    [elements.declaration]
+    type = "table"
+        # String containing the entirety of the C header code
+        [elements.declaration.code]
+        type = "string"