Incorporating review feedback from teams discussions

2021-01-26 22:04:14 -08:00 · 2021-01-26 22:04:14 -08:00 · fc8a87ad8b
--- a/samples/sample_gemm_library.hat
+++ b/samples/sample_gemm_library.hat
@ -5,15 +5,19 @@ comment = "Automatically generated by the RoboCode compiler"
 author = "John Doe"
 version = "1.2.3.5"
 license_url = "https://www.apache.org/licenses/LICENSE-2.0.html"
+library_name = "sample_gemm.lib"
+library_md5 = "ABCD1234..."

 [functions]
    [functions.GEMM_B94D27B9934D3E08]
    name = "GEMM_B94D27B9934D3E08"
    description = "CPU Implementation of the GEMM algorithm: C = alpha * (A * B) + beta * C with specific sizes."
+    calling_convention = "__stdcall"
+    can_assert = false
    arguments = [
-        { name = "A", description = "left-hand input matrix", logical_type = "affine_array", declared_type = "const float*", element_type = "float", usage = "input", layout = { affine_array = { shape = [ 512, 256 ], affine_map = [ 256, 1 ], affine_offset = 0 } } },
-        { name = "B", description = "right-hand input matrix", logical_type = "affine_array", declared_type = "const float*", element_type = "float", usage = "input", layout = { affine_array = { shape = [ 256, 512 ], affine_map = [ 512, 1 ], affine_offset = 0 } } },
-        { name = "C", description = "result accumulation matrix", logical_type = "affine_array", declared_type = "float*", element_type = "float", usage = "input_output", layout = { affine_array = { shape = [ 512, 512 ], affine_map = [ 512, 1 ], affine_offset = 0 } } },
+        { name = "A", description = "left-hand input matrix", logical_type = "affine_array", declared_type = "const float*", element_type = "float", usage = "input", shape = [ 512, 256 ], affine_map = [ 256, 1 ], affine_offset = 0 },
+        { name = "B", description = "right-hand input matrix", logical_type = "affine_array", declared_type = "const float*", element_type = "float", usage = "input", shape = [ 256, 512 ], affine_map = [ 512, 1 ], affine_offset = 0 },
+        { name = "C", description = "result accumulation matrix", logical_type = "affine_array", declared_type = "float*", element_type = "float", usage = "input_output", shape = [ 512, 512 ], affine_map = [ 512, 1 ], affine_offset = 0 },
        { name = "alpha", description = "Scalar to apply to A*B matrix product", logical_type = "element", declared_type = "float", element_type = "float", usage = "input" },
        { name = "beta", description = "Scalar to apply to the input C matrix before accumulating", logical_type = "element", declared_type = "float", element_type = "float", usage = "input" }
    ]
@ -28,6 +32,7 @@ license_url = "https://www.apache.org/licenses/LICENSE-2.0.html"

            [functions.GEMM_B94D27B9934D3E08.auxiliary.RoboCode]
            implementation = "MLAS_Value_GEMM"
+            generator_version = "1.0"
            node = "GEMM"
                [functions.GEMM_B94D27B9934D3E08.auxiliary.RoboCode.implementation_config]
                MLAS_Value_CacheBMatrix = true
@ -36,17 +41,19 @@ license_url = "https://www.apache.org/licenses/LICENSE-2.0.html"
    [functions.blas_sgemm_row_major]
    name = "blas_sgemm_row_major"
    description = "CPU Implementation of the GEMM algorithm: C = alpha * (A * B) + beta * C with runtime-known sizes and non-transposed row-major matrices."
+    calling_convention = "__stdcall"
+    can_assert = true
    arguments = [
        { name = "M", description = "Number of rows in the A and C matrices", logical_type = "element", declared_type = "int", element_type = "int", usage = "input" },
        { name = "N", description = "Number of columns in the B and C matrices", logical_type = "element", declared_type = "int", element_type = "int", usage = "input" },
        { name = "K", description = "Number of columns in the A matrix and rows in the B matrix", logical_type = "element", declared_type = "int", element_type = "int", usage = "input" },
        { name = "alpha", description = "Scalar to apply to A*B matrix product", logical_type = "element", declared_type = "float", element_type = "float", usage = "input" },
-        { name = "A", description = "left-hand input matrix", logical_type = "runtime_array", declared_type = "const float*", element_type = "float", usage = "input", layout = { runtime_array = { size = "lda * K" } } },
+        { name = "A", description = "left-hand input matrix", logical_type = "runtime_array", declared_type = "const float*", element_type = "float", usage = "input", size = "lda * M" },
        { name = "lda", description = "Leading dimension step size of A", logical_type = "element", declared_type = "int", element_type = "int", usage = "input" },
-        { name = "B", description = "right-hand input matrix", logical_type = "runtime_array", declared_type = "const float*", element_type = "float", usage = "input", layout = { runtime_array = { size = "ldb * N" } } },
+        { name = "B", description = "right-hand input matrix", logical_type = "runtime_array", declared_type = "const float*", element_type = "float", usage = "input", size = "ldb * K" },
        { name = "ldb", description = "Leading dimension step size of B", logical_type = "element", declared_type = "int", element_type = "int", usage = "input" },
        { name = "beta", description = "Scalar to apply to the input C matrix before accumulating", logical_type = "element", declared_type = "float", element_type = "float", usage = "input" },
-        { name = "C", description = "result accumulation matrix", logical_type = "runtime_array", declared_type = "const float*", element_type = "float", usage = "input", layout = { runtime_array = { size = "ldc * N" } } },
+        { name = "C", description = "result accumulation matrix", logical_type = "runtime_array", declared_type = "float*", element_type = "float", usage = "input_output", size = "ldc * M" },
        { name = "ldc", description = "Leading dimension step size of C", logical_type = "element", declared_type = "int", element_type = "int", usage = "input" }
    ]
    return = { name = "return value", description = "blas_sgemm_row_major return value", logical_type = "void", declared_type = "void", element_type = "void", usage = "output" }
@ -65,13 +72,11 @@ license_url = "https://www.apache.org/licenses/LICENSE-2.0.html"
    clock_frequency = 2.2
    cores = 22
    threads = 44
-    
+
        [target.optimized_for.CPU.cache]
-        cache_line = 64
        instruction_KB = 32
-        l1_KB = 32
-        l2_KB = 256
-        l3_KB = 56320
+        sizes_KB = [ 32, 256, 56320 ]
+        line_sizes = [ 64, 64, 64 ]

 [dependencies]
 dynamic = [ { name = "Windows Universal C Runtime", version = "141", link_lib = "ucrtbase.dll" } ]
@ -80,7 +85,7 @@ dynamic = [ { name = "Windows Universal C Runtime", version = "141", link_lib =
 compiler = "MSVC141"
 flags = "-std=c++14 -ffast-math -fno-exceptions -fno-rtti"
 crt = "ucrt"
-libraries = [ { name = "OpenMP", version = "5.1", link_lib = "/openmp" } ]
+libraries = [ { name = "LLVM OpenMP", version = "5.1", link_lib = "libomp.lib" } ]

 [declaration]
 code = '''
--- a/schema/hat.tosd
+++ b/schema/hat.tosd
@ -1,41 +1,10 @@
 # HAT TOML Schema
 [toml-schema]
-version = "0.0.0.1"
+version = "0.0.0.2"

 # Types to be used elsewhere in this schema
 [types]

-    # Layout type associated with "affine_array" logical parameter types
-    # "affine_array" logical parameters have fully compile-time-known sizes that are encoded in this metadata
-    [types.affineArrayLayoutType]
-    type = "table"
-
-        # Array giving the logical shape of the buffer, e.g. [256, 256]
-        [types.affineArrayLayoutType.shape]
-        type = "array"
-        arraytype = "integer"
-
-        # Array giving the affine map coefficients used to map from multi-dimensional coordinates to an offset in the C-style array buffer.
-        # e.g. [256, 1] would indicate that for index (i, j), the position in the buffer is (256 * i) + (1 * j)
-        # Should have the same number of elements as the shape array
-        [types.affineArrayLayoutType.affine_map]
-        type = "array"
-        arraytype = "integer"
-
-        # Offset from the buffer pointer where the logical affine array begins.
-        [types.affineArrayLayoutType.affine_offset]
-        type = "integer"
-    
-    # Layout type associated with "runtime_array" logical parameter types
-    # "runtime_array" logical parameters have a runtime-determined size that is given by other parameters to the function.
-    [types.runtimeArrayLayoutType]
-    type = "table"
-
-        # A string describing the number of elements in the buffer, typically expected to reference other parameters in the function.
-        # e.g. "N", "lda * K"
-        [types.runtimeArrayLayoutType.size]
-        type = "string"
-
    # Parameter type of arguments and return values
    [types.paramType]
    type = "table"
@ -66,13 +35,31 @@ version = "0.0.0.1"
        type = "string"
        allowedvalues = [ "input_output", "input", "output" ]

-        # The layout of the parameter, if applicable. Only array logical types have a layout.
-        # TOML schema only supports 'oneof' constraints on collections, so we use a collection and have a single dummy key in it
-        [types.paramType.layout]
-        type = "collection"
-        oneof = [ "types.affineArrayLayoutType", "types.runtimeArrayLayoutType" ]
-        minlength = 1
-        maxlength = 1
+        # Optional array giving the logical shape of the buffer for an affine_array logical type, e.g. [256, 256]
+        [types.paramType.shape]
+        type = "array"
+        arraytype = "integer"
+        optional = true
+
+        # Optional array giving the affine map coefficients used to map from multi-dimensional coordinates to an
+        # offset in the C-style array buffer for an affine_array logical type.
+        # e.g. [256, 1] would indicate that for index (i, j), the position in the buffer is (256 * i) + (1 * j)
+        # Should have the same number of elements as the shape array
+        [types.paramType.affine_map]
+        type = "array"
+        arraytype = "integer"
+        optional = true
+
+        # Optional offset from the buffer pointer where the array data begins for an affine_array logical type.
+        [types.paramType.affine_offset]
+        type = "integer"
+        optional = true
+
+        # Optional string describing the number of elements in the buffer for a runtime_array logical type.
+        # Typically expected to reference other parameters in the function.
+        # e.g. "N", "lda * K"
+        [types.paramType.size]
+        type = "string"
        optional = true

    # Type for a function described by TOML data and declared in the C declaration later in the HAT file
@ -86,6 +73,15 @@ version = "0.0.0.1"
        # A friendly description of what the function does
        [types.functionType.description]
        type = "string"
+
+        # The calling convention for this function
+        [types.functionType.calling_convention]
+        type = "string"
+        allowedvalues = [ "__stdcall", "__cdecl", "__fastcall", "__vectorcall" ]
+
+        # Whether or not this function can assert based on arguments given
+        [types.functionType.can_assert]
+        type = "boolean"
    
        # An array of arguments to the function
        [types.functionType.arguments]
@ -144,6 +140,14 @@ version = "0.0.0.1"
        [elements.description.license_url]
        type = "string"

+        # Library file name
+        [elements.description.library_name]
+        type = "string"
+
+        # MD5 hash of the library file this .hat file is associated with
+        [elements.description.library_md5]
+        type = "string"
+
    # Collection of functions declared within the HAT file and their metadata
    # The keys in a collection are not prescribed by the schema, and in this case are the names of the functions as the HAT format does not support function overloading.
    [elements.functions]
@ -247,22 +251,19 @@ version = "0.0.0.1"
                type = "table"
                optional = true

-                    # Cache line size in bytes
-                    [elements.target.optimized_for.CPU.cache.cache_line]
-                    type = "integer"
-
                    # Instruction cache size in KB
                    [elements.target.optimized_for.CPU.cache.instruction_KB]
                    type = "integer"

-                    # TODO : discussion - do we instead want this to be an array of cache sizes so 'l1', 'l2', 'l3' aren't baked into the schema?
-                    # Cache sizes in KB
-                    [elements.target.optimized_for.CPU.cache.l1_KB]
-                    type = "integer"
-                    [elements.target.optimized_for.CPU.cache.l2_KB]
-                    type = "integer"
-                    [elements.target.optimized_for.CPU.cache.l3_KB]
-                    type = "integer"
+                    # Cache sizes in KB, ordered by cache level starting at L1, e.g. [ 32, 256, 56320 ] for L1, L2, L3
+                    [elements.target.optimized_for.CPU.cache.sizes_KB]
+                    type = "array"
+                    arraytype = "integer"
+
+                    # Cache line sizes in bytes, one entry per cache level in the same order as sizes_KB, e.g. [ 64, 64, 64 ]
+                    [elements.target.optimized_for.CPU.cache.line_sizes]
+                    type = "array"
+                    arraytype = "integer"
                
                # Optional additional optimized target characteristics not specified in this schema
                [elements.target.optimized_for.CPU.auxiliary]
@ -273,6 +274,7 @@ version = "0.0.0.1"
            [elements.target.optimized_for.GPU]
            type = "table"
            optional = true
+
                # Best optimized GPU core count
                [elements.target.optimized_for.GPU.cores]
                type = "integer"
@ -290,9 +292,10 @@ version = "0.0.0.1"
                type = "table"
                optional = true

-    # Table describing dependencies that projects using this HAT library must supply
+    # Table describing dependencies that projects using this HAT library must supply at runtime
    [elements.dependencies]
    type = "table"
+
        # Dynamic libraries that must be linked
        [elements.dependencies.dynamic]
        type = "array"
@ -306,6 +309,7 @@ version = "0.0.0.1"
    # Table giving information about how this HAT library was built
    [elements.compiled_with]
    type = "table"
+
        # Compiler name and version, e.g. "MSVC141"
        [elements.compiled_with.compiler]
        type = "string"
@ -331,6 +335,7 @@ version = "0.0.0.1"
    # Table containing the C header code in this HAT file
    [elements.declaration]
    type = "table"
+
        # String containing the entirety of the C header code
        [elements.declaration.code]
        type = "string"