add C generation and argmax models

2022-11-17 15:21:53 -08:00 · 2022-11-17 15:21:53 -08:00 · 0aea2494dc
--- a/README.md
+++ b/README.md
@ -13,9 +13,15 @@ Once compiled, weights can be stored as `float32` or `float16`.
 The following operators are supported:
 * `Conv1D`
 * `Conv2D`
+* `DepthwiseConv1D`
+* `DepthwiseConv2D`
 * `MaxPooling1D`
 * `MaxPooling2D`
+* `AveragePooling1D`
+* `AveragePooling2D`
 * `Dense`
+* `Activation`
+* `BatchNormalization`

 Plus some no-ops:
 * `InputLayer`
--- a/cli/src/cli.ts
+++ b/cli/src/cli.ts
@ -9,7 +9,8 @@ import {
    loadFlatJSONModel,
    loadTfjsModelJSON,
    Options, runModel, sampleModel, testAllModels,
-    testFloatConv
+    testFloatConv,
+    toCSource
 } from '../../src/main'

 interface CmdOptions {
@ -158,6 +159,7 @@ async function processModelFile(modelFile: string) {
    write(".asm", cres.thumb)
    write(".js", cres.js)
    write(".ml4f", cres.machineCode)
+    write(".c", toCSource(options.basename.replace(/[^\w]/g, "_"), cres.machineCode))
    write("_layerStats.json", JSON.stringify(cres.stats, null, 4))

    let evalInfo = `\n*** ${built(options.basename + ".ml4f")}\n\n`
--- a/sample/ml4f.c
+++ b/sample/ml4f.c
@ -84,6 +84,19 @@ uint32_t ml4f_shape_size(const uint32_t *shape, uint32_t type) {
    return ml4f_shape_elements(shape) << 2;
 }

+// Returns the index of the largest of the `size` floats in `data`, or -1 when
+// `size` is 0. Only a strictly greater value replaces the current best, so on
+// ties the lowest index wins.
+int ml4f_argmax(float *data, uint32_t size) {
+    if (size == 0)
+        return -1;
+
+    int best = 0;
+    float best_val = data[0];
+    // Element 0 already seeds the running maximum, so the scan starts at 1.
+    for (uint32_t idx = 1; idx < size; ++idx) {
+        const float cur = data[idx];
+        if (cur > best_val) {
+            best_val = cur;
+            best = (int)idx;
+        }
+    }
+    return best;
+}
+
 // This function is just an example - you'll likely have your own tensor formats and memory
 // allocation functions

@ -101,3 +114,17 @@ int ml4f_full_invoke(const ml4f_header_t *model, const float *input, float *outp
    free(arena);
    return r;
 }
+
+// Runs `model` on `input` in a temporary heap-allocated arena and returns the
+// ml4f_argmax() of the output, which is read as an array of floats.
+// Returns -1 when the header is invalid or the arena cannot be allocated. If
+// ml4f_invoke() fails, its non-zero result is returned unchanged.
+// NOTE(review): a positive ml4f_invoke() error code would be indistinguishable
+// from a valid index - confirm its error codes are negative.
+int ml4f_full_invoke_argmax(const ml4f_header_t *model, const float *input) {
+    if (!ml4f_is_valid_header(model))
+        return -1;
+    uint8_t *arena = malloc(model->arena_bytes);
+    // Without this check an allocation failure would memcpy through NULL.
+    if (!arena)
+        return -1;
+    memcpy(arena + model->input_offset, input,
+           ml4f_shape_size(ml4f_input_shape(model), model->input_type));
+    int r = ml4f_invoke(model, arena);
+    if (r == 0)
+        // ml4f_shape_size() is in bytes; >> 2 converts it to a count of 4-byte floats.
+        r = ml4f_argmax((float *)(arena + model->output_offset),
+                        ml4f_shape_size(ml4f_output_shape(model), model->output_type) >> 2);
+    free(arena);
+    return r;
+}
--- a/sample/ml4f.h
+++ b/sample/ml4f.h
@ -3,6 +3,10 @@

 #include <stdint.h>

+#ifdef __cplusplus
+extern "C" {
+#endif
+
 #define ML4F_TYPE_FLOAT32 1

 #define ML4F_MAGIC0 0x30470f62
@ -37,6 +41,13 @@ const uint32_t *ml4f_input_shape(const ml4f_header_t *model);
 const uint32_t *ml4f_output_shape(const ml4f_header_t *model);
 uint32_t ml4f_shape_elements(const uint32_t *shape);
 uint32_t ml4f_shape_size(const uint32_t *shape, uint32_t type);
+int ml4f_argmax(float *data, uint32_t size);
+
 int ml4f_full_invoke(const ml4f_header_t *model, const float *input, float *output);
+int ml4f_full_invoke_argmax(const ml4f_header_t *model, const float *input);
+
+#ifdef __cplusplus
+}
+#endif

 #endif
--- a/src/driver.ts
+++ b/src/driver.ts
@ -34,7 +34,8 @@ export function assemble(src: string) {

    throwAssemblerErrors(procFile)

-    const binary = new Uint8Array(procFile.buf.length << 1)
+    // 16-byte aligned size
+    const binary = new Uint8Array(((procFile.buf.length << 1) + 15) & ~15)
    for (let i = 0; i < procFile.buf.length; ++i) {
        binary[i << 1] = procFile.buf[i] & 0xff
        binary[(i << 1) + 1] = (procFile.buf[i] >> 8) & 0xff
@ -283,3 +284,19 @@ export function loadFlatJSONModel(preModel: any) {

    return model
 }
+
+/**
+ * Renders a compiled model image as C source defining a `const unsigned` array.
+ *
+ * The bytes are grouped into little-endian 32-bit words and printed as
+ * zero-padded hex literals, eight per line.
+ *
+ * @param name identifier of the generated array; emitted verbatim
+ * @param machineCode model bytes; the length must be a multiple of 4
+ * @returns the C source text
+ * @throws Error if `machineCode.length` is not a multiple of 4
+ */
+export function toCSource(name: string, machineCode: Uint8Array): string {
+    if (machineCode.length & 3)
+        throw new Error(`machine code length (${machineCode.length}) is not a multiple of 4`)
+    // Read through a DataView bounded by the view's own offset and length:
+    // `machineCode` may be a subarray of a larger (or unaligned) buffer, and the
+    // words must be little-endian regardless of host byte order.
+    const view = new DataView(machineCode.buffer, machineCode.byteOffset, machineCode.byteLength)
+    const numWords = machineCode.length >> 2
+    const wordsPerLine = 8
+    let r = `const unsigned ${name}[${numWords}] = {\n`
+    for (let off = 0; off < numWords; off += wordsPerLine) {
+        r += "    "
+        const end = Math.min(off + wordsPerLine, numWords)
+        for (let i = off; i < end; ++i) {
+            const word = view.getUint32(i << 2, true)
+            r += "0x" + ("00000000" + word.toString(16)).slice(-8) + ", "
+        }
+        r += "\n"
+    }
+    r += "};\n"
+    return r
+}