Bundled interpreter demo (#2297)
Parent: 766008ca4f
Commit: 001ab52509

@ -0,0 +1,39 @@ Makefile

```makefile
# Makefile Example to bundle TVM modules.
TVM_ROOT=$(shell cd ../..; pwd)
NNVM_PATH=nnvm
DMLC_CORE=${TVM_ROOT}/3rdparty/dmlc-core
PKG_CFLAGS = -std=c++14 -Oz -fPIC\
	-I${TVM_ROOT}/include\
	-I${DMLC_CORE}/include\
	-I${TVM_ROOT}/3rdparty/dlpack/include

PKG_LDFLAGS = -L${TVM_ROOT}/build

build_dir := build

test: $(build_dir)/demo $(build_dir)/bundle.so
	$(build_dir)/demo $(build_dir)/bundle.so

# (On Linux toolchains you may need to append -ldl here for dlopen/dlsym.)
$(build_dir)/demo: demo.cc
	@mkdir -p $(@D)
	$(CXX) $(PKG_CFLAGS) -o $@ $^

# Serialize our graph.json file.
$(build_dir)/graph.json.cc: $(build_dir)/graph.json
	xxd -i $^ > $@

# Serialize our params.bin file.
$(build_dir)/params.bin.cc: $(build_dir)/params.bin
	xxd -i $^ > $@

$(build_dir)/model.o $(build_dir)/graph.json $(build_dir)/params.bin: build_model.py
	python $< -o $(build_dir)

# Build our bundle against the serialized bundle.cc API, the runtime.cc API, and
# the serialized graph.json and params.bin.
# -fvisibility=hidden hides everything except the symbols explicitly marked
# visibility("default") in bundle.cc, i.e. the tvm_runtime_* C API.
$(build_dir)/bundle.so: bundle.cc runtime.cc $(build_dir)/model.o $(build_dir)/graph.json.cc $(build_dir)/params.bin.cc
	@mkdir -p $(@D)
	$(CXX) $(PKG_CFLAGS) -fvisibility=hidden -o $@ $^ $(PKG_LDFLAGS) -shared

clean:
	rm -r $(build_dir)
```
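The two `xxd -i` rules are what turn the serialized graph and params into linkable C sources. `xxd -i` derives the array identifiers from the input path (`build/graph.json` becomes `build_graph_json`), so the generated `build/graph.json.cc` looks roughly like the sketch below; the byte values and length are placeholders, not real output:

```cpp
// Sketch of xxd -i output for build/graph.json (illustrative values only).
unsigned char build_graph_json[] = {
    0x7b, 0x22, 0x6e, 0x6f, 0x64, 0x65, 0x73, 0x22, /* ...rest of the JSON bytes... */
};
unsigned int build_graph_json_len = 12345; /* actual byte count of graph.json */
```

These identifiers are exactly the `extern` symbols that `bundle.cc` (below) declares and consumes.
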
@ -0,0 +1,35 @@ README.md

How to Bundle TVM Modules
=========================

This folder contains an example of how to bundle a TVM module (with the required
interpreter runtime modules such as `runtime::GraphRuntime`, the graph JSON, and
the params) into a single, self-contained shared object (`bundle.so`) which
exposes a C API wrapping the appropriate `runtime::GraphRuntime` instance.

This is useful for cases where we'd like to avoid deploying the TVM runtime
components to the target host in advance - instead, we simply deploy the bundled
shared object to the host, which embeds both the model and the runtime
components. The bundle should depend only on libc/libc++.

This folder also contains example code (`demo.cc`) that loads the shared object
and invokes the packaged TVM model instance. This is a dependency-free binary
that uses the functionality packaged in `bundle.so` (which means that `bundle.so`
can be deployed lazily at runtime, instead of at compile time) to invoke TVM
functionality.

After building TVM, run the following command in this folder to build and run
the sample code (the Makefile's entry point is the `test` target):

```bash
make test
```

This will:

- Download the mobilenet0.25 model from the MXNet Gluon Model Zoo
- Compile the model with NNVM
- Build a `bundle.so` shared object containing the model specification and
  parameters
- Build a `demo` executable that `dlopen`'s `bundle.so`, instantiates the
  contained graph runtime, invokes the `GraphRuntime::Run` function on a
  random input, then prints the output tensor to `stderr`
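For quick reference, the full C API that `bundle.so` exports is just five functions (these declarations mirror the definitions in `bundle.cc` below):

```cpp
extern "C" {
// Instantiate the embedded GraphRuntime; returns an opaque handle.
void *tvm_runtime_create();
// Destroy the runtime behind the handle.
void tvm_runtime_destroy(void *handle);
// Bind a named input; `tensor` must point to a DLTensor.
void tvm_runtime_set_input(void *handle, const char *name, void *tensor);
// Execute one inference pass.
void tvm_runtime_run(void *handle);
// Copy the index-th output into `tensor` (a DLTensor *).
void tvm_runtime_get_output(void *handle, int index, void *tensor);
}
```
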
@ -0,0 +1,40 @@ build_model.py

```python
"""Creates a simple TVM module."""

import argparse
import logging
import os

import nnvm.compiler
import nnvm.testing
import tvm


def main():
    logging.basicConfig(level=logging.INFO)

    parser = argparse.ArgumentParser()
    parser.add_argument('-o', '--out-dir', default='.')
    opts = parser.parse_args()

    dshape = (1, 3, 224, 224)
    from mxnet.gluon.model_zoo.vision import get_model
    block = get_model('mobilenet0.25', pretrained=True)
    net, params = nnvm.frontend.from_mxnet(block)
    net = nnvm.sym.softmax(net)

    with nnvm.compiler.build_config(opt_level=3):
        graph, lib, params = nnvm.compiler.build(
            net, 'llvm --system-lib', shape={'data': dshape}, params=params)
    print(graph.symbol().debug_str())

    build_dir = os.path.abspath(opts.out_dir)
    if not os.path.isdir(build_dir):
        os.makedirs(build_dir)

    # Save the three artifacts the Makefile consumes: the compiled model
    # object, the serialized graph, and the serialized params.
    lib.save(os.path.join(build_dir, 'model.o'))
    with open(os.path.join(build_dir, 'graph.json'), 'w') as f_graph_json:
        f_graph_json.write(graph.json())
    with open(os.path.join(build_dir, 'params.bin'), 'wb') as f_params:
        f_params.write(nnvm.compiler.save_param_dict(params))


if __name__ == '__main__':
    main()
```
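The important detail above is the `--system-lib` part of the `llvm --system-lib` target string: it makes the generated `model.o` register its functions with the TVM runtime during static initialization, so the bundle can recover the compiled model from the global registry instead of `dlopen`'ing a separate library. This is exactly what `bundle.cc` does:

```cpp
// Fetch the statically linked, self-registered model (as in bundle.cc below).
tvm::runtime::Module mod_syslib =
    (*tvm::runtime::Registry::Get("module._GetSystemLib"))();
```
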
@ -0,0 +1,47 @@ bundle.cc

```cpp
#include <memory>
#include <string>

#include <tvm/runtime/c_runtime_api.h>
#include <tvm/runtime/module.h>
#include <tvm/runtime/registry.h>

// Symbols generated by `xxd -i` from build/graph.json and build/params.bin
// (see the Makefile rules above).
extern unsigned char build_graph_json[];
extern unsigned int build_graph_json_len;
extern unsigned char build_params_bin[];
extern unsigned int build_params_bin_len;

#define TVM_BUNDLE_FUNCTION __attribute__((visibility("default"))) extern "C"

TVM_BUNDLE_FUNCTION void *tvm_runtime_create() {
  const std::string json_data(&build_graph_json[0],
                              &build_graph_json[0] + build_graph_json_len);
  // The model was compiled with --system-lib, so it registered itself with
  // the runtime at static-initialization time.
  tvm::runtime::Module mod_syslib =
      (*tvm::runtime::Registry::Get("module._GetSystemLib"))();
  int device_type = kDLCPU;
  int device_id = 0;
  tvm::runtime::Module mod =
      (*tvm::runtime::Registry::Get("tvm.graph_runtime.create"))(
          json_data, mod_syslib, device_type, device_id);
  TVMByteArray params;
  params.data = reinterpret_cast<const char *>(&build_params_bin[0]);
  params.size = build_params_bin_len;
  mod.GetFunction("load_params")(params);
  return new tvm::runtime::Module(mod);
}

TVM_BUNDLE_FUNCTION void tvm_runtime_destroy(void *handle) {
  delete reinterpret_cast<tvm::runtime::Module *>(handle);
}

TVM_BUNDLE_FUNCTION void tvm_runtime_set_input(void *handle, const char *name,
                                               void *tensor) {
  reinterpret_cast<tvm::runtime::Module *>(handle)->GetFunction("set_input")(
      name, reinterpret_cast<DLTensor *>(tensor));
}

TVM_BUNDLE_FUNCTION void tvm_runtime_run(void *handle) {
  reinterpret_cast<tvm::runtime::Module *>(handle)->GetFunction("run")();
}

TVM_BUNDLE_FUNCTION void tvm_runtime_get_output(void *handle, int index,
                                                void *tensor) {
  reinterpret_cast<tvm::runtime::Module *>(handle)->GetFunction("get_output")(
      index, reinterpret_cast<DLTensor *>(tensor));
}
```
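One caveat: `tvm::runtime::Registry::Get` returns `nullptr` when nothing is registered under the given name (for example, if a needed runtime source were left out of `runtime.cc`), and the code above dereferences the result unconditionally. A hypothetical hardened fragment for `tvm_runtime_create` could check first:

```cpp
// Hypothetical defensive variant of the registry lookup (not in this commit).
const tvm::runtime::PackedFunc *create =
    tvm::runtime::Registry::Get("tvm.graph_runtime.create");
if (create == nullptr) {
  return nullptr;  // graph runtime not compiled into the bundle
}
tvm::runtime::Module mod =
    (*create)(json_data, mod_syslib, device_type, device_id);
```
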
@ -0,0 +1,66 @@ demo.cc

```cpp
#include "tvm/runtime/c_runtime_api.h"
#include <assert.h>
#include <dlfcn.h> // dlopen, dlsym, dlerror, dlclose
#include <dlpack/dlpack.h>
#include <iostream>
#include <random>
#include <type_traits>
#include <vector>

// Look up `name` in the dlopen'd bundle and cast it to the function type F.
template <typename F> auto getFunc(void *bundle, const char *name) {
  dlerror();  // clear any stale error state
  auto *f =
      reinterpret_cast<typename std::add_pointer<F>::type>(dlsym(bundle, name));
  assert(!dlerror());
  return f;
}

int main(int argc, char **argv) {
  assert(argc == 2 && "Usage: demo <bundle.so>");
  auto *bundle = dlopen(argv[1], RTLD_LAZY | RTLD_LOCAL);
  assert(bundle);

  auto *handle = getFunc<void *()>(bundle, "tvm_runtime_create")();

  // Fill a 1x3x224x224 input tensor with uniform random values.
  std::vector<float> input_storage(1 * 3 * 224 * 224);
  std::mt19937 gen(0);
  for (auto &e : input_storage) {
    e = std::uniform_real_distribution<float>(0.0, 1.0)(gen);
  }

  std::vector<int64_t> input_shape = {1, 3, 224, 224};
  DLTensor input;
  input.data = input_storage.data();
  input.ctx = DLContext{kDLCPU, 0};
  input.ndim = 4;
  input.dtype = DLDataType{kDLFloat, 32, 1};
  input.shape = input_shape.data();
  input.strides = nullptr;
  input.byte_offset = 0;
  getFunc<void(void *, const char *, void *)>(bundle, "tvm_runtime_set_input")(
      handle, "data", &input);

  // Equivalent to getFunc<void(void *)>: the same function-pointer cast
  // written with trailing-return-type syntax.
  auto *ftvm_runtime_run =
      (auto (*)(void *)->void)dlsym(bundle, "tvm_runtime_run");
  assert(!dlerror());
  ftvm_runtime_run(handle);

  std::vector<float> output_storage(1000);
  std::vector<int64_t> output_shape = {1, 1000};
  DLTensor output;
  output.data = output_storage.data();
  output.ctx = DLContext{kDLCPU, 0};
  output.ndim = 2;
  output.dtype = DLDataType{kDLFloat, 32, 1};
  output.shape = output_shape.data();
  output.strides = nullptr;
  output.byte_offset = 0;

  getFunc<void(void *, int, void *)>(bundle, "tvm_runtime_get_output")(
      handle, 0, &output);
  for (size_t i = 0; i < output_storage.size(); ++i) {
    std::cerr << "output[" << i << "]: " << output_storage[i] << std::endl;
  }
  getFunc<void(void *)>(bundle, "tvm_runtime_destroy")(handle);
  dlclose(bundle);
  return 0;
}
```
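Note that all error handling in `demo.cc` goes through `assert`, which compiles away under `-DNDEBUG`. A hypothetical release-safe variant of `getFunc` would fail loudly instead:

```cpp
#include <cstdlib>      // std::abort
#include <dlfcn.h>      // dlsym, dlerror
#include <iostream>
#include <type_traits>  // std::add_pointer

// Hypothetical NDEBUG-safe replacement for getFunc (not part of this commit).
template <typename F> auto getFuncChecked(void *bundle, const char *name) {
  dlerror();  // clear any stale error state
  void *sym = dlsym(bundle, name);
  if (const char *err = dlerror()) {
    std::cerr << "dlsym(" << name << ") failed: " << err << std::endl;
    std::abort();
  }
  return reinterpret_cast<typename std::add_pointer<F>::type>(sym);
}
```
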
@ -0,0 +1,17 @@ runtime.cc

```cpp
// An amalgamated translation unit: compiles the minimal set of TVM runtime
// sources needed for CPU graph execution directly into bundle.so, so the
// bundle does not depend on a separately deployed TVM runtime library.
#include <dlpack/dlpack.h>
#include <tvm/runtime/module.h>
#include <tvm/runtime/registry.h>
#include <tvm/runtime/packed_func.h>

#include "../../src/runtime/c_runtime_api.cc"
#include "../../src/runtime/cpu_device_api.cc"
#include "../../src/runtime/workspace_pool.cc"
#include "../../src/runtime/module_util.cc"
#include "../../src/runtime/module.cc"
#include "../../src/runtime/registry.cc"
#include "../../src/runtime/file_util.cc"
#include "../../src/runtime/threading_backend.cc"
#include "../../src/runtime/thread_pool.cc"
#include "../../src/runtime/ndarray.cc"
#include "../../src/runtime/system_lib_module.cc"
#include "../../src/runtime/graph/graph_runtime.cc"
```
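The source list above is the minimal set for CPU-only graph execution. Supporting another device would mean amalgamating its runtime sources here as well and adjusting the link flags; hypothetically, for CUDA (paths assumed from the TVM source tree, untested):

```cpp
// Hypothetical additions for GPU support (would also require linking CUDA).
#include "../../src/runtime/cuda/cuda_device_api.cc"
#include "../../src/runtime/cuda/cuda_module.cc"
```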