diff --git a/nnvm/tests/python/compiler/test_rpc_exec.py b/nnvm/tests/python/compiler/test_rpc_exec.py
index 4cfd87ad..61baf18d 100644
--- a/nnvm/tests/python/compiler/test_rpc_exec.py
+++ b/nnvm/tests/python/compiler/test_rpc_exec.py
@@ -6,8 +6,8 @@ import numpy as np
def test_rpc_executor():
host = "localhost"
- port = 9091
- server = rpc.Server(host, port)
+ port = 9100
+ server = rpc.Server(host, port, use_popen=True)
x = sym.Variable("x")
y = sym.Variable("y")
diff --git a/nnvm/tutorials/define_and_compile_model.py b/nnvm/tutorials/define_and_compile_model.py
index 639684fb..35fe7b84 100644
--- a/nnvm/tutorials/define_and_compile_model.py
+++ b/nnvm/tutorials/define_and_compile_model.py
@@ -143,7 +143,7 @@ if not use_rasp:
# run server locally
host = 'localhost'
port = 9090
- server = rpc.Server(host=host, port=port)
+ server = rpc.Server(host=host, port=port, use_popen=True)
# compile and save model library
if use_rasp:
diff --git a/nnvm/tutorials/deploy_model_on_mali_gpu.py b/nnvm/tutorials/deploy_model_on_mali_gpu.py
new file mode 100644
index 00000000..53016607
--- /dev/null
+++ b/nnvm/tutorials/deploy_model_on_mali_gpu.py
@@ -0,0 +1,235 @@
+"""
+Deploy the Pretrained Model on ARM Mali GPU
+=======================================================
+**Author**: `Lianmin Zheng `_, `Ziheng Jiang `_
+
+This is an example of using NNVM to compile a ResNet model and
+deploy it on Firefly-RK3399 with ARM Mali GPU. We will use the
+Mali-T860 MP4 GPU on this board to accelerate the inference.
+
+This tutorial is based on the `tutorial `_
+for deploying on Raspberry Pi by `Ziheng Jiang `_.
+Great thanks to the original author; I only changed a few lines.
+
+To begin with, we import nnvm (for compilation) and TVM (for deployment).
+"""
+import tvm
+import nnvm.compiler
+import nnvm.testing
+from tvm.contrib import util, rpc
+from tvm.contrib import graph_runtime as runtime
+
+
+######################################################################
+# Build TVM Runtime on Device
+# ---------------------------
+#
+# There are some prerequisites: we need to build the TVM runtime and
+# set up an RPC server on the remote device.
+#
+# To get started, clone the tvm repo from GitHub. It is important to
+# clone the submodules along with it, using the --recursive option
+# (assuming you are in your home directory):
+#
+# .. code-block:: bash
+#
+# git clone --recursive https://github.com/dmlc/tvm
+#
+# .. note::
+#
+#   Usually the device has limited resources, so we only need to build
+#   the runtime on it. The idea is to use the TVM compiler on the host
+#   machine to compile the program, upload it to the device, and run
+#   the device function remotely.
+#
+# .. code-block:: bash
+#
+# make runtime
+#
+# After the runtime builds successfully, we need to set environment
+# variables in the :code:`~/.bashrc` file of your account, or in
+# :code:`/etc/profile` for system-wide environment variables. Edit
+# :code:`~/.bashrc` with :code:`vi ~/.bashrc` and add the lines below
+# (assuming your TVM directory is :code:`~/tvm`):
+#
+# .. code-block:: bash
+#
+# export TVM_HOME=~/tvm
+# export PATH=$PATH:$TVM_HOME/lib
+# export PYTHONPATH=$PYTHONPATH:$TVM_HOME/python
+#
+# To apply the updated :code:`~/.bashrc`, execute:
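+#
+# .. code-block:: bash
+#
+#   source ~/.bashrc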
+
+######################################################################
+# Set Up RPC Server on Device
+# ---------------------------
+# To set up a TVM RPC server on your ARM device (our remote device),
+# we have prepared a one-line script so you only need to run this
+# command after following the installation guide to install TVM on
+# your device:
+#
+# .. code-block:: bash
+#
+# python -m tvm.exec.rpc_server --host 0.0.0.0 --port=9090
+#
+# After executing the command above, if you see the lines below, the
+# RPC server has started successfully on your device.
+#
+# .. code-block:: bash
+#
+# Loading runtime library /home/YOURNAME/code/tvm/lib/libtvm_runtime.so... exec only
+# INFO:root:RPCServer: bind to 0.0.0.0:9090
+#
+
+######################################################################
+# For demonstration, we simply start an RPC server on the same machine,
+# if :code:`use_mali` is False. If you have set up the remote
+# environment, please change the three lines below: set
+# :code:`use_mali` to True, and change :code:`host` and :code:`port`
+# to your device's host address and port number.
+
+use_mali = False
+host = '10.42.0.96'
+port = 9090
+
+if not use_mali:
+ # run server locally
+ host = 'localhost'
+ port = 9092
+ server = rpc.Server(host=host, port=port, use_popen=True)
+
+######################################################################
+# Prepare the Pretrained Model
+# ----------------------------
+# Back on the host machine, we first need to download an MXNet Gluon
+# ResNet model from the model zoo, which is pretrained on ImageNet. You
+# can find more details about this part at `Compile MXNet Models`
+
+from mxnet.gluon.model_zoo.vision import get_model
+from mxnet.gluon.utils import download
+from PIL import Image
+import numpy as np
+
+# only one line to get the model
+block = get_model('resnet18_v1', pretrained=True)
+
+######################################################################
+# In order to test our model, here we download an image of a cat and
+# transform its format.
+img_name = 'cat.jpg'
+download('https://github.com/dmlc/mxnet.js/blob/master/data/cat.png?raw=true', img_name)
+image = Image.open(img_name).resize((224, 224))
+
+def transform_image(image):
+    # normalize with the ImageNet per-channel mean and standard deviation
+    image = np.array(image) - np.array([123., 117., 104.])
+    image /= np.array([58.395, 57.12, 57.375])
+    # HWC -> CHW layout, then add a batch dimension: (1, 3, 224, 224)
+    image = image.transpose((2, 0, 1))
+    image = image[np.newaxis, :]
+    return image
+
+x = transform_image(image)
+
+
+######################################################################
+# The synset is used to transform the label from an ImageNet class index
+# to a word a human can understand.
+synset_url = ''.join(['https://gist.githubusercontent.com/zhreshold/',
+ '4d0b62f3d01426887599d4f7ede23ee5/raw/',
+ '596b27d23537e5a1b5751d2b0481ef172f58b539/',
+ 'imagenet1000_clsid_to_human.txt'])
+synset_name = 'synset.txt'
+download(synset_url, synset_name)
+with open(synset_name) as f:
+ synset = eval(f.read())
+
+######################################################################
+# Now we would like to convert the Gluon model to a portable computational graph.
+# It's as easy as several lines.
+
+# We support MXNet static graph (symbol) and HybridBlock in mxnet.gluon
+net, params = nnvm.frontend.from_mxnet(block)
+# we want a probability so add a softmax operator
+net = nnvm.sym.softmax(net)
+
+######################################################################
+# Here are some basic data workload configurations.
+batch_size = 1
+num_classes = 1000
+image_shape = (3, 224, 224)
+data_shape = (batch_size,) + image_shape
+out_shape = (batch_size, num_classes)
+
+######################################################################
+# Compile The Graph
+# -----------------
+# To compile the graph, we call the :any:`nnvm.compiler.build` function
+# with the graph configuration and parameters. As we use OpenCL for
+# GPU computing, TVM will generate both OpenCL kernel code and ARM
+# CPU host code. The CPU host code is used for calling OpenCL kernels.
+# In order to generate correct CPU code, we need to specify the target
+# triplet for the host ARM device by setting the parameter :code:`target_host`.
+
+######################################################################
+# If we run the example locally for demonstration, we can simply set
+# it to :code:`llvm`. To run it on the ARM device, you need to specify
+# its instruction set. Here is the option I use for my Firefly-RK3399.
+
+if use_mali:
+ target_host = "llvm -target=aarch64-linux-gnu -mattr=+neon"
+else:
+ target_host = "llvm"
+
+# set target as `tvm.target.mali` instead of 'opencl' to enable
+# target-specific optimization
+graph, lib, params = nnvm.compiler.build(net, target=tvm.target.mali(),
+ shape={"data": data_shape}, params=params, target_host=target_host)
+
+# After `nnvm.compiler.build`, you will get three return values: the graph,
+# the library, and the new parameters, since we apply some optimizations
+# that change the parameters but keep the model's results the same.
+
+# Save the library at local temporary directory.
+tmp = util.tempdir()
+lib_fname = tmp.relpath('net.tar')
+lib.export_library(lib_fname)
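+
+# As an optional sketch, we can also persist the graph definition and the
+# parameter dict next to the library, so a later deployment step can skip
+# recompilation (the file names here are illustrative):
+with open(tmp.relpath('net.json'), 'w') as fo:
+    fo.write(graph.json())
+with open(tmp.relpath('net.params'), 'wb') as fo:
+    fo.write(nnvm.compiler.save_param_dict(params))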
+
+######################################################################
+# Deploy the Model Remotely by RPC
+# --------------------------------
+# With RPC, you can deploy the model remotely from your host machine
+# to the remote device.
+
+# connect to the server
+remote = rpc.connect(host, port)
+
+# upload the library to remote device and load it
+remote.upload(lib_fname)
+rlib = remote.load_module('net.tar')
+
+ctx = remote.cl(0)
+# upload the parameters
+rparams = {k: tvm.nd.array(v, ctx) for k, v in params.items()}
+
+# create the remote runtime module
+module = runtime.create(graph, rlib, ctx)
+# set the parameters
+module.set_input(**rparams)
+# set input data
+module.set_input('data', tvm.nd.array(x.astype('float32')))
+# run
+module.run()
+# get output
+out = module.get_output(0, tvm.nd.empty(out_shape, ctx=ctx))
+# get top1 result
+top1 = np.argmax(out.asnumpy())
+print('TVM prediction top-1: {}'.format(synset[top1]))
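+
+# As a quick extension (a plain-numpy sketch), we can also read out the
+# top-5 predictions from the same output tensor:
+top5 = np.argsort(out.asnumpy().flatten())[::-1][:5]
+print('TVM prediction top-5: {}'.format([synset[int(i)] for i in top5]))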
+
+if not use_mali:
+ # terminate the local server
+ server.terminate()
diff --git a/nnvm/tutorials/deploy_model_on_rasp.py b/nnvm/tutorials/deploy_model_on_rasp.py
index 1aa84087..033a8b88 100644
--- a/nnvm/tutorials/deploy_model_on_rasp.py
+++ b/nnvm/tutorials/deploy_model_on_rasp.py
@@ -85,8 +85,8 @@ from tvm.contrib import graph_runtime as runtime
# For demonstration, we simply start an RPC server on the same machine,
# if :code:`use_rasp` is False. If you have set up the remote
# environment, please change the three lines below: change the
-# :code:`use_rasp` to True, also change the host and port with your
-# device's host address and port number.
+# :code:`use_rasp` to True, and change the :code:`host` and :code:`port`
+# to your device's host address and port number.
use_rasp = False
host = 'rasp0'
@@ -96,7 +96,7 @@ if not use_rasp:
# run server locally
host = 'localhost'
port = 9091
- server = rpc.Server(host=host, port=port)
+ server = rpc.Server(host=host, port=port, use_popen=True)
######################################################################
# Prepare the Pretrained Model
@@ -114,7 +114,7 @@ import numpy as np
block = get_model('resnet18_v1', pretrained=True)
######################################################################
-# In order to test our model, here we download a image of cat and
+# In order to test our model, here we download an image of a cat and
# transform its format.
img_name = 'cat.jpg'
download('https://github.com/dmlc/mxnet.js/blob/master/data/cat.png?raw=true', img_name)