Optimize Linux shared library modules (*.so files) (#2445)

This commit is contained in:
Anthony Mai 2019-01-28 21:14:18 -08:00 коммит произвёл Tianqi Chen
Родитель 174c92a83c
Коммит 75f91c45d7
16 изменённых файлов: 117 добавлений и 135 удалений

2
3rdparty/HalideIR поставляемый

@ -1 +1 @@
Subproject commit 6e7c1f046fda536562dc80977e93324fee2324bd
Subproject commit 97efb11fff13131480fcaa5adc65a0aef4a4cb5d

2
3rdparty/dlpack поставляемый

@ -1 +1 @@
Subproject commit bee4d1dd8dc1ee4a1fd8fa6a96476c2f8b7492a3
Subproject commit 5c792cef3aee54ad8b7000111c9dc1797f327b59

Просмотреть файл

@ -84,10 +84,10 @@ else(MSVC)
include(CheckCXXCompilerFlag)
check_cxx_compiler_flag("-std=c++11" SUPPORT_CXX11)
if ("${CMAKE_BUILD_TYPE}" STREQUAL "Debug")
add_compile_options(-O0 -Wall -fPIC -std=c++11)
add_compile_options(-O0 -Wall -fPIC -fvisibility=hidden -std=c++11)
else()
set(CMAKE_C_FLAGS "-O2 -Wall -fPIC ${CMAKE_C_FLAGS}")
set(CMAKE_CXX_FLAGS "-O2 -Wall -fPIC -std=c++11 ${CMAKE_CXX_FLAGS}")
set(CMAKE_C_FLAGS "-O2 -Wall -fPIC -fvisibility=hidden ${CMAKE_C_FLAGS}")
set(CMAKE_CXX_FLAGS "-O2 -Wall -fPIC -fvisibility=hidden -std=c++11 ${CMAKE_CXX_FLAGS}")
endif ()
if (CMAKE_CXX_COMPILER_ID MATCHES "GNU" AND
CMAKE_CXX_COMPILER_VERSION VERSION_GREATER 7.0)

Просмотреть файл

@ -1974,7 +1974,7 @@ INCLUDE_FILE_PATTERNS =
# recursively expanded use the := operator instead of the = operator.
# This tag requires that the tag ENABLE_PREPROCESSING is set to YES.
PREDEFINED = DMLC_USE_CXX11
PREDEFINED = DMLC_USE_CXX11 TVM_DLL= NNVM_DLL= __attribute__(x)=
# If the MACRO_EXPANSION and EXPAND_ONLY_PREDEF tags are set to YES then this
# tag can be used to specify a list of macro names that should be expanded. The

Просмотреть файл

@ -332,26 +332,6 @@ TVM_DLL Expr max(Expr a, Expr b);
* index types(int32, int64) when possible.
*/
TVM_DLL Expr min(Expr a, Expr b);
/*!
* \brief right shift
*
* \param a left operand
* \param b right operand
* \return The result expression.
* \note this function does eager constant folding for
* index types(int32, int64) when possible.
*/
TVM_DLL Expr operator>>(Expr a, Expr b);
/*!
* \brief left shift
*
* \param a left operand
* \param b right operand
* \return The result expression.
* \note this function does eager constant folding for
* index types(int32, int64) when possible.
*/
TVM_DLL Expr operator<<(Expr a, Expr b);
/*!
* \brief take bitwise and of two values
*

Просмотреть файл

@ -27,7 +27,7 @@ namespace ir {
* \param vrange The range information about the variable.
* \return Canonicalized statement.
*/
EXPORT Expr Simplify(Expr expr, Map<Var, Range> vrange = Map<Var, Range>());
TVM_DLL Expr Simplify(Expr expr, Map<Var, Range> vrange = Map<Var, Range>());
/*!
* \brief Simplify the statement.
@ -52,7 +52,7 @@ Stmt CanonicalSimplify(Stmt stmt,
* \param vrange The range information about the variable.
* \return Canonicalized expression.
*/
EXPORT Expr CanonicalSimplify(Expr expr,
TVM_DLL Expr CanonicalSimplify(Expr expr,
Map<Var, Range> vrange = Map<Var, Range>());
/*!
@ -61,7 +61,7 @@ EXPORT Expr CanonicalSimplify(Expr expr,
* \param rhs The right operand
* \return The comparison result.
*/
EXPORT bool Equal(const Expr& lhs, const Expr& rhs);
TVM_DLL bool Equal(const Expr& lhs, const Expr& rhs);
/*!
* \brief Deep compare lhs and rhs
@ -92,13 +92,13 @@ int Compare(const Expr& lhs, const Expr& rhs);
* \return Whether IR is in SSA form.
* \note All the passes in this file use SSA form and output SSA form.
*/
bool VerifySSA(const Stmt& ir);
TVM_DLL bool VerifySSA(const Stmt& ir);
/*!
* \brief Whether the expression has side effects.
* \return whether the expression has side effects
*/
bool HasSideEffect(const Expr& e);
TVM_DLL bool HasSideEffect(const Expr& e);
/*!
* \brief Whether e expression used var.
@ -121,7 +121,7 @@ bool ExprUseVar(const Expr& e, const std::unordered_set<const Variable*>& vset);
* \param stmt The source statement to be converted.
* \return The converted form.
*/
Stmt ConvertSSA(Stmt stmt);
TVM_DLL Stmt ConvertSSA(Stmt stmt);
/*!
* \brief Substitute the var specified in key->var to be value.

Просмотреть файл

@ -131,7 +131,7 @@ class TVM_DLL IRVisitor {
* \param node The ir to be visited.
* \param fvisit The visitor function to be applied.
*/
void PostOrderVisit(const NodeRef& node, std::function<void(const NodeRef&)> fvisit);
TVM_DLL void PostOrderVisit(const NodeRef& node, std::function<void(const NodeRef&)> fvisit);
} // namespace ir
} // namespace tvm

Просмотреть файл

@ -27,7 +27,7 @@ namespace relay {
*
* \return A type checked expression with its checked_type field populated.
*/
Expr InferType(const Expr& expr, const Module& mod);
TVM_DLL Expr InferType(const Expr& expr, const Module& mod);
/*!
* \brief Infer the type of a function as if it is mapped to var in the mod.
@ -39,7 +39,7 @@ Expr InferType(const Expr& expr, const Module& mod);
* \return A type checked Function with its checked_type field populated.
* \note this function mutates mod and is not thread-safe.
*/
Function InferType(const Function& f, const Module& mod,
TVM_DLL Function InferType(const Function& f, const Module& mod,
const GlobalVar& var);
/*!
@ -58,7 +58,7 @@ Function InferType(const Function& f, const Module& mod,
*
* \return true if the rules are satisfied otherwise false
*/
bool KindCheck(const Type& t, const Module& mod);
TVM_DLL bool KindCheck(const Type& t, const Module& mod);
/*! \brief Compare two expressions for structural equivalence.
*
@ -75,7 +75,7 @@ bool KindCheck(const Type& t, const Module& mod);
*
* \return true if equal, otherwise false
*/
bool AlphaEqual(const Expr& e1, const Expr& e2);
TVM_DLL bool AlphaEqual(const Expr& e1, const Expr& e2);
/*! \brief Compare two types for structural equivalence.
*
@ -93,7 +93,7 @@ bool AlphaEqual(const Expr& e1, const Expr& e2);
*
* \return true if equal, otherwise false
*/
bool AlphaEqual(const Type& t1, const Type& t2);
TVM_DLL bool AlphaEqual(const Type& t1, const Type& t2);
/*! \brief Check that each Var is only bound once.
*
@ -106,7 +106,7 @@ bool AlphaEqual(const Type& t1, const Type& t2);
*
* \return true iff all Var in expr is bound at most once.
*/
bool WellFormed(const Expr& expr);
TVM_DLL bool WellFormed(const Expr& expr);
/*! \brief Get all bound variables from expression expr.
*
@ -117,7 +117,7 @@ bool WellFormed(const Expr& expr);
*
* \return List of bound vars, in the PostDFS order in the expression.
*/
tvm::Array<Var> BoundVars(const Expr& expr);
TVM_DLL tvm::Array<Var> BoundVars(const Expr& expr);
/*! \brief Get free type parameters from expression expr.
*
@ -128,7 +128,7 @@ tvm::Array<Var> BoundVars(const Expr& expr);
*
* \return List of free vars, in the PostDFS order in the expression.
*/
tvm::Array<Var> FreeVars(const Expr& expr);
TVM_DLL tvm::Array<Var> FreeVars(const Expr& expr);
/*! \brief Get all variables from expression expr.
*
@ -136,7 +136,7 @@ tvm::Array<Var> FreeVars(const Expr& expr);
*
* \return List of all vars, in the PostDFS order in the expression.
*/
tvm::Array<Var> AllVars(const Expr& expr);
TVM_DLL tvm::Array<Var> AllVars(const Expr& expr);
/*! \brief Get free TypeVars from expression expr.
*
@ -147,7 +147,7 @@ tvm::Array<Var> AllVars(const Expr& expr);
*
* \return List of free vars, in the PostDFS order visited by expr.
*/
tvm::Array<TypeVar> FreeTypeVars(const Expr& expr);
TVM_DLL tvm::Array<TypeVar> FreeTypeVars(const Expr& expr);
/*! \brief Get free TypeVars from type t.
*
@ -158,7 +158,7 @@ tvm::Array<TypeVar> FreeTypeVars(const Expr& expr);
*
* \return List of free type vars, in the PostDFS order visited by type.
*/
tvm::Array<TypeVar> FreeTypeVars(const Type& t);
TVM_DLL tvm::Array<TypeVar> FreeTypeVars(const Type& t);
/*! \brief Get all bound type variables from expression expr.
*
@ -169,7 +169,7 @@ tvm::Array<TypeVar> FreeTypeVars(const Type& t);
*
* \return List of bound type vars, in the PostDFS order in the expression.
*/
tvm::Array<TypeVar> BoundTypeVars(const Expr& expr);
TVM_DLL tvm::Array<TypeVar> BoundTypeVars(const Expr& expr);
/*! \brief Get all bound type variables from type t.
*
@ -180,7 +180,7 @@ tvm::Array<TypeVar> BoundTypeVars(const Expr& expr);
*
* \return List of bound type vars, in the PostDFS order visited by type.
*/
tvm::Array<TypeVar> BoundTypeVars(const Type& t);
TVM_DLL tvm::Array<TypeVar> BoundTypeVars(const Type& t);
/*! \brief Get all type variables in expression expr.
*
@ -188,7 +188,7 @@ tvm::Array<TypeVar> BoundTypeVars(const Type& t);
*
* \return List of type vars, in the PostDFS order in the expression.
*/
tvm::Array<TypeVar> AllTypeVars(const Expr& expr);
TVM_DLL tvm::Array<TypeVar> AllTypeVars(const Expr& expr);
/*! \brief Get all type variables in type t.
*
@ -196,7 +196,7 @@ tvm::Array<TypeVar> AllTypeVars(const Expr& expr);
*
* \return List of type vars, in the PostDFS order visited by type.
*/
tvm::Array<TypeVar> AllTypeVars(const Type& t);
TVM_DLL tvm::Array<TypeVar> AllTypeVars(const Type& t);
/*! \brief Remove expressions which do not affect the program result.
*
@ -211,14 +211,14 @@ tvm::Array<TypeVar> AllTypeVars(const Type& t);
*
* \return the optimized expression.
*/
Expr DeadCodeElimination(const Expr& e);
TVM_DLL Expr DeadCodeElimination(const Expr& e);
/*!
* \brief Fold constant expressions.
* \param expr the expression to be optimized.
* \return The optimized expression.
*/
Expr FoldConstant(const Expr& expr);
TVM_DLL Expr FoldConstant(const Expr& expr);
/*!
* \brief Fuse operations in expr into separate functions.
@ -226,7 +226,7 @@ Expr FoldConstant(const Expr& expr);
* \param fuse_opt_level Optimization level.
* \return The optimized expression.
*/
Expr FuseOps(const Expr& expr, int fuse_opt_level);
TVM_DLL Expr FuseOps(const Expr& expr, int fuse_opt_level);
/*!
* \brief Apply rewrite rules to rewrite the expr in post DFS order.
@ -238,7 +238,7 @@ Expr FuseOps(const Expr& expr, int fuse_opt_level);
* an Expr consumed by multiple callers.
* \return The rewritten expression.
*/
Expr ForwardRewrite(const Expr& expr,
TVM_DLL Expr ForwardRewrite(const Expr& expr,
const std::string& rewrite_map_attr_name,
std::function<NodeRef(const Call&)> fcontext = nullptr,
std::function<Expr(const Expr&)> fmulti_ref_trigger = nullptr);
@ -252,7 +252,7 @@ Expr ForwardRewrite(const Expr& expr,
* an Expr consumed by multiple callers.
* \return The rewritten expression.
*/
Expr ForwardRewrite(const Expr& expr,
TVM_DLL Expr ForwardRewrite(const Expr& expr,
const FForwardRewrite& rewrite_func,
std::function<NodeRef(const Call&)> fcontext = nullptr,
std::function<Expr(const Expr&)> fmulti_ref_trigger = nullptr);
@ -264,14 +264,14 @@ Expr ForwardRewrite(const Expr& expr,
* operators without annotation.
* \return The updated program.
*/
Expr RewriteAnnotatedOps(const Expr& expr, int fallback_device);
TVM_DLL Expr RewriteAnnotatedOps(const Expr& expr, int fallback_device);
/*!
* \brief Collect the device mapping information of each expression.
* \param expr The expression.
* \return The device mapping.
*/
Map<Expr, Integer> CollectDeviceInfo(const Expr& expr);
TVM_DLL Map<Expr, Integer> CollectDeviceInfo(const Expr& expr);
/*! \brief A hashing structure in the style of std::hash. */
struct StructuralHash {

Просмотреть файл

@ -38,7 +38,7 @@
#define TVM_DLL __declspec(dllimport)
#endif
#else
#define TVM_DLL
#define TVM_DLL __attribute__((visibility("default")))
#endif
#endif

Просмотреть файл

@ -40,7 +40,7 @@ constexpr int kMaxStackAlloca = 1024;
* \brief TVM Runtime Device API, abstracts the device
* specific interface for memory management.
*/
class DeviceAPI {
class TVM_DLL DeviceAPI {
public:
/*! \brief virtual destructor */
virtual ~DeviceAPI() {}
@ -103,7 +103,7 @@ class DeviceAPI {
*
* \param ctx The context of allocation.
*/
TVM_DLL virtual TVMStreamHandle CreateStream(TVMContext ctx);
virtual TVMStreamHandle CreateStream(TVMContext ctx);
/*!
* \brief Free a stream of execution
@ -111,7 +111,7 @@ class DeviceAPI {
* \param ctx The context of the stream
* \param stream The pointer to be freed.
*/
TVM_DLL virtual void FreeStream(TVMContext ctx, TVMStreamHandle stream);
virtual void FreeStream(TVMContext ctx, TVMStreamHandle stream);
/*!
* \brief Synchronize the stream
@ -137,7 +137,7 @@ class DeviceAPI {
* \param event_src The source stream to synchronize.
* \param event_dst The destination stream to synchronize.
*/
TVM_DLL virtual void SyncStreamFromTo(TVMContext ctx,
virtual void SyncStreamFromTo(TVMContext ctx,
TVMStreamHandle event_src,
TVMStreamHandle event_dst);
/*!
@ -156,7 +156,7 @@ class DeviceAPI {
* \param type_hint The type of elements. Only needed by certain backends such
* as OpenGL, as nbytes is sufficient for most backends.
*/
TVM_DLL virtual void* AllocWorkspace(TVMContext ctx,
virtual void* AllocWorkspace(TVMContext ctx,
size_t nbytes,
TVMType type_hint = {});
/*!
@ -165,7 +165,7 @@ class DeviceAPI {
* \param ctx The context of allocation.
* \param ptr The pointer to be freed.
*/
TVM_DLL virtual void FreeWorkspace(TVMContext ctx, void* ptr);
virtual void FreeWorkspace(TVMContext ctx, void* ptr);
/*!
* \brief Get device API based on context.

Просмотреть файл

@ -16,7 +16,7 @@
#define NNVM_DLL __declspec(dllimport)
#endif
#else
#define NNVM_DLL
#define NNVM_DLL __attribute__((visibility("default")))
#endif
/*! \brief manually define unsigned int */

Просмотреть файл

@ -145,7 +145,7 @@ class InterpreterStateNode : public Node {
v->Visit("stack", &stack);
}
TVM_DLL static InterpreterState make(Expr current_expr, Stack stack);
static InterpreterState make(Expr current_expr, Stack stack);
static constexpr const char* _type_key = "relay.InterpreterState";
TVM_DECLARE_NODE_TYPE_INFO(InterpreterStateNode, Node);

Просмотреть файл

@ -11,11 +11,11 @@ extern "C" {
// disable under msvc
#ifndef _MSC_VER
TVM_WEAK uint16_t __gnu_f2h_ieee(float a) {
TVM_DLL TVM_WEAK uint16_t __gnu_f2h_ieee(float a) {
return __truncXfYf2__<float, uint32_t, 23, uint16_t, uint16_t, 10>(a);
}
TVM_WEAK float __gnu_h2f_ieee(uint16_t a) {
TVM_DLL TVM_WEAK float __gnu_h2f_ieee(uint16_t a) {
return __extendXfYf2__<uint16_t, uint16_t, 10, float, uint32_t, 23>(a);
}

Просмотреть файл

@ -22,7 +22,7 @@ namespace runtime {
* - The release order is usually in reverse order of allocate
* - Repetitive pattern of same allocations over different runs.
*/
class WorkspacePool {
class TVM_DLL WorkspacePool {
public:
/*!
* \brief Create pool with specific device type and device.

Просмотреть файл

@ -11,6 +11,7 @@
extern "C" {
#endif
#include <tvm/runtime/c_runtime_api.h>
#include "driver.h"
#define VTA_MEMCPY_H2D 1
@ -28,13 +29,13 @@ extern "C" {
* \param size Buffer size.
* \return A pointer to the allocated buffer.
*/
void* VTABufferAlloc(size_t size);
TVM_DLL void* VTABufferAlloc(size_t size);
/*!
* \brief Free data buffer.
* \param buffer The data buffer to be freed.
*/
void VTABufferFree(void* buffer);
TVM_DLL void VTABufferFree(void* buffer);
/*!
* \brief Copy data buffer from one location to another.
@ -45,7 +46,7 @@ void VTABufferFree(void* buffer);
* \param size Size of copy.
* \param kind_mask The memory copy kind.
*/
void VTABufferCopy(const void* from,
TVM_DLL void VTABufferCopy(const void* from,
size_t from_offset,
void* to,
size_t to_offset,
@ -56,13 +57,13 @@ void VTABufferCopy(const void* from,
typedef void* VTACommandHandle;
/*! \brief Shutdown hook of VTA to cleanup resources */
void VTARuntimeShutdown();
TVM_DLL void VTARuntimeShutdown();
/*!
* \brief Get thread local command handle.
* \return A thread local command handle.
*/
VTACommandHandle VTATLSCommandHandle();
TVM_DLL VTACommandHandle VTATLSCommandHandle();
/*!
* \brief Get the buffer access pointer on CPU.
@ -70,7 +71,7 @@ VTACommandHandle VTATLSCommandHandle();
* \param buffer The data buffer.
* \return The pointer that can be accessed by the CPU.
*/
void* VTABufferCPUPtr(VTACommandHandle cmd, void* buffer);
TVM_DLL void* VTABufferCPUPtr(VTACommandHandle cmd, void* buffer);
/*!
* \brief Perform a write barrier to make a memory region visible to the CPU.
@ -80,7 +81,7 @@ void* VTABufferCPUPtr(VTACommandHandle cmd, void* buffer);
* \param start The start of the region (in elements).
* \param extent The end of the region (in elements).
*/
void VTAWriteBarrier(VTACommandHandle cmd,
TVM_DLL void VTAWriteBarrier(VTACommandHandle cmd,
void* buffer,
uint32_t elem_bits,
uint32_t start,
@ -93,7 +94,7 @@ void VTAWriteBarrier(VTACommandHandle cmd,
* \param start The start of the region (in elements).
* \param extent The end of the region (in elements).
*/
void VTAReadBarrier(VTACommandHandle cmd,
TVM_DLL void VTAReadBarrier(VTACommandHandle cmd,
void* buffer,
uint32_t elem_bits,
uint32_t start,
@ -104,7 +105,7 @@ void VTAReadBarrier(VTACommandHandle cmd,
* \param cmd The VTA command handle.
* \param debug_flag The debug flag.
*/
void VTASetDebugMode(VTACommandHandle cmd, int debug_flag);
TVM_DLL void VTASetDebugMode(VTACommandHandle cmd, int debug_flag);
/*!
* \brief Perform a 2D data load from DRAM.
@ -122,7 +123,7 @@ void VTASetDebugMode(VTACommandHandle cmd, int debug_flag);
* \param dst_sram_index Destination SRAM index.
* \param dst_memory_type Destination memory type.
*/
void VTALoadBuffer2D(VTACommandHandle cmd,
TVM_DLL void VTALoadBuffer2D(VTACommandHandle cmd,
void* src_dram_addr,
uint32_t src_elem_offset,
uint32_t x_size,
@ -147,7 +148,7 @@ void VTALoadBuffer2D(VTACommandHandle cmd,
* \param y_size The number of rows.
* \param x_stride The x axis stride.
*/
void VTAStoreBuffer2D(VTACommandHandle cmd,
TVM_DLL void VTAStoreBuffer2D(VTACommandHandle cmd,
uint32_t src_sram_index,
uint32_t src_memory_type,
void* dst_dram_addr,
@ -187,7 +188,7 @@ void VTAStoreBuffer2D(VTACommandHandle cmd,
* \param use_imm Use immediate in ALU mode if set to true.
* \param imm_val Immediate value in ALU mode.
*/
void VTAUopPush(uint32_t mode,
TVM_DLL void VTAUopPush(uint32_t mode,
uint32_t reset_out,
uint32_t dst_index,
uint32_t src_index,
@ -203,7 +204,7 @@ void VTAUopPush(uint32_t mode,
* \param src_factor The input factor.
* \param wgt_factor The weight factor.
*/
void VTAUopLoopBegin(uint32_t extent,
TVM_DLL void VTAUopLoopBegin(uint32_t extent,
uint32_t dst_factor,
uint32_t src_factor,
uint32_t wgt_factor);
@ -211,7 +212,7 @@ void VTAUopLoopBegin(uint32_t extent,
/*!
* \brief Mark end of a micro op loop.
*/
void VTAUopLoopEnd();
TVM_DLL void VTAUopLoopEnd();
/*!
* \brief Push GEMM uop kernel into the command handle.
@ -221,7 +222,7 @@ void VTAUopLoopEnd();
* \param nbytes Number of bytes in the closure arguments.
* \return 0 if success.
*/
int VTAPushGEMMOp(void** uop_handle,
TVM_DLL int VTAPushGEMMOp(void** uop_handle,
int (*finit)(void*),
void* signature,
int nbytes);
@ -234,7 +235,7 @@ int VTAPushGEMMOp(void** uop_handle,
* \param nbytes Number of bytes in the closure arguments.
* \return 0 if success.
*/
int VTAPushALUOp(void** uop_handle,
TVM_DLL int VTAPushALUOp(void** uop_handle,
int (*finit)(void*),
void* signature,
int nbytes);
@ -246,7 +247,7 @@ int VTAPushALUOp(void** uop_handle,
* \param to_qid The destination queue.
* \return 0 if success.
*/
int VTADepPush(VTACommandHandle cmd, int from_qid, int to_qid);
TVM_DLL int VTADepPush(VTACommandHandle cmd, int from_qid, int to_qid);
/*!
* \brief Pop dependence signal.
@ -255,7 +256,7 @@ int VTADepPush(VTACommandHandle cmd, int from_qid, int to_qid);
* \param to_qid The destination queue.
* \return 0 if success.
*/
int VTADepPop(VTACommandHandle cmd, int from_qid, int to_qid);
TVM_DLL int VTADepPop(VTACommandHandle cmd, int from_qid, int to_qid);
/*!
* \brief Synchronize the command handle.
@ -266,7 +267,7 @@ int VTADepPop(VTACommandHandle cmd, int from_qid, int to_qid);
* \param wait_cycles The limit of poll cycles.
*
*/
void VTASynchronize(VTACommandHandle cmd, uint32_t wait_cycles);
TVM_DLL void VTASynchronize(VTACommandHandle cmd, uint32_t wait_cycles);
#ifdef __cplusplus
}

Просмотреть файл

@ -10,6 +10,7 @@
#include <vta/hw_spec.h>
#include <vta/runtime.h>
#include <dmlc/logging.h>
#include <tvm/runtime/c_runtime_api.h>
#include <cassert>
#include <cstring>