* [WIP] C++ topi contributions

Summary:
This diff implements C++ topi contributions for:
  - relu with parametric threshold
  - pad with generic padBefore / padAfter specification
  - matmult with transposes
  - conv2d_nchw, conv2d_hwcn with runtime constant padding and strides
  - depthwise_conv2d_nchw with runtime constant padding and strides
  - group_conv2d_ngchw with runtime constant padding and strides
  - broadcast_to a broadcastable shape
  - broadcast_bop where bop is a usual binary op (+ - * / %)

Convolution padding is implemented using the pad operation.
To avoid extra memory consumption, it is generally recommended to inline the padding with the autoinliner.
Unfortunately in its current form the elemwise checks are too restrictive to allow inlining.
So this diff also proposes an extension to LHS injective (i.e. no reduction axis in the current IR design)

Test Plan:
Tested in a C++ test suite in a separate repository; I am looking for suggestions to quickly spin up some tests for tvm.

Reviewers: tqchen

Subscribers:

Tasks:

Tags:

Blame Revision:

* Review + Lint + GSG C++
This commit is contained in:
Nicolas Vasilache 2017-08-14 00:50:25 +02:00 коммит произвёл Tianqi Chen
Родитель a3776ba5ae
Коммит f08de2b690
6 изменённых файлов: 421 добавлений и 6 удалений

Просмотреть файл

@ -40,6 +40,16 @@ Stmt ScheduleOps(Schedule s, Map<IterVar, Range> dom_map);
*/
void AutoInlineElemWise(Schedule sch);
/*!
* \brief To automatically inline operations with injective writes
* (i.e. writes without reduction or sequential loops). Note
* that in this case, guarantees about contiguity, transpose, stride,
* alignment and memory footprint in general do not hold.
*
* \param sch The schedule to be inlined.
*/
void AutoInlineInjective(Schedule sch);
} // namespace schedule
} // namespace tvm
#endif // TVM_SCHEDULE_PASS_H_

Просмотреть файл

@ -60,5 +60,38 @@ void AutoInlineElemWise(Schedule sch) {
}
}
bool IsBroadcast(const Operation& op) {
if (const ComputeOpNode* compute = op.as<ComputeOpNode>()) {
if (compute->reduce_axis.size()) {
return false;
}
// TODO(nicolasvasilache): Implement Me
}
return false;
}
/*!
 * \brief Inline every stage that IsBroadcast() accepts.
 *
 * Stages that are already scheduled, or that are outputs of the schedule,
 * are left untouched.
 *
 * \param sch The schedule whose stages are inspected.
 */
void AutoInlineBroadcast(Schedule sch) {
  for (Stage stage : sch->stages) {
    if (stage.is_scheduled()) continue;
    if (!IsBroadcast(stage->op)) continue;
    if (stage->is_output) continue;
    stage.compute_inline();
  }
}
bool IsInjective(const Operation& op) {
if (const ComputeOpNode* compute = op.as<ComputeOpNode>()) {
return compute->reduce_axis.size() == 0;
}
return false;
}
/*!
 * \brief Inline every stage that IsInjective() accepts.
 *
 * Stages that are already scheduled, or that are outputs of the schedule,
 * are left untouched.
 *
 * \param sch The schedule whose stages are inspected.
 */
void AutoInlineInjective(Schedule sch) {
  for (Stage stage : sch->stages) {
    if (stage.is_scheduled()) continue;
    if (!IsInjective(stage->op)) continue;
    if (stage->is_output) continue;
    stage.compute_inline();
  }
}
} // namespace schedule
} // namespace tvm

Просмотреть файл

@ -0,0 +1,57 @@
/*
* Copyright (c) 2017 by Contributors
* \brief Broadcast op constructions
* \file broadcast.h
*/
#ifndef TOPI_BROADCAST_H_
#define TOPI_BROADCAST_H_
#include <topi/detail/broadcast.h>
namespace topi {
/*!
 * \brief Broadcast a tensor to a given, compatible shape.
 *
 * The broadcast of output_shape against I->shape must reproduce output_shape
 * exactly (same rank, every dimension equal), otherwise a CHECK fails.
 *
 * \param I The tensor to broadcast.
 * \param output_shape The requested result shape; its rank must be at least
 *        the rank of I.
 * \return A compute tensor of shape output_shape reading from I.
 */
inline tvm::Tensor broadcast_to(const tvm::Tensor& I,
                                const tvm::Array<tvm::Expr>& output_shape) {
  CHECK_GE(output_shape.size(), I->shape.size())
      << "Not a broadcast, output dimensionality smaller than input.\noutput: "
      << output_shape << "\nvs\ninput: " << I;
  auto helper = detail::BroadcastShape(output_shape, I->shape);
  CHECK_EQ(output_shape.size(), helper.common_shape.size());
  // The broadcast result must coincide with the requested shape.
  size_t dim = 0;
  for (const tvm::Expr& common : helper.common_shape) {
    CHECK(tvm::ir::Equal(output_shape[dim], common));
    ++dim;
  }
  // I was passed as the second shape, hence its variables live in vars2.
  auto body = [&](tvm::Array<tvm::Var> out_vars) {
    return I(detail::InputIndexFromBroadcast(out_vars, I, helper.vars2,
                                             helper.all_vars));
  };
  tvm::Array<tvm::Expr> result_shape(helper.common_shape.begin(),
                                     helper.common_shape.end());
  return tvm::compute(result_shape, body);
}
/*! \brief Element-wise A + B after broadcasting both operands to a common shape. */
inline tvm::Tensor broadcast_add(const tvm::Tensor& A, const tvm::Tensor& B) {
  return detail::WithBroadcast(
      [](tvm::Expr lhs, tvm::Expr rhs) { return lhs + rhs; }, A, B);
}
/*! \brief Element-wise A - B after broadcasting both operands to a common shape. */
inline tvm::Tensor broadcast_sub(const tvm::Tensor& A, const tvm::Tensor& B) {
  return detail::WithBroadcast(
      [](tvm::Expr lhs, tvm::Expr rhs) { return lhs - rhs; }, A, B);
}
/*! \brief Element-wise A * B after broadcasting both operands to a common shape. */
inline tvm::Tensor broadcast_mul(const tvm::Tensor& A, const tvm::Tensor& B) {
  return detail::WithBroadcast(
      [](tvm::Expr lhs, tvm::Expr rhs) { return lhs * rhs; }, A, B);
}
/*! \brief Element-wise A / B after broadcasting both operands to a common shape. */
inline tvm::Tensor broadcast_div(const tvm::Tensor& A, const tvm::Tensor& B) {
  return detail::WithBroadcast(
      [](tvm::Expr lhs, tvm::Expr rhs) { return lhs / rhs; }, A, B);
}
/*! \brief Element-wise A % B after broadcasting both operands to a common shape. */
inline tvm::Tensor broadcast_mod(const tvm::Tensor& A, const tvm::Tensor& B) {
  return detail::WithBroadcast(
      [](tvm::Expr lhs, tvm::Expr rhs) { return lhs % rhs; }, A, B);
}
} // namespace topi
#endif // TOPI_BROADCAST_H_

Просмотреть файл

@ -0,0 +1,107 @@
/*
* Copyright (c) 2017 by Contributors
* \brief Detail broadcast.
* \file broadcast.h
*/
#ifndef TOPI_DETAIL_BROADCAST_H_
#define TOPI_DETAIL_BROADCAST_H_
#include <algorithm>
#include <deque>
#include "tvm/ir_pass.h"
#include "tvm/tvm.h"
namespace topi {
namespace detail {
/*!
 * \brief Result of broadcasting two shapes against each other, as filled in
 *  by BroadcastShape. All deques are ordered outermost dimension first.
 */
struct BroadcastHelper {
// Extents of the broadcast result, one per output dimension.
std::deque<tvm::Expr> common_shape;
// One fresh variable per output dimension.
std::deque<tvm::Var> all_vars;
// Subset of all_vars for the dimensions that the first shape contributes to
// (equal extents, or the second shape has extent 1 / lacks the dimension).
std::deque<tvm::Var> vars1;
// Same as vars1, for the second shape.
std::deque<tvm::Var> vars2;
};
/*!
 * \brief Broadcast two shapes against each other, aligning them on their
 *  innermost (last) dimension.
 *
 * For every dimension present in both shapes the extents must be equal, or
 * one of them must be 1; otherwise a CHECK fails. Leading dimensions that
 * exist in only one shape are copied through unchanged.
 *
 * \param shape1 The first shape.
 * \param shape2 The second shape.
 * \return The common shape plus, per input, the output variables that
 *         actually index into that input.
 */
inline BroadcastHelper BroadcastShape(const tvm::Array<tvm::Expr>& shape1,
                                      const tvm::Array<tvm::Expr>& shape2) {
  BroadcastHelper helper;
  const int rank1 = shape1.size();
  const int rank2 = shape2.size();
  const int shared_rank = std::min(rank1, rank2);
  const int full_rank = std::max(rank1, rank2);
  const tvm::Expr unit(1);

  // `back` counts dimensions from the innermost one (1 == last dimension).
  int back = 1;
  for (; back <= shared_rank; ++back) {
    const tvm::Expr dim1 = shape1[rank1 - back];
    const tvm::Expr dim2 = shape2[rank2 - back];
    helper.all_vars.push_front(tvm::Var());
    const tvm::Var fresh = helper.all_vars.front();
    if (tvm::ir::Equal(dim1, dim2)) {
      // Same extent: both inputs are indexed by the new variable.
      helper.common_shape.push_front(dim1);
      helper.vars1.push_front(fresh);
      helper.vars2.push_front(fresh);
    } else if (tvm::ir::Equal(unit, dim1)) {
      // shape1 is broadcast along this dimension.
      CHECK(!tvm::ir::Equal(unit, dim2));
      helper.common_shape.push_front(dim2);
      helper.vars2.push_front(fresh);
    } else if (tvm::ir::Equal(unit, dim2)) {
      // shape2 is broadcast along this dimension.
      helper.common_shape.push_front(dim1);
      helper.vars1.push_front(fresh);
    } else {
      CHECK(false) << "Incompatible broadcast dims: " << dim1
                   << " and " << dim2 << " in: "
                   << tvm::Array<tvm::Expr>(shape1.begin(), shape1.end())
                   << " and "
                   << tvm::Array<tvm::Expr>(shape2.begin(), shape2.end());
    }
  }

  // Leading dimensions of the higher-rank shape are taken as they are.
  const bool first_is_longer = rank1 > rank2;
  const auto& longer_shape = first_is_longer ? shape1 : shape2;
  auto& longer_vars = first_is_longer ? helper.vars1 : helper.vars2;
  for (; back <= full_rank; ++back) {
    helper.all_vars.push_front(tvm::Var());
    helper.common_shape.push_front(longer_shape[full_rank - back]);
    longer_vars.push_front(helper.all_vars.front());
  }
  return helper;
}
/*!
 * \brief Build the index expressions used to read one broadcast input.
 *
 * \param ovars Variables of the output, one per output dimension.
 * \param T The input tensor being indexed.
 * \param my_vars The entries of all_vars that index into T.
 * \param all_vars The variables created for every output dimension; must
 *        have as many entries as ovars.
 * \return One index per dimension of T: the matching output variable where T
 *         owns the dimension, zero where T is broadcast along it.
 */
inline tvm::Array<tvm::Expr> InputIndexFromBroadcast(
    const tvm::Array<tvm::Var>& ovars, const tvm::Tensor& T,
    const std::deque<tvm::Var>& my_vars, const std::deque<tvm::Var>& all_vars) {
  CHECK_EQ(ovars.size(), all_vars.size());
  const size_t out_rank = ovars.size();
  const size_t in_rank = T->shape.size();
  tvm::Array<tvm::Expr> indices;
  // Quadratic search; ranks are small enough for this not to matter.
  for (size_t pos = 0; pos < out_rank; ++pos) {
    const tvm::Var& candidate = all_vars[pos];
    const bool owned =
        std::any_of(my_vars.begin(), my_vars.end(),
                    [&candidate](const tvm::Var& mine) {
                      return candidate.same_as(mine);
                    });
    if (owned) {
      indices.push_back(ovars[pos]);
    } else if (out_rank - pos <= in_rank) {
      // Within the rank of T but not owned by it: T has extent 1 here, so
      // read element 0. Dimensions beyond the rank of T get no index at all.
      indices.push_back(tvm::make_zero(ovars[pos].type()));
    }
  }
  CHECK(in_rank == indices.size());
  return indices;
}
/*!
 * \brief Apply a binary expression builder to two tensors after broadcasting
 *  them to their common shape.
 *
 * \param op Callable combining two scalar expressions into one.
 * \param A The first operand.
 * \param B The second operand.
 * \return A compute tensor of the broadcast shape holding op(A[...], B[...]).
 */
template <typename FBinaryExpr>
inline tvm::Tensor WithBroadcast(FBinaryExpr op, const tvm::Tensor& A,
                                 const tvm::Tensor& B) {
  auto helper = BroadcastShape(A->shape, B->shape);
  auto body = [&](tvm::Array<tvm::Var> out_vars) {
    const auto lhs_index =
        InputIndexFromBroadcast(out_vars, A, helper.vars1, helper.all_vars);
    const auto rhs_index =
        InputIndexFromBroadcast(out_vars, B, helper.vars2, helper.all_vars);
    return op(A(lhs_index), B(rhs_index));
  };
  tvm::Array<tvm::Expr> result_shape(helper.common_shape.begin(),
                                     helper.common_shape.end());
  return tvm::compute(result_shape, body);
}
} // namespace detail
} // namespace topi
#endif // TOPI_DETAIL_BROADCAST_H_

Просмотреть файл

@ -1,6 +1,6 @@
/*!
* Copyright (c) 2017 by Contributors
* \file topi.h
* \file ewise.h
* \brief Elementwise op constructions
*/
#ifndef TOPI_EWISE_H_
@ -12,16 +12,17 @@ namespace topi {
using namespace tvm;
// Unary intrinsic operators
#define TOPI_DECLARE_UNARY_OP(OpName) \
inline Tensor OpName(const Tensor& x) { \
return compute(x->shape, [&](const Array<Var>& i) { \
return ::tvm::OpName(x(i)); \
}); \
#define TOPI_DECLARE_UNARY_OP(OpName) \
inline Tensor OpName(const Tensor& x) { \
return compute(x->shape, [&](const Array<Var>& i) { \
return ::tvm::OpName(x(i)); \
}, "tensor", "ewise"); \
}
TOPI_DECLARE_UNARY_OP(exp);
TOPI_DECLARE_UNARY_OP(tanh);
TOPI_DECLARE_UNARY_OP(sigmoid);
TOPI_DECLARE_UNARY_OP(sqrt);
} // namespace topi
#endif // TOPI_EWISE_H_

207
topi/include/topi/nn.h Normal file
Просмотреть файл

@ -0,0 +1,207 @@
/*
* Copyright (c) 2017 by Contributors
* \brief NN op constructions
* \file nn.h
*/
#ifndef TOPI_NN_H_
#define TOPI_NN_H_
#include <algorithm>
#include "tvm/ir.h"
#include "tvm/ir_pass.h"
#include "tvm/tvm.h"
namespace topi {
namespace detail {
// Left-folds a non-empty list of expressions with the binary builder `op`:
// op(...op(op(exprs[0], exprs[1]), exprs[2])..., exprs[n-1]).
// A single-element list is returned as is; an empty list fails a CHECK.
template <typename T>
tvm::Expr Map(const tvm::Array<tvm::Expr>& exprs, T op) {
  CHECK_GE(exprs.size(), 1);
  tvm::Expr folded = exprs[0];
  size_t next = 1;
  while (next < exprs.size()) {
    folded = op(folded, exprs[next]);
    ++next;
  }
  return folded;
}
} // namespace detail
// Rectified linear unit with a configurable threshold:
//   out(i...) = max(x(i...), threshold)
//
// T defaults to int so that the defaulted threshold is actually usable: a
// template parameter cannot be deduced from a default argument, hence without
// the default template argument `relu(x)` would not compile and callers would
// be forced to spell `relu<int>(x)`. Explicit `relu<T>(x)` / `relu(x, t)`
// calls behave exactly as before.
//
// \param x The input tensor.
// \param threshold Lower bound applied to every element (0 by default).
// \return A tensor of the same shape as x, tagged "ewise".
template <typename T = int>
inline tvm::Tensor relu(const tvm::Tensor& x, T threshold = static_cast<T>(0)) {
  return tvm::compute(
      x->shape,
      [&](const tvm::Array<tvm::Var>& i) { return tvm::max(x(i), threshold); },
      "tensor", "ewise");
}
// Zero-pads the leading dimensions of a tensor.
//
// Dimension i of the result has extent
//   t->shape[i] + pad_before[i] + pad_after[i]
// for i < pad_before.size(); the remaining dimensions are left untouched.
// Elements that fall in the padded border read as 0.
//
// \param t The tensor to pad.
// \param pad_before Amount of padding inserted before each leading dimension;
//        must hold at least one entry.
// \param pad_after Amount of padding appended after each leading dimension.
//        Missing trailing entries are copied from pad_before, so omitting it
//        yields symmetric padding.
// \return The padded tensor, tagged "ewise".
inline tvm::Tensor pad(
    const tvm::Tensor& t, const tvm::Array<tvm::Expr>& pad_before,
    tvm::Array<tvm::Expr> pad_after = tvm::Array<tvm::Expr>()) {
  // Complete pad_after with the matching pad_before entries.
  for (size_t i = pad_after.size(); i < pad_before.size(); ++i) {
    pad_after.push_back(pad_before[i]);
  }
  CHECK_GE(pad_before.size(), 1);
  CHECK_EQ(pad_before.size(), pad_after.size());
  tvm::Array<tvm::Expr> output_shape;
  for (size_t i = 0; i < t->shape.size(); ++i) {
    if (i >= pad_before.size()) {
      output_shape.push_back(t->shape[i]);
    } else {
      output_shape.push_back(
          tvm::ir::Simplify(t->shape[i] + pad_before[i] + pad_after[i]));
    }
  }
  auto l = [&](tvm::Array<tvm::Var> ovars) -> tvm::Expr {
    tvm::Array<tvm::Expr> indices;  // where to read in t
    tvm::Array<tvm::Expr> sel;      // conditions for being inside t
    for (size_t i = 0; i < t->shape.size(); ++i) {
      if (i >= pad_before.size()) {
        indices.push_back(ovars[i]);
        continue;
      }
      if (!tvm::ir::Equal(pad_before[i], 0)) {
        sel.push_back(ovars[i] >= pad_before[i]);
        indices.push_back(ovars[i] - pad_before[i]);
      } else {
        indices.push_back(ovars[i]);
      }
      if (!tvm::ir::Equal(pad_after[i], 0)) {
        sel.push_back(tvm::ir::Simplify(ovars[i] < pad_before[i] + t->shape[i]));
      }
    }
    // With all padding amounts equal to 0 there is nothing to guard, and
    // detail::Map would fail its non-empty CHECK: the result is a plain copy.
    if (sel.size() == 0) {
      return t(indices);
    }
    return tvm::select(detail::Map(sel, tvm::ir::And::make), t(indices), 0);
  };
  return tvm::compute(output_shape, l, "tensor", "ewise");
}
// Builds a matrix product out(i, j) = sum_k a(i, k) * b(k, j), where
//   a(i, k) reads A[k][i] when trans_a is set and A[i][k] otherwise,
//   b(k, j) reads B[j][k] when trans_b is set and B[k][j] otherwise.
// The reduction extent is taken from A.
//
// \param A The left operand.
// \param B The right operand.
// \param trans_a Whether A is read transposed.
// \param trans_b Whether B is read transposed.
// \return The product tensor.
inline tvm::Tensor matmult(const tvm::Tensor& A, const tvm::Tensor& B,
                           bool trans_a = false, bool trans_b = false) {
  const int a_row_dim = trans_a ? 1 : 0;
  const int a_red_dim = trans_a ? 0 : 1;
  const int b_col_dim = trans_b ? 0 : 1;
  tvm::Array<tvm::Expr> output_shape{A->shape[a_row_dim], B->shape[b_col_dim]};
  auto k = tvm::reduce_axis(tvm::Range{0, A->shape[a_red_dim]}, "k");
  auto body = [&](tvm::Var i, tvm::Var j) {
    tvm::Expr lhs = trans_a ? A[k][i] : A[i][k];
    tvm::Expr rhs = trans_b ? B[j][k] : B[k][j];
    return tvm::sum(lhs * rhs, {k});
  };
  return tvm::compute(output_shape, body);
}
// 2-D convolution with constant padding and strides.
//
// The input I is indexed as (batch, in_channel, height, width) and the
// weights W as (in_channel, out_channel, kernel_h, kernel_w). The result is
// indexed as (batch, out_channel, out_height, out_width) with
//   out_height = (I->shape[2] - W->shape[2] + 2 * pad_h) / stride_h + 1
//   out_width  = (I->shape[3] - W->shape[3] + 2 * pad_w) / stride_w + 1
//
// \param I The 4-D input tensor.
// \param W The 4-D weight tensor.
// \param pad_h Zero padding added on both sides of the height dimension.
// \param pad_w Zero padding added on both sides of the width dimension.
// \param stride_h Stride along the height dimension.
// \param stride_w Stride along the width dimension.
// \return The convolution result.
inline tvm::Tensor conv2d_nchw(const tvm::Tensor& I, const tvm::Tensor& W,
                               int pad_h = 0, int pad_w = 0, int stride_h = 1,
                               int stride_w = 1) {
  CHECK_EQ(4, I->shape.size());
  CHECK_EQ(4, W->shape.size());
  tvm::Array<tvm::Expr> output_shape{
      I->shape[0],                                             // B
      W->shape[1],                                             // O
      (I->shape[2] - W->shape[2] + 2 * pad_h) / stride_h + 1,  // H
      (I->shape[3] - W->shape[3] + 2 * pad_w) / stride_w + 1   // W
  };
  auto i = tvm::reduce_axis(tvm::Range{0, I->shape[1]}, "i");
  auto kh = tvm::reduce_axis(tvm::Range{0, W->shape[2]}, "kh");
  auto kw = tvm::reduce_axis(tvm::Range{0, W->shape[3]}, "kw");
  // Only materialize a padding stage when there is something to pad.
  auto T = (pad_h == 0 && pad_w == 0)
               ? I
               : pad(I, {tvm::Expr(0), tvm::Expr(0), pad_h, pad_w});
  auto l = [&](tvm::Var b, tvm::Var o, tvm::Var h, tvm::Var w) {
    return tvm::sum(
        T(b, i, stride_h * h + kh, stride_w * w + kw) * W(i, o, kh, kw),
        {i, kh, kw});
  };
  return tvm::compute(output_shape, l);
}
// 2-D convolution with constant padding and strides, for an input laid out
// as (height, width, in_channel, batch).
//
// The weights W are indexed as (kernel_h, kernel_w, in_channel, out_channel).
// The result is indexed as (out_height, out_width, batch, out_channel) with
//   out_height = (I->shape[0] - W->shape[0] + 2 * pad_h) / stride_h + 1
//   out_width  = (I->shape[1] - W->shape[1] + 2 * pad_w) / stride_w + 1
//
// NOTE(review): the (batch, out_channel) order of the two trailing output
// dimensions follows the original B / O annotations of this function; it is
// not the HWCN order of the input. Confirm against callers.
//
// \param I The 4-D input tensor.
// \param W The 4-D weight tensor.
// \param pad_h Zero padding added on both sides of the height dimension.
// \param pad_w Zero padding added on both sides of the width dimension.
// \param stride_h Stride along the height dimension.
// \param stride_w Stride along the width dimension.
// \return The convolution result.
inline tvm::Tensor conv2d_hwcn(const tvm::Tensor& I, const tvm::Tensor& W,
                               int pad_h = 0, int pad_w = 0, int stride_h = 1,
                               int stride_w = 1) {
  CHECK_EQ(4, I->shape.size());
  CHECK_EQ(4, W->shape.size());
  // Height and width are dimensions 0 and 1 of both I and W in this layout;
  // batch is the last dimension of I and in_channel the one before it.
  tvm::Array<tvm::Expr> output_shape{
      (I->shape[0] - W->shape[0] + 2 * pad_h) / stride_h + 1,  // H
      (I->shape[1] - W->shape[1] + 2 * pad_w) / stride_w + 1,  // W
      I->shape[3],                                             // B
      W->shape[3]                                              // O
  };
  auto i = tvm::reduce_axis(tvm::Range{0, I->shape[2]}, "i");
  auto kh = tvm::reduce_axis(tvm::Range{0, W->shape[0]}, "kh");
  auto kw = tvm::reduce_axis(tvm::Range{0, W->shape[1]}, "kw");
  // Only materialize a padding stage when there is something to pad; the
  // padded dimensions (height, width) are the two leading ones.
  auto T = (pad_h == 0 && pad_w == 0) ? I : pad(I, {pad_h, pad_w});
  // The variables are bound in the order of output_shape: (H, W, B, O).
  auto l = [&](tvm::Var h, tvm::Var w, tvm::Var b, tvm::Var o) {
    return tvm::sum(
        T(stride_h * h + kh, stride_w * w + kw, i, b) * W(kh, kw, i, o),
        {i, kh, kw});
  };
  return tvm::compute(output_shape, l);
}
// Depthwise 2-D convolution with constant padding and strides.
//
// The input I is indexed as (batch, channel, height, width); W->shape[1] is
// used as the channel multiplier. The result is indexed as
// (batch, W->shape[1], out_height, out_width) with
//   out_height = (I->shape[2] - W->shape[2] + 2 * pad_h) / stride_h + 1
//   out_width  = (I->shape[3] - W->shape[3] + 2 * pad_w) / stride_w + 1
//
// NOTE(review): the reduction runs over the whole channel axis of I and both
// operands are indexed with i / channel_multiplier, while the output channel
// extent is W->shape[1]. Confirm that this matches the intended depthwise
// semantics; the compute is kept exactly as it was.
//
// \param I The 4-D input tensor.
// \param W The 4-D weight tensor.
// \param pad_h Zero padding added on both sides of the height dimension.
// \param pad_w Zero padding added on both sides of the width dimension.
// \param stride_h Stride along the height dimension.
// \param stride_w Stride along the width dimension.
// \return The convolution result.
inline tvm::Tensor depthwise_conv2d_nchw(const tvm::Tensor& I,
                                         const tvm::Tensor& W, int pad_h = 0,
                                         int pad_w = 0, int stride_h = 1,
                                         int stride_w = 1) {
  CHECK_EQ(4, I->shape.size());
  CHECK_EQ(4, W->shape.size());
  auto pCM = W->shape[1];  // channel_multiplier
  tvm::Array<tvm::Expr> output_shape{
      I->shape[0],                                             // B
      W->shape[1],                                             // O
      (I->shape[2] - W->shape[2] + 2 * pad_h) / stride_h + 1,  // H
      (I->shape[3] - W->shape[3] + 2 * pad_w) / stride_w + 1   // W
  };
  auto i = tvm::reduce_axis(tvm::Range{0, I->shape[1]}, "i");
  auto kh = tvm::reduce_axis(tvm::Range{0, W->shape[2]}, "kh");
  auto kw = tvm::reduce_axis(tvm::Range{0, W->shape[3]}, "kw");
  // Only materialize a padding stage when there is something to pad.
  auto T = (pad_h == 0 && pad_w == 0)
               ? I
               : pad(I, {tvm::Expr(0), tvm::Expr(0), pad_h, pad_w});
  auto l = [&](tvm::Var b, tvm::Var o, tvm::Var h, tvm::Var w) {
    return tvm::sum(T(b, i / pCM, stride_h * h + kh, stride_w * w + kw) *
                        W(i / pCM, o % pCM, kh, kw),
                    {i, kh, kw});
  };
  return tvm::compute(output_shape, l);
}
// Grouped 2-D convolution with constant padding and strides.
//
// The input I is indexed as (batch, group, in_channel, height, width) and the
// weights W as (group, in_channel, out_channel, kernel_h, kernel_w). The
// result is indexed as (batch, group, out_channel, out_height, out_width) with
//   out_height = (I->shape[3] - W->shape[3] + 2 * pad_h) / stride_h + 1
//   out_width  = (I->shape[4] - W->shape[4] + 2 * pad_w) / stride_w + 1
//
// \param I The 5-D input tensor.
// \param W The 5-D weight tensor.
// \param pad_h Zero padding added on both sides of the height dimension.
// \param pad_w Zero padding added on both sides of the width dimension.
// \param stride_h Stride along the height dimension.
// \param stride_w Stride along the width dimension.
// \return The convolution result.
inline tvm::Tensor group_conv2d_ngchw(const tvm::Tensor& I,
                                      const tvm::Tensor& W, int pad_h = 0,
                                      int pad_w = 0, int stride_h = 1,
                                      int stride_w = 1) {
  CHECK_EQ(5, I->shape.size());
  CHECK_EQ(5, W->shape.size());
  tvm::Array<tvm::Expr> output_shape{
      I->shape[0],                                             // B
      I->shape[1],                                             // G
      W->shape[2],                                             // O
      (I->shape[3] - W->shape[3] + 2 * pad_h) / stride_h + 1,  // H
      (I->shape[4] - W->shape[4] + 2 * pad_w) / stride_w + 1   // W
  };
  auto i = tvm::reduce_axis(tvm::Range{0, I->shape[2]}, "i");
  auto kh = tvm::reduce_axis(tvm::Range{0, W->shape[3]}, "kh");
  auto kw = tvm::reduce_axis(tvm::Range{0, W->shape[4]}, "kw");
  // Only materialize a padding stage when there is something to pad.
  auto T = (pad_h == 0 && pad_w == 0)
               ? I
               : pad(I, {tvm::Expr(0), tvm::Expr(0), tvm::Expr(0), pad_h, pad_w});
  auto l = [&](tvm::Var b, tvm::Var g, tvm::Var o, tvm::Var h, tvm::Var w) {
    // Read from the (possibly padded) tensor T: reading I directly would
    // ignore the requested padding and index past the bounds of the input.
    return tvm::sum(
        T(b, g, i, stride_h * h + kh, stride_w * w + kw) * W(g, i, o, kh, kw),
        {i, kh, kw});
  };
  return tvm::compute(output_shape, l);
}
} // namespace topi
#endif // TOPI_NN_H_