Improve numerical gradient check (#3856)

SWu 2019-08-31 02:25:16 -04:00 committed by Wuwei Lin
Parent 2ebf1bd14e
Commit a711f38ebe
1 changed file with 61 additions and 62 deletions


@@ -56,72 +56,71 @@ def run_infer_type(expr):
     return run_opt_pass(expr, transform.InferType())


-def rand_from_type(t):
-    return relay.Constant(rand(t.dtype, *[int(d) for d in t.shape]))
+def _np_randn_from_type(t, scale=1):
+    return (scale * np.random.randn(*(int(d) for d in t.shape))).astype(t.dtype)


-CHECK_GRAD_COUNTER = 0

-def check_grad(func, mod=None):
+def check_grad(func, inputs=None, eps=1e-6, atol=1e-5, rtol=1e-3):
+    """Perform numerical gradient checking given a relay function.
+
+    Compare analytical gradients to numerical gradients derived from two-sided approximation. Note
+    that this test may fail if your function input types are not of high enough precision.
+
+    Parameters
+    ----------
+    func : tvm.relay.Function
+        The relay function to test.
+
+    inputs: List[np.array]
+        Optional user-provided input parameters to use. If not given, will generate random normal
+        inputs scaled to be close to the chosen epsilon value to avoid numerical precision loss.
+
+    eps: float
+        The epsilon value to use for computing numerical gradient approximation.
+
+    atol: float
+        The absolute tolerance on difference between numerical and analytical gradients. Note that
+        this needs to be scaled appropriately relative to the chosen eps and inputs.
+
+    rtol: float
+        The relative tolerance on difference between numerical and analytical gradients. Note that
+        this needs to be scaled appropriately relative to the chosen eps.
     """
-    Test that directional gradient calculated by reverse mode
-    is close to the one calculated by finite difference.
-    """
-    global CHECK_GRAD_COUNTER
-    if mod is None:
-        mod = relay.Module()
-
-    def make(name):
-        return GlobalVar(name + str(CHECK_GRAD_COUNTER))
-    func_name = make("func_")
-    back_func_name = make("back_func_")
-    finite_difference_func_name = make("finite_difference_")
-    reverse_mode_func_name = make("reverse_mode_")
-    check_func_name = make("check_func_")
-    CHECK_GRAD_COUNTER = CHECK_GRAD_COUNTER + 1
-    epsilon = relay.const(0.01)
-    mod[func_name] = func
-    mod[back_func_name] = gradient(mod[func_name], mod=mod)
-    params = mod[func_name].params
-    directions = [rand_from_type(x.checked_type) for x in params]
-    ft = TensorType(())
-    sb = ScopeBuilder()
-
-    def get_reverse_mode_result(e, d, t):
-        assert isinstance(t, TensorType)
-        return op.cast(e * d, 'float32')
-    bf = sb.let("bf", TupleGetItem(back_func_name(*params), 1))
-    reverse_mode_results = [get_reverse_mode_result(TupleGetItem(bf, i),
-                                                    directions[i],
-                                                    x.checked_type)
-                            for i, x in enumerate(params)]
-    reverse_mode_result = relay.const(0.0)
-    for x in reverse_mode_results:
-        reverse_mode_result = reverse_mode_result + op.reduce.sum(x)
-    sb.ret(reverse_mode_result)
-    reverse_mode_result = sb.get()
-    mod[reverse_mode_func_name] = Function(params,
-                                           reverse_mode_result,
-                                           ft,
-                                           mod[func_name].type_params,
-                                           mod[func_name].attrs)
-    finite_difference_result = op.reduce.sum((func_name(*[x + epsilon * y for x, y in
-                                                          zip(params, directions)]) -
-                                              func_name(*params)) /
-                                             epsilon)
-    mod[finite_difference_func_name] = Function(params,
-                                                finite_difference_result,
-                                                ft,
-                                                mod[func_name].type_params,
-                                                mod[func_name].attrs)
-    check_func_result = op.abs(reverse_mode_func_name(*params) -
-                               finite_difference_func_name(*params))
-    mod[check_func_name] = Function(params,
-                                    check_func_result,
-                                    ft,
-                                    mod[func_name].type_params,
-                                    mod[func_name].attrs)
-    ex = create_executor(mod=mod)
-    res = ex.evaluate(check_func_name(*[rand_from_type(x.checked_type) for x in params]))
-    assert res.data.asnumpy() < 0.001
+    fwd_func = run_infer_type(func)
+    bwd_func = run_infer_type(gradient(fwd_func))
+
+    if inputs is None:
+        params = fwd_func.params
+        # Generate random inputs on the same scale as epsilon to avoid numerical precision loss.
+        inputs = [_np_randn_from_type(x.checked_type, scale=(10 * eps)) for x in params]
+
+    for target, ctx in ctx_list():
+        intrp = relay.create_executor(ctx=ctx, target=target)
+
+        # Get analytic gradients.
+        _, grads = intrp.evaluate(bwd_func)(*inputs)
+        grads = [grad.asnumpy().astype("float64") for grad in grads]
+
+        # Get numeric gradients for each dimension of each param, using two-sided approximation.
+        approx_grads = []
+        for x in inputs:
+            approx_grad = np.zeros(x.shape)
+            for i in np.ndindex(*x.shape):
+                x_i = x[i]
+                x[i] = x_i + eps
+                fwd_plus = intrp.evaluate(fwd_func)(*inputs).asnumpy().astype("float64")
+                x[i] = x_i - eps
+                fwd_minus = intrp.evaluate(fwd_func)(*inputs).asnumpy().astype("float64")
+                x[i] = x_i
+                approx_grad[i] = np.sum((fwd_plus - fwd_minus) / (2 * eps))
+            approx_grads.append(approx_grad)
+
+        # Compare gradients by checking that relative difference is below tolerance.
+        for grad, approx_grad in zip(grads, approx_grads):
+            np.testing.assert_allclose(grad, approx_grad, atol=atol, rtol=rtol)
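
For reference, the inner loop above is the standard central (two-sided) difference estimate, roughly (f(x + eps*e_i) - f(x - eps*e_i)) / (2*eps) for each element i, summed over the elements of the output. A minimal standalone numpy sketch of the same idea; the quadratic test function, helper name, and tolerances here are illustrative assumptions, not part of this commit:

import numpy as np

def numeric_grad(f, x, eps=1e-6):
    # Central (two-sided) difference: perturb one element at a time and sum over
    # the output elements, mirroring the loop in check_grad above.
    grad = np.zeros_like(x, dtype="float64")
    for i in np.ndindex(*x.shape):
        x_i = x[i]
        x[i] = x_i + eps
        plus = np.sum(f(x).astype("float64"))
        x[i] = x_i - eps
        minus = np.sum(f(x).astype("float64"))
        x[i] = x_i
        grad[i] = (plus - minus) / (2 * eps)
    return grad

x = np.random.randn(3).astype("float64")
# d/dx sum(x**2) = 2*x, so the numeric estimate should match 2*x closely.
np.testing.assert_allclose(numeric_grad(lambda v: v ** 2, x), 2 * x, rtol=1e-6, atol=1e-8)
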
 def rand(dtype, *shape):
     return tvm.nd.array(np.random.rand(*shape).astype(dtype))
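
As a usage illustration (not part of this commit), the new check_grad can be called directly on a small relay function. The sigmoid operator, shape, and dtype below are assumptions chosen for the example, and the snippet relies on the surrounding test module's imports and the check_grad defined above:

import numpy as np
from tvm import relay

# Hypothetical example: check gradients of a single sigmoid over a small tensor.
x = relay.var("x", shape=(3, 4), dtype="float64")
fwd = relay.Function([x], relay.sigmoid(x))

# Default behaviour: random normal inputs scaled near eps, default tolerances.
check_grad(fwd)

# Explicit inputs and a custom epsilon, if the defaults are not appropriate.
check_grad(fwd, inputs=[np.random.randn(3, 4).astype("float64")], eps=1e-6)
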