CNTK v2 library: Migrate past_value and future_value to sequence

Amit Agarwal 2017-03-30 02:22:55 -07:00
Parent 0d2879eab5
Commit 8a9020f78e
26 changed files with 242 additions and 93 deletions
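The pattern applied across all 26 files is mechanical: drop past_value/future_value from the cntk/cntk.ops imports and call them through the sequence namespace instead, while cntk.ops keeps deprecated shims that warn and forward. A minimal sketch of the before/after (variable names are illustrative, not from the diff):

import cntk as C

x = C.sequence.input(shape=(3,))

# before this commit:  y = C.past_value(x, time_step=1)   (still works, now warns)
# after this commit:
y = C.sequence.past_value(x, time_step=1)    # shift right one step; start padded with initial_state (0 by default)
z = C.sequence.future_value(x, time_step=1)  # shift left one step; end padded with initial_state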

View file

@@ -6,7 +6,7 @@ from cntk import Trainer, Axis, device, combine
from cntk.layers.blocks import Stabilizer, _initializer_for, _INFERRED, Parameter, Placeholder
from cntk.layers import Recurrence, Convolution, Dense
from cntk.ops import input, sequence, reduce_sum, \
-parameter, times, element_times, past_value, plus, placeholder, reshape, constant, sigmoid, convolution, tanh, times_transpose, greater, element_divide, element_select, exp, future_value, past_value
+parameter, times, element_times, plus, placeholder, reshape, constant, sigmoid, convolution, tanh, times_transpose, greater, element_divide, element_select, exp
from cntk.losses import cosine_distance
from cntk.internal import _as_tuple, sanitize_input
from cntk.initializer import uniform, glorot_uniform

View file

@@ -10,7 +10,7 @@ import os
from cntk import Trainer, Axis
from cntk.io import MinibatchSource, CTFDeserializer, StreamDef, StreamDefs, INFINITELY_REPEAT
from cntk.learners import momentum_sgd, fsadagrad, momentum_as_time_constant_schedule, learning_rate_schedule, UnitType
-from cntk import input, cross_entropy_with_softmax, classification_error, sequence, past_value, future_value, \
+from cntk import input, cross_entropy_with_softmax, classification_error, sequence, \
element_select, alias, hardmax, placeholder, combine, parameter, times, plus
from cntk.ops.functions import CloneMethod, load_model, Function
from cntk.initializer import glorot_uniform

View file

@@ -17,7 +17,7 @@ from cntk.device import try_set_default_device, gpu
from cntk.train.distributed import *
from cntk.io import MinibatchSource, CTFDeserializer, StreamDef, StreamDefs, INFINITELY_REPEAT, FULL_DATA_SWEEP
from cntk.learners import learning_rate_schedule, UnitType, momentum_sgd, momentum_as_time_constant_schedule
-from cntk import input, cross_entropy_with_softmax, classification_error, sequence, past_value, future_value, element_select, alias, hardmax
+from cntk import input, cross_entropy_with_softmax, classification_error, sequence, element_select, alias, hardmax
from cntk.ops.functions import CloneMethod
from cntk.train.training_session import *
from cntk.logging import *
@@ -87,7 +87,7 @@ def create_network(input_vocab_dim, label_vocab_dim):
encoder_outputH = stabilize(input_sequence)
for i in range(0, num_layers):
(encoder_outputH, encoder_outputC) = LSTMP_component_with_self_stabilization(
-encoder_outputH.output, hidden_dim, hidden_dim, future_value, future_value)
+encoder_outputH.output, hidden_dim, hidden_dim, sequence.future_value, sequence.future_value)
thought_vectorH = sequence.first(encoder_outputH)
thought_vectorC = sequence.first(encoder_outputC)
@@ -100,20 +100,20 @@ def create_network(input_vocab_dim, label_vocab_dim):
# Decoder
decoder_history_hook = alias(label_sequence, name='decoder_history_hook') # copy label_sequence
-decoder_input = element_select(is_first_label, label_sentence_start_scattered, past_value(
+decoder_input = element_select(is_first_label, label_sentence_start_scattered, sequence.past_value(
decoder_history_hook))
decoder_outputH = stabilize(decoder_input)
for i in range(0, num_layers):
if (i > 0):
-recurrence_hookH = past_value
-recurrence_hookC = past_value
+recurrence_hookH = sequence.past_value
+recurrence_hookC = sequence.past_value
else:
isFirst = sequence.is_first(label_sequence)
recurrence_hookH = lambda operand: element_select(
-isFirst, thought_vector_broadcastH, past_value(operand))
+isFirst, thought_vector_broadcastH, sequence.past_value(operand))
recurrence_hookC = lambda operand: element_select(
-isFirst, thought_vector_broadcastC, past_value(operand))
+isFirst, thought_vector_broadcastC, sequence.past_value(operand))
(decoder_outputH, encoder_outputC) = LSTMP_component_with_self_stabilization(
decoder_outputH.output, hidden_dim, hidden_dim, recurrence_hookH, recurrence_hookC)
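The recurrence hooks threaded through this diff are ordinary callables that map a placeholder to its delayed value, which is why sequence.past_value and sequence.future_value can be passed interchangeably (the latter runs the recurrence backwards). A minimal sketch of the pattern, with illustrative names that do not appear in this commit:

import cntk as C

def simple_recurrence(step, x, hook=C.sequence.past_value):
    h_fwd = C.placeholder()                      # forward declaration of the recurrent state
    h = step(hook(h_fwd), x)                     # the step function sees the delayed state
    h.replace_placeholders({h_fwd: h.output})    # close the recurrent loop
    return h

# usage: a toy accumulator, h_t = tanh(h_{t-1} + x_t)
# simple_recurrence(lambda h_prev, x_t: C.tanh(h_prev + x_t), C.sequence.input(shape=(3,)))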

View file

@@ -168,7 +168,7 @@ def LSTMP_cell_with_self_stabilization(input, prev_output, prev_cell_state):
return (times(element_times(expsWmr, mt), Wmr), ct)
-def LSTMP_component_with_self_stabilization(input, output_dim, cell_dim, recurrence_hookH=past_value, recurrence_hookC=past_value):
+def LSTMP_component_with_self_stabilization(input, output_dim, cell_dim, recurrence_hookH=sequence.past_value, recurrence_hookC=sequence.past_value):
dh = placeholder(
shape=(output_dim), dynamic_axes=input.dynamic_axes)
dc = placeholder(

View file

@@ -44,7 +44,7 @@ def create_test_model():
def with_lookahead():
x = placeholder()
-future_x = future_value(x)
+future_x = sequence.future_value(x)
apply_x = splice (x, future_x)
return apply_x

View file

@@ -89,10 +89,9 @@ if __name__=='__main__':
array([[7.2,8.2]]),
array([[7.3,8.3], [7.31, 8.31]]),
]
-from cntk.ops import past_value, future_value
data_seq_axis = Axis('inputAxis')
init_seq_axis = Axis('initAxis')
-f = past_value(sequence.input(2, sequence_axis=data_seq_axis), time_step=2, initial_state=sequence.input(2, sequence_axis=init_seq_axis))
+f = sequence.past_value(sequence.input(2, sequence_axis=data_seq_axis), time_step=2, initial_state=sequence.input(2, sequence_axis=init_seq_axis))
res = f(data, initial_state)
print(res)

View file

@@ -9,7 +9,7 @@ module_path = os.path.join(py_path, 'ReasoNet')
import cntk.device as device
import numpy as np
from cntk.ops.tests.ops_test_utils import cntk_device
-from cntk.ops import input, past_value, future_value
+from cntk.ops import input
from cntk.io import MinibatchSource
from cntk import Trainer, Axis, device, combine
from cntk.layers import Recurrence, Convolution

View file

@@ -833,7 +833,7 @@
"\n",
"* takes no input arguments\n",
"* creates a placeholder (sequence) variable\n",
-"* computes the \"next value\" in this sequence using the `future_value()` operation and\n",
+"* computes the \"next value\" in this sequence using the `sequence.future_value()` operation and\n",
"* concatenates the current and the next value into a vector of twice the embedding dimension using `splice()`\n",
"\n",
"and then insert this function into `Sequential()`'s list right after the embedding layer."
@@ -1055,7 +1055,7 @@
"source": [
"def OneWordLookahead():\n",
" x = C.placeholder()\n",
-" apply_x = splice (x, future_value(x))\n",
+" apply_x = splice (x, sequence.future_value(x))\n",
" return apply_x\n",
"\n",
"def create_model():\n",

View file

@@ -238,6 +238,7 @@
"#from keras.optimizers import *\n",
"from cntk import *\n",
"from cntk.layers import *\n",
+"from cntk.ops.sequence import input\n",
"# Select the right target device when this notebook is being tested:\n",
"if 'TEST_DEVICE' in os.environ:\n",
" if os.environ['TEST_DEVICE'] == 'cpu':\n",
@@ -1014,7 +1015,7 @@
"D = 4 # input dimensionality\n",
"H = 10 # number of hidden layer neurons\n",
"\n",
-"observations = C.input(STATE_COUNT, np.float32, name=\"obs\")\n",
+"observations = input(STATE_COUNT, np.float32, name=\"obs\")\n",
"\n",
"W1 = C.parameter(shape=(STATE_COUNT, H), init=C.glorot_uniform(), name=\"W1\")\n",
"b1 = C.parameter(shape=H, name=\"b1\")\n",
@@ -1106,8 +1107,8 @@
}
],
"source": [
-"input_y = C.input(1, np.float32, name=\"input_y\")\n",
-"advantages = C.input(1, np.float32, name=\"advt\")\n",
+"input_y = input(1, np.float32, name=\"input_y\")\n",
+"advantages = input(1, np.float32, name=\"advt\")\n",
"\n",
"loss = -C.reduce_mean(C.log(C.square(input_y - probability) + 1e-4) * advantages, axis=0, name='loss')\n",
"\n",

View file

@@ -184,7 +184,7 @@
"from cntk import Trainer, Axis\n",
"from cntk.io import MinibatchSource, CTFDeserializer, StreamDef, StreamDefs, INFINITELY_REPEAT\n",
"from cntk.learners import momentum_sgd, fsadagrad, momentum_as_time_constant_schedule, learning_rate_schedule, UnitType\n",
-"from cntk import input, cross_entropy_with_softmax, classification_error, sequence, past_value, future_value, \\\n",
+"from cntk import input, cross_entropy_with_softmax, classification_error, sequence, \\\n",
" element_select, alias, hardmax, placeholder_variable, combine, parameter, times, plus\n",
"from cntk.ops.functions import CloneMethod, load_model, Function\n",
"from cntk.initializer import glorot_uniform\n",

View file

@@ -281,7 +281,7 @@
" indices = np.random.choice(\n",
" range(Param.num_classes),\n",
" size=num_vectors, \n",
-" p = data_sampling_distribution()).reshape((1, num_vectors))\n",
+" p = data_sampling_distribution()).reshape((num_vectors, 1))\n",
" list_of_vectors = C.Value.one_hot(indices, Param.num_classes)\n",
" return (list_of_vectors, indices.flatten())\n",
"\n",
@@ -376,7 +376,7 @@
" vectors, indices = get_random_one_hot_data(Param.test_set_size)\n",
" total_cross_entropy = 0.0\n",
" arguments = (vectors)\n",
-" z = softmax_input.eval(arguments)[0].reshape(Param.test_set_size, Param.num_classes)\n",
+" z = softmax_input.eval(arguments).reshape(Param.test_set_size, Param.num_classes)\n",
"\n",
" for i in range(len(indices)):\n",
" log_p = log_softmax(z[i], indices[i])\n",

View file

@@ -168,7 +168,7 @@
"from cntk import Trainer, Axis\n",
"from cntk.io import MinibatchSource, CTFDeserializer, StreamDef, StreamDefs, INFINITELY_REPEAT\n",
"from cntk.learners import momentum_sgd, momentum_as_time_constant_schedule, learning_rate_schedule, UnitType\n",
-"from cntk import input, cross_entropy_with_softmax, classification_error, sequence, past_value, future_value, element_select, \\\n",
+"from cntk import input, cross_entropy_with_softmax, classification_error, sequence, element_select, \\\n",
" alias, hardmax, placeholder, combine, parameter, plus, times\n",
"from cntk.ops.functions import CloneMethod\n",
"from cntk.layers import LSTM, Stabilizer\n",
@@ -499,7 +499,7 @@
},
"outputs": [],
"source": [
-"def LSTM_layer(input, output_dim, recurrence_hook_h=past_value, recurrence_hook_c=past_value):\n",
+"def LSTM_layer(input, output_dim, recurrence_hook_h=sequence.past_value, recurrence_hook_c=sequence.past_value):\n",
" # we first create placeholders for the hidden state and cell state which we don't have yet\n",
" dh = placeholder(shape=(output_dim), dynamic_axes=input.dynamic_axes)\n",
" dc = placeholder(shape=(output_dim), dynamic_axes=input.dynamic_axes)\n",
@@ -531,7 +531,7 @@
"\n",
"We will use the LSTM recurrence that we defined just above. Remember that its function signature is:\n",
"\n",
-"`def LSTM_layer(input, output_dim, recurrence_hook_h=past_value, recurrence_hook_c=past_value):`\n",
+"`def LSTM_layer(input, output_dim, recurrence_hook_h=sequence.past_value, recurrence_hook_c=sequence.past_value):`\n",
"\n",
"and it returns a tuple `(hidden_state, hidden_cell)`. We will complete the following four exercises below. If possible, try them out before looking at the answers.\n",
"\n",
@@ -571,7 +571,7 @@
"\n",
"# 4.\n",
"# Reverse the order of the input_sequence (this has been shown to help especially in machine translation)\n",
-"(encoder_output_h, encoder_output_c) = LSTM_layer(input_sequence, hidden_dim, future_value, future_value)"
+"(encoder_output_h, encoder_output_c) = LSTM_layer(input_sequence, hidden_dim, sequence.future_value, sequence.future_value)"
]
},
{
@@ -595,7 +595,7 @@
},
"outputs": [],
"source": [
-"decoder_input = element_select(is_first_label, label_sentence_start_scattered, past_value(label_sequence))"
+"decoder_input = element_select(is_first_label, label_sentence_start_scattered, sequence.past_value(label_sequence))"
]
},
{
@@ -618,7 +618,7 @@
"outputs": [],
"source": [
"(output_h, output_c) = LSTM_layer(input_sequence, hidden_dim,\n",
-" recurrence_hook_h=past_value, recurrence_hook_c=past_value)"
+" recurrence_hook_h=sequence.past_value, recurrence_hook_c=sequence.past_value)"
]
},
{
@@ -647,8 +647,8 @@
"# 1.\n",
"# Create the recurrence hooks for the decoder LSTM.\n",
"\n",
-"recurrence_hook_h = lambda operand: element_select(is_first_label, thought_vector_broadcast_h, past_value(operand))\n",
-"recurrence_hook_c = lambda operand: element_select(is_first_label, thought_vector_broadcast_c, past_value(operand))\n",
+"recurrence_hook_h = lambda operand: element_select(is_first_label, thought_vector_broadcast_h, sequence.past_value(operand))\n",
+"recurrence_hook_c = lambda operand: element_select(is_first_label, thought_vector_broadcast_c, sequence.past_value(operand))\n",
"\n",
"# 2.\n",
"# With your recurrence hooks, create the decoder.\n",
@@ -663,13 +663,13 @@
"decoder_output_h = alias(decoder_input)\n",
"for i in range(0, num_layers):\n",
" if (i > 0):\n",
-" recurrence_hook_h = past_value\n",
-" recurrence_hook_c = past_value\n",
+" recurrence_hook_h = sequence.past_value\n",
+" recurrence_hook_c = sequence.past_value\n",
" else:\n",
" recurrence_hook_h = lambda operand: element_select(\n",
-" is_first_label, thought_vector_broadcast_h, past_value(operand))\n",
+" is_first_label, thought_vector_broadcast_h, sequence.past_value(operand))\n",
" recurrence_hook_c = lambda operand: element_select(\n",
-" is_first_label, thought_vector_broadcast_c, past_value(operand))\n",
+" is_first_label, thought_vector_broadcast_c, sequence.past_value(operand))\n",
"\n",
" (decoder_output_h, decoder_output_c) = LSTM_layer(decoder_output_h.output, hidden_dim,\n",
" recurrence_hook_h, recurrence_hook_c)"
@@ -749,7 +749,7 @@
" encoder_output_h = stabilize(input_sequence)\n",
" for i in range(0, num_layers):\n",
" (encoder_output_h, encoder_output_c) = LSTM_layer(\n",
-" encoder_output_h.output, hidden_dim, future_value, future_value)\n",
+" encoder_output_h.output, hidden_dim, sequence.future_value, sequence.future_value)\n",
"\n",
" # Prepare encoder output to be used in decoder\n",
" thought_vector_h = sequence.first(encoder_output_h)\n",
@@ -763,19 +763,19 @@
" # Decoder\n",
" decoder_history_hook = alias(label_sequence, name='decoder_history_hook') # copy label_sequence\n",
"\n",
-" decoder_input = element_select(is_first_label, label_sentence_start_scattered, past_value(\n",
+" decoder_input = element_select(is_first_label, label_sentence_start_scattered, sequence.past_value(\n",
" decoder_history_hook))\n",
"\n",
" decoder_output_h = stabilize(decoder_input)\n",
" for i in range(0, num_layers):\n",
" if (i > 0):\n",
-" recurrence_hook_h = past_value\n",
-" recurrence_hook_c = past_value\n",
+" recurrence_hook_h = sequence.past_value\n",
+" recurrence_hook_c = sequence.past_value\n",
" else:\n",
" recurrence_hook_h = lambda operand: element_select(\n",
-" is_first_label, thought_vector_broadcast_h, past_value(operand))\n",
+" is_first_label, thought_vector_broadcast_h, sequence.past_value(operand))\n",
" recurrence_hook_c = lambda operand: element_select(\n",
-" is_first_label, thought_vector_broadcast_c, past_value(operand))\n",
+" is_first_label, thought_vector_broadcast_c, sequence.past_value(operand))\n",
"\n",
" (decoder_output_h, decoder_output_c) = LSTM_layer(\n",
" decoder_output_h.output, hidden_dim, recurrence_hook_h, recurrence_hook_c)\n",
@@ -936,7 +936,7 @@
"outputs": [],
"source": [
"decoder_history_hook = alias(label_sequence, name='decoder_history_hook') # copy label_sequence\n",
-"decoder_input = element_select(is_first_label, label_sentence_start_scattered, past_value(decoder_history_hook))"
+"decoder_input = element_select(is_first_label, label_sentence_start_scattered, sequence.past_value(decoder_history_hook))"
]
},
{

View file

@@ -493,8 +493,8 @@ def memoize(func):
@memoize
def _sparse_to_dense_network_cache(input_shape):
-from cntk.ops import times, input
+from cntk.ops import times, sequence
-temp_input = input(input_shape)
+temp_input = sequence.input(input_shape)
eye_shape = input_shape[-1]
return times(temp_input, np.eye(eye_shape))

View file

@@ -14,7 +14,7 @@ import numpy as np
from cntk import input, placeholder, combine, alias, sequence, parameter, constant
from cntk.variables import Record, Constant, Parameter
from cntk.axis import Axis
-from cntk.ops import times, slice, sigmoid, tanh, log, exp, softplus, past_value, future_value
+from cntk.ops import times, slice, sigmoid, tanh, log, exp, softplus
from .typing import Signature
from cntk.internal import _as_tuple
from cntk.initializer import glorot_uniform
@@ -96,7 +96,7 @@ def ForwardDeclaration(name='forward_declaration'):
>>> x = C.input(**Sequence[Tensor[2]])
>>> ones_like_input = sequence.broadcast_as(1, x) # sequence of scalar ones of same length as input
>>> out_fwd = ForwardDeclaration() # placeholder for the state variables
->>> out = past_value(out_fwd, initial_state=0) + ones_like_input
+>>> out = sequence.past_value(out_fwd, initial_state=0) + ones_like_input
>>> out_fwd.resolve_to(out)
>>> length = sequence.last(out)
>>> x0 = np.reshape(np.arange(6,dtype=np.float32),(1,3,2))

View file

@@ -208,9 +208,9 @@ def _window(x, axis, begin, end, step, stride, initial_state=None):
helper to expand a sequence into a window, splicing them along the given axis (which must already exist)
'''
shifted = [
-past_value(x, initial_state=initial_state, time_step=-t) if t < 0 else
+sequence.past_value(x, initial_state=initial_state, time_step=-t) if t < 0 else
x if t == 0 else
-future_value(x, initial_state=initial_state, time_step=t)
+sequence.future_value(x, initial_state=initial_state, time_step=t)
for t in range(begin, end, step)
]
r = splice(*shifted, axis=axis)
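For instance, begin=-1, end=2, step=1 splices the shifted copies for t-1, t, and t+1 along the given axis, i.e. a width-3 sliding window over the sequence. A hand-expanded sketch of that case (assuming x already carries a static axis 0 of dimension 1 to stack on):

import cntk as C

x = C.sequence.input(shape=(1, 5))
w = C.splice(C.sequence.past_value(x),    # t-1; first step padded with zeros
             x,                           # t
             C.sequence.future_value(x),  # t+1; last step padded with zeros
             axis=0)                      # per-step shape becomes (3, 5)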

View file

@@ -7,7 +7,7 @@
# sequence -- first/higher-order functions over sequences, like Recurrence()
from ..variables import Record
-from ..ops import combine, past_value, future_value, splice, sequence
+from ..ops import combine, splice, sequence
from .blocks import *
from .blocks import _get_initial_state_or_default, _inject_name
@@ -193,7 +193,7 @@ def RecurrenceFrom(step_function, go_backwards=default_override_or(False), retur
>>> decoder = RecurrenceFrom(LSTM(500)) # decoder starts from a data-dependent initial state, hence -From()
>>> emit = Dense(30000)
>>> h, c = encoder(embed(en)).outputs # LSTM encoder has two outputs (h, c)
->>> z = emit(decoder(h, c, past_value(fr))) # decoder takes encoder outputs as initial state
+>>> z = emit(decoder(h, c, sequence.past_value(fr))) # decoder takes encoder outputs as initial state
>>> loss = C.cross_entropy_with_softmax(z, fr)
Args:
@@ -570,7 +570,7 @@ def UnfoldFrom(generator_function, until_predicate=None, length_increase=1, name
# apply until_predicate if given
if until_predicate is not None:
-valid_frames = Recurrence(lambda h, x: (1-past_value(x)) * h, initial_state=1, name='valid_frames')(until_predicate(output))
+valid_frames = Recurrence(lambda h, x: (1-sequence.past_value(x)) * h, initial_state=1, name='valid_frames')(until_predicate(output))
output = sequence.gather(output, valid_frames, name='valid_output')
return output

View file

@@ -435,7 +435,7 @@ class TensorBoardProgressWriter(cntk_py.ProgressWriter):
if freq is None:
freq = sys.maxsize
-super(TensorBoardProgressWriter, self).__init__(freq, 0, sys.maxsize, 0)
+super(TensorBoardProgressWriter, self).__init__(freq, 0, sys.maxsize, 0, sys.maxsize, 0)
# Only log either when rank is not specified or when rank is 0.
self.writer = cntk_py.TensorBoardFileWriter(log_dir, model) if not rank else None

View file

@@ -1633,6 +1633,8 @@ def element_select(flag, value_if_true, value_if_false, name=''):
@typemap
def future_value(x, initial_state=None, time_step=1, name=''):
'''
+DEPRECATED.
This function returns the future value w.r.t. ``x``. It is most often used when
creating RNNs. The resulting tensor has the same shape as the input but is
the next logical sample. The ``time_step`` parameter is the number of steps
@@ -1674,20 +1676,18 @@
:class:`~cntk.ops.functions.Function`
'''
from cntk.internal import sanitize_dtype_cntk
-from ..cntk_py import Constant
-from cntk.cntk_py import future_value
+import warnings
+warnings.warn('This will be removed in future versions. Please use '
+'sequence.future_value() instead.', DeprecationWarning)
-if initial_state is None:
-initial_state = Constant.scalar(sanitize_dtype_cntk(np.float32), 0.0)
-x = sanitize_input(x)
-return future_value(x, initial_state, time_step, name)
+return sequence.future_value(x, initial_state, time_step, name)
@typemap
def past_value(x, initial_state=None, time_step=1, name=''):
'''
+DEPRECATED.
This function returns the past value w.r.t. ``x``. It is most often used when
creating RNNs. The resulting tensor has the same shape as the input but is
the previous logical sample. The ``time_step`` parameter is the number of steps
@@ -1774,17 +1774,11 @@
:class:`~cntk.ops.functions.Function`
'''
from cntk.internal import sanitize_dtype_cntk
-from ..cntk_py import Constant
-from cntk.cntk_py import past_value
+import warnings
+warnings.warn('This will be removed in future versions. Please use '
+'sequence.past_value() instead.', DeprecationWarning)
-if initial_state is None:
-initial_state = Constant.scalar(sanitize_dtype_cntk(np.float32), 0.0)
-else:
-initial_state = sanitize_input(initial_state)
-x = sanitize_input(x)
-return past_value(x, initial_state, time_step, name)
+return sequence.past_value(x, initial_state, time_step, name)
# TODO: does this belong into .sequence?
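The net effect of these two shims: cntk.ops.past_value and cntk.ops.future_value keep working but emit a DeprecationWarning and forward to the sequence namespace. A quick way to observe this (an illustrative check, not part of the commit):

import warnings
import cntk as C

x = C.sequence.input(shape=(2,))
with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter('always')          # DeprecationWarning is ignored by default
    y = C.past_value(x)                      # forwards to C.sequence.past_value
assert any(issubclass(w.category, DeprecationWarning) for w in caught)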

View file

@@ -38,9 +38,166 @@ def input(shape, dtype=default_override_or(np.float32), needs_gradient=False, is
# sequence ops
##########################################################################
@typemap
def future_value(x, initial_state=None, time_step=1, name=''):
'''
This function returns the future value w.r.t. ``x``. It is most often used when
creating RNNs. The resulting tensor has the same shape as the input but is
the next logical sample. The ``time_step`` parameter is the number of steps
to look into the future and is 1 by default. If there is no future value (i.e.
the current sample is the last one in the tensor) then the ``initial_state``
value is returned.
The initial state can be a constant (scalar or tensor), a learnable tensor
or input data (which has a batch dimension, as needed for sequence-to-sequence models).
Example:
>>> x = C.sequence.input(shape=(3,2))
>>> # Create one sequence with 4 tensors of shape (3, 2)
>>> x0 = np.reshape(np.arange(24,dtype=np.float32),(1,4,3,2))
>>> y = C.sequence.future_value(x) # using initial state of 0 by default
>>> y.eval({x:x0})
[array([[[ 6., 7.],
[ 8., 9.],
[ 10., 11.]],
<BLANKLINE>
[[ 12., 13.],
[ 14., 15.],
[ 16., 17.]],
<BLANKLINE>
[[ 18., 19.],
[ 20., 21.],
[ 22., 23.]],
<BLANKLINE>
[[ 0., 0.],
[ 0., 0.],
[ 0., 0.]]], dtype=float32)]
Args:
x: the tensor (or its name) from which the future value is obtained.
initial_state: tensor or scalar representing the initial value to be used when the input tensor is shifted in time.
time_step (int): the number of time steps to look into the future (default 1)
name (str, optional): the name of the Function instance in the network
Returns:
:class:`~cntk.ops.functions.Function`
'''
from cntk.internal import sanitize_dtype_cntk
from ...cntk_py import Constant
from cntk.cntk_py import future_value
if initial_state is None:
initial_state = Constant.scalar(sanitize_dtype_cntk(np.float32), 0.0)
x = sanitize_input(x)
return future_value(x, initial_state, time_step, name)
@typemap
def past_value(x, initial_state=None, time_step=1, name=''):
'''
This function returns the past value w.r.t. ``x``. It is most often used when
creating RNNs. The resulting tensor has the same shape as the input but is
the previous logical sample. The ``time_step`` parameter is the number of steps
to look into the past and is 1 by default. If there is no past value (i.e.
the current sample is the first one in the tensor) then the ``initial_state``
value is returned.
The initial state can be a constant (scalar or tensor), a learnable tensor
or input data (which has a batch dimension, as needed for sequence-to-sequence models).
Example:
>>> # create example input: one sequence with 4 tensors of shape (3, 2)
>>> from cntk.layers.typing import Tensor, Sequence
>>> x = C.sequence.input((3,2))
>>> x0 = np.reshape(np.arange(24,dtype=np.float32),(1,4,3,2))
>>> x0
array([[[[ 0., 1.],
[ 2., 3.],
[ 4., 5.]],
<BLANKLINE>
[[ 6., 7.],
[ 8., 9.],
[ 10., 11.]],
<BLANKLINE>
[[ 12., 13.],
[ 14., 15.],
[ 16., 17.]],
<BLANKLINE>
[[ 18., 19.],
[ 20., 21.],
[ 22., 23.]]]], dtype=float32)
>>> # this demonstrates how past_value shifts the sequence by one, padding with initial_state
>>> y = C.sequence.past_value(x) # initial_state is 0 by default
>>> y.eval({x:x0})
[array([[[ 0., 0.],
[ 0., 0.],
[ 0., 0.]],
<BLANKLINE>
[[ 0., 1.],
[ 2., 3.],
[ 4., 5.]],
<BLANKLINE>
[[ 6., 7.],
[ 8., 9.],
[ 10., 11.]],
<BLANKLINE>
[[ 12., 13.],
[ 14., 15.],
[ 16., 17.]]], dtype=float32)]
>>> # here, we pass the initial_state as input data (e.g. sequence-to-sequence)
>>> s = C.input((3,2)) # not a sequence, e.g. a final encoder hidden state
>>> s0 = np.reshape(np.arange(6,dtype=np.float32)/2,(1,3,2))
>>> s0
array([[[ 0. , 0.5],
[ 1. , 1.5],
[ 2. , 2.5]]], dtype=float32)
>>> y = C.sequence.past_value(x, initial_state=s)
>>> y.eval({x:x0, s:s0}) # same as the previous example except for the first time step
[array([[[ 0. , 0.5],
[ 1. , 1.5],
[ 2. , 2.5]],
<BLANKLINE>
[[ 0. , 1. ],
[ 2. , 3. ],
[ 4. , 5. ]],
<BLANKLINE>
[[ 6. , 7. ],
[ 8. , 9. ],
[ 10. , 11. ]],
<BLANKLINE>
[[ 12. , 13. ],
[ 14. , 15. ],
[ 16. , 17. ]]], dtype=float32)]
Args:
x: the tensor (or its name) from which the past value is obtained
initial_state: tensor or scalar representing the initial value to be used when the input tensor is shifted in time.
time_step (int): the number of time steps to look into the past (default 1)
name (str, optional): the name of the Function instance in the network
Returns:
:class:`~cntk.ops.functions.Function`
'''
from cntk.internal import sanitize_dtype_cntk
from ...cntk_py import Constant
from cntk.cntk_py import past_value
if initial_state is None:
initial_state = Constant.scalar(sanitize_dtype_cntk(np.float32), 0.0)
else:
initial_state = sanitize_input(initial_state)
x = sanitize_input(x)
return past_value(x, initial_state, time_step, name)
def delay(x, initial_state=None, time_step=1, name=''):
'''
-This function combines :func:`~cntk.ops.past_value` and :func:`~cntk.ops.future_value` into a single function.
+This function combines :func:`~cntk.ops.sequence.past_value` and :func:`~cntk.ops.sequence.future_value` into a single function.
This is useful when the time_step is computed and can be positive, negative, or 0.
Args:
@@ -49,7 +206,7 @@ def delay(x, initial_state=None, time_step=1, name=''):
time_step (int): the number of time steps to look into the past, where negative values mean to look into the future, and 0 means a no-op (default 1).
name (str, optional): the name of the Function instance in the network
'''
-from ...ops import alias, past_value, future_value, element_select, element_divide, placeholder, exp
+from ...ops import alias, element_select, element_divide, placeholder, exp
if time_step > 0:
return past_value (x, time_step= time_step, initial_state=initial_state, name=name)
elif time_step < 0:
@@ -424,7 +581,7 @@ def reduce_max(x, name=''):
Returns:
The max value in the input sequence
"""
-from ...ops import past_value, future_value, element_select, placeholder, greater
+from ...ops import element_select, placeholder, greater
m = placeholder(shape=(1,), dynamic_axes = x.dynamic_axes, name='max')
o = element_select(greater(x, future_value(m)), x, future_value(m))
rlt = o.replace_placeholders({m:sanitize_input(o)})
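The delay() helper in this file dispatches on the sign of time_step, so one call site covers both directions, and 0 is a no-op. A usage sketch, assuming the helper is exposed as C.sequence.delay (expected values in the comments follow the default zero initial state):

import numpy as np
import cntk as C

x = C.sequence.input(shape=(1,))
data = [np.array([[1.], [2.], [3.]], dtype=np.float32)]

print(C.sequence.delay(x, time_step=1).eval({x: data}))   # like past_value:   [[0],[1],[2]]
print(C.sequence.delay(x, time_step=-1).eval({x: data}))  # like future_value: [[2],[3],[0]]
print(C.sequence.delay(x, time_step=0).eval({x: data}))   # alias of x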

View file

@@ -13,7 +13,7 @@ import pytest
from ..functions import *
from ...train.trainer import *
from ...initializer import glorot_uniform
-from .. import constant, parameter, input, placeholder, times, plus, past_value, sequence, as_composite, combine, convolution, splice, as_block
+from .. import constant, parameter, input, placeholder, times, plus, sequence, as_composite, combine, convolution, splice, as_block
from ... import InferredDimension, gpu, cpu
from .ops_test_utils import compare_lists_of_np_arrays, AA, cntk_device
@@ -190,7 +190,7 @@ def test_data_type_inference():
def test_recurrence_shape_inference():
i = sequence.input((2,))
p = placeholder()
-p_past = past_value(p)
+p_past = sequence.past_value(p)
p_past_plus_i = p_past + i
p_past_plus_i.replace_placeholder(p_past_plus_i.output)

View file

@@ -58,8 +58,7 @@ def test_op_future_value(input_size, time_step, initial_state, device_id, precis
}
init = parameter(init=AA(initial_state, dtype=dt), device=cntk_device(device_id))
-from .. import future_value
-input_op_input = future_value(a, init, time_step)
+input_op_input = sequence.future_value(a, init, time_step)
unittest_helper(input_op_input,
x, expected_forward, expected_backward,
@@ -95,8 +94,7 @@ def test_op_past_value(input_size, time_step, initial_state, device_id, precisio
init = parameter(init=AA(initial_state, dtype=dt), device=cntk_device(device_id))
-from .. import past_value
-input_op_input = past_value(a, init, time_step)
+input_op_input = sequence.past_value(a, init, time_step)
unittest_helper(input_op_input,
x, expected_forward, expected_backward,

View file

@@ -513,7 +513,7 @@ def test_op_broadcast_as(device_id, precision):
def test_op_broadcast_as_in_loop(device_id):
-from .. import sequence, placeholder, past_value, input
+from .. import sequence, placeholder, input
a_data = [AA([1]), AA([2]), AA([3])]
b_data = [AA([[2]]), AA([[2], [3]]), AA([[2], [3], [4]])]
@@ -522,7 +522,7 @@ def test_op_broadcast_as_in_loop(device_id):
b = sequence.input(shape=(1,), name='b')
out_placeholder = placeholder()
-out_delayed = past_value(out_placeholder, time_step=5)
+out_delayed = sequence.past_value(out_placeholder, time_step=5)
out_delayed_plus_b = out_delayed + b
out = sequence.broadcast_as(a, out_delayed_plus_b)
out.replace_placeholder(out)

View file

@@ -135,7 +135,7 @@ def test_training_3d_sparse_sequence_with_recurrence(device_id):
a_projection = times(a, w_i)
p_o = C.placeholder()
-h = C.past_value(p_o)
+h = C.sequence.past_value(p_o)
w_h = C.parameter(init=w_init_h, device=dev)
h_projection = times(h, w_h)
z = a_projection + h_projection

View file

@@ -10,7 +10,7 @@ from cntk import *
def test_outputs():
fwd_state = placeholder("placeholder")
-prev_state = past_value(fwd_state, name="prev_state")
+prev_state = sequence.past_value(fwd_state, name="prev_state")
z = abs(prev_state, "abs")
output = z.output
z = z.replace_placeholders({fwd_state: z.output})

View file

@@ -71,7 +71,7 @@ introduced, overloaded operators can be applied to them to form an operator grap
import cntk as C
# Create an input with the shape (2,3,*)
->>> x = C.input((2,3), name='features')
+>>> x = C.sequence.input((2,3), name='features')
# Create a constant scalar with value 2
>>> c = C.constant(value=2)
@@ -80,29 +80,29 @@ introduced, overloaded operators can be applied to them to form an operator grap
>>> w = C.parameter((2,3))
# Set up some test input data to check the operators.
-# We specify a full batch having one element, which is a
+# We specify a full batch having a sequence with one element, which is a
# (2,3) matrix.
->>> test_input = [ np.asarray([[10,20,30],[40,50,60]]) ]
+>>> test_input = [[ np.asarray([[10,20,30],[40,50,60]]) ]]
# Elementwise multiplication operation
>>> op = x * c
# Evaluate the op using test_input
>>> print(op.eval({ x: test_input }))
-array([[[ 20., 40., 60.],
-[ 80., 100., 120.]]], dtype=float32)
+[array([[[ 20., 40., 60.],
+[ 80., 100., 120.]]], dtype=float32)]
# Same as above (2 will be converted to constant)
>>> op2 = x * 2
>>> print(op2.eval({ x: test_input }))
-array([[[ 20., 40., 60.],
-[ 80., 100., 120.]]], dtype=float32)
+[array([[[ 20., 40., 60.],
+[ 80., 100., 120.]]], dtype=float32)]
# Elementwise multiplication of two 2x3 matrices
>>> op3 = x * [[1,2,3], [4,5,6]]
>>> print(op3.eval({ x: test_input}))
-array([[[ 10., 40., 90.],
-[ 160., 250., 360.]]], dtype=float32)
+[array([[[ 10., 40., 90.],
+[ 160., 250., 360.]]], dtype=float32)]
Broadcasting

View file

@@ -866,8 +866,8 @@ end with a zero:
Notes
~~~~~
-This layer is a wrapper around the ``past_value()`` and
-``future_value()`` primitives.
+This layer is a wrapper around the ``sequence.past_value()`` and
+``sequence.future_value()`` primitives.
Example
~~~~~~~
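A hedged sketch of the wrapped layer in use, assuming it is cntk.layers.Delay with a signed step parameter T (negative T looks ahead):

import cntk as C
from cntk.layers import Delay

x = C.sequence.input(shape=(1,))
lag  = Delay(T=1)(x)     # sequence.past_value(x) under the hood
lead = Delay(T=-1)(x)    # sequence.future_value(x) under the hood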