CNTK v2 library: Migrate past_value and future_value to sequence
Parent: 0d2879eab5
Commit: 8a9020f78e
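The change is mechanical throughout: the free functions past_value() and future_value() move into the sequence namespace. A minimal before/after sketch of user code (illustrative, not a file in this commit):

    import numpy as np
    import cntk as C

    x = C.sequence.input(shape=(1,))
    x0 = [np.array([[1.0], [2.0], [3.0]], dtype=np.float32)]

    # before this commit (now deprecated): y = C.past_value(x)
    # after this commit:
    y = C.sequence.past_value(x)    # shift right: [[0.], [1.], [2.]]
    z = C.sequence.future_value(x)  # shift left:  [[2.], [3.], [0.]]
    print(y.eval({x: x0}), z.eval({x: x0}))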
@@ -6,7 +6,7 @@ from cntk import Trainer, Axis, device, combine
 from cntk.layers.blocks import Stabilizer, _initializer_for, _INFERRED, Parameter, Placeholder
 from cntk.layers import Recurrence, Convolution, Dense
 from cntk.ops import input, sequence, reduce_sum, \
-    parameter, times, element_times, past_value, plus, placeholder, reshape, constant, sigmoid, convolution, tanh, times_transpose, greater, element_divide, element_select, exp, future_value, past_value
+    parameter, times, element_times, plus, placeholder, reshape, constant, sigmoid, convolution, tanh, times_transpose, greater, element_divide, element_select, exp
 from cntk.losses import cosine_distance
 from cntk.internal import _as_tuple, sanitize_input
 from cntk.initializer import uniform, glorot_uniform
@@ -10,7 +10,7 @@ import os
 from cntk import Trainer, Axis
 from cntk.io import MinibatchSource, CTFDeserializer, StreamDef, StreamDefs, INFINITELY_REPEAT
 from cntk.learners import momentum_sgd, fsadagrad, momentum_as_time_constant_schedule, learning_rate_schedule, UnitType
-from cntk import input, cross_entropy_with_softmax, classification_error, sequence, past_value, future_value, \
+from cntk import input, cross_entropy_with_softmax, classification_error, sequence, \
     element_select, alias, hardmax, placeholder, combine, parameter, times, plus
 from cntk.ops.functions import CloneMethod, load_model, Function
 from cntk.initializer import glorot_uniform
@@ -17,7 +17,7 @@ from cntk.device import try_set_default_device, gpu
 from cntk.train.distributed import *
 from cntk.io import MinibatchSource, CTFDeserializer, StreamDef, StreamDefs, INFINITELY_REPEAT, FULL_DATA_SWEEP
 from cntk.learners import learning_rate_schedule, UnitType, momentum_sgd, momentum_as_time_constant_schedule
-from cntk import input, cross_entropy_with_softmax, classification_error, sequence, past_value, future_value, element_select, alias, hardmax
+from cntk import input, cross_entropy_with_softmax, classification_error, sequence, element_select, alias, hardmax
 from cntk.ops.functions import CloneMethod
 from cntk.train.training_session import *
 from cntk.logging import *
@@ -87,7 +87,7 @@ def create_network(input_vocab_dim, label_vocab_dim):
     encoder_outputH = stabilize(input_sequence)
     for i in range(0, num_layers):
         (encoder_outputH, encoder_outputC) = LSTMP_component_with_self_stabilization(
-            encoder_outputH.output, hidden_dim, hidden_dim, future_value, future_value)
+            encoder_outputH.output, hidden_dim, hidden_dim, sequence.future_value, sequence.future_value)

     thought_vectorH = sequence.first(encoder_outputH)
     thought_vectorC = sequence.first(encoder_outputC)
@@ -100,20 +100,20 @@ def create_network(input_vocab_dim, label_vocab_dim):
     # Decoder
     decoder_history_hook = alias(label_sequence, name='decoder_history_hook') # copy label_sequence

-    decoder_input = element_select(is_first_label, label_sentence_start_scattered, past_value(
+    decoder_input = element_select(is_first_label, label_sentence_start_scattered, sequence.past_value(
         decoder_history_hook))

     decoder_outputH = stabilize(decoder_input)
     for i in range(0, num_layers):
         if (i > 0):
-            recurrence_hookH = past_value
-            recurrence_hookC = past_value
+            recurrence_hookH = sequence.past_value
+            recurrence_hookC = sequence.past_value
         else:
             isFirst = sequence.is_first(label_sequence)
             recurrence_hookH = lambda operand: element_select(
-                isFirst, thought_vector_broadcastH, past_value(operand))
+                isFirst, thought_vector_broadcastH, sequence.past_value(operand))
             recurrence_hookC = lambda operand: element_select(
-                isFirst, thought_vector_broadcastC, past_value(operand))
+                isFirst, thought_vector_broadcastC, sequence.past_value(operand))

         (decoder_outputH, encoder_outputC) = LSTMP_component_with_self_stabilization(
             decoder_outputH.output, hidden_dim, hidden_dim, recurrence_hookH, recurrence_hookC)
@@ -168,7 +168,7 @@ def LSTMP_cell_with_self_stabilization(input, prev_output, prev_cell_state):
     return (times(element_times(expsWmr, mt), Wmr), ct)


-def LSTMP_component_with_self_stabilization(input, output_dim, cell_dim, recurrence_hookH=past_value, recurrence_hookC=past_value):
+def LSTMP_component_with_self_stabilization(input, output_dim, cell_dim, recurrence_hookH=sequence.past_value, recurrence_hookC=sequence.past_value):
     dh = placeholder(
         shape=(output_dim), dynamic_axes=input.dynamic_axes)
     dc = placeholder(
@@ -44,7 +44,7 @@ def create_test_model():

 def with_lookahead():
     x = placeholder()
-    future_x = future_value(x)
+    future_x = sequence.future_value(x)
     apply_x = splice (x, future_x)
     return apply_x

@@ -89,10 +89,9 @@ if __name__=='__main__':
         array([[7.2,8.2]]),
         array([[7.3,8.3], [7.31, 8.31]]),
         ]
-    from cntk.ops import past_value, future_value
     data_seq_axis = Axis('inputAxis')
     init_seq_axis = Axis('initAxis')
-    f = past_value(sequence.input(2, sequence_axis=data_seq_axis), time_step=2, initial_state=sequence.input(2, sequence_axis=init_seq_axis))
+    f = sequence.past_value(sequence.input(2, sequence_axis=data_seq_axis), time_step=2, initial_state=sequence.input(2, sequence_axis=init_seq_axis))
     res = f(data, initial_state)
     print(res)

@@ -9,7 +9,7 @@ module_path = os.path.join(py_path, 'ReasoNet')
 import cntk.device as device
 import numpy as np
 from cntk.ops.tests.ops_test_utils import cntk_device
-from cntk.ops import input, past_value, future_value
+from cntk.ops import input
 from cntk.io import MinibatchSource
 from cntk import Trainer, Axis, device, combine
 from cntk.layers import Recurrence, Convolution
@@ -833,7 +833,7 @@
 "\n",
 "* takes no input arguments\n",
 "* creates a placeholder (sequence) variable\n",
-"* computes the \"next value\" in this sequence using the `future_value()` operation and\n",
+"* computes the \"next value\" in this sequence using the `sequence.future_value()` operation and\n",
 "* concatenates the current and the next value into a vector of twice the embedding dimension using `splice()`\n",
 "\n",
 "and then insert this function into `Sequential()`'s list right after the embedding layer."
@@ -1055,7 +1055,7 @@
 "source": [
 "def OneWordLookahead():\n",
 "    x = C.placeholder()\n",
-"    apply_x = splice (x, future_value(x))\n",
+"    apply_x = splice (x, sequence.future_value(x))\n",
 "    return apply_x\n",
 "\n",
 "def create_model():\n",
@@ -238,6 +238,7 @@
 "#from keras.optimizers import *\n",
 "from cntk import *\n",
 "from cntk.layers import *\n",
+"from cntk.ops.sequence import input\n",
 "# Select the right target device when this notebook is being tested:\n",
 "if 'TEST_DEVICE' in os.environ:\n",
 "    if os.environ['TEST_DEVICE'] == 'cpu':\n",
@@ -1014,7 +1015,7 @@
 "D = 4 # input dimensionality\n",
 "H = 10 # number of hidden layer neurons\n",
 "\n",
-"observations = C.input(STATE_COUNT, np.float32, name=\"obs\")\n",
+"observations = input(STATE_COUNT, np.float32, name=\"obs\")\n",
 "\n",
 "W1 = C.parameter(shape=(STATE_COUNT, H), init=C.glorot_uniform(), name=\"W1\")\n",
 "b1 = C.parameter(shape=H, name=\"b1\")\n",
@@ -1106,8 +1107,8 @@
 }
 ],
 "source": [
-"input_y = C.input(1, np.float32, name=\"input_y\")\n",
-"advantages = C.input(1, np.float32, name=\"advt\")\n",
+"input_y = input(1, np.float32, name=\"input_y\")\n",
+"advantages = input(1, np.float32, name=\"advt\")\n",
 "\n",
 "loss = -C.reduce_mean(C.log(C.square(input_y - probability) + 1e-4) * advantages, axis=0, name='loss')\n",
 "\n",
@@ -184,7 +184,7 @@
 "from cntk import Trainer, Axis\n",
 "from cntk.io import MinibatchSource, CTFDeserializer, StreamDef, StreamDefs, INFINITELY_REPEAT\n",
 "from cntk.learners import momentum_sgd, fsadagrad, momentum_as_time_constant_schedule, learning_rate_schedule, UnitType\n",
-"from cntk import input, cross_entropy_with_softmax, classification_error, sequence, past_value, future_value, \\\n",
+"from cntk import input, cross_entropy_with_softmax, classification_error, sequence, \\\n",
 "    element_select, alias, hardmax, placeholder_variable, combine, parameter, times, plus\n",
 "from cntk.ops.functions import CloneMethod, load_model, Function\n",
 "from cntk.initializer import glorot_uniform\n",
@@ -281,7 +281,7 @@
 "    indices = np.random.choice(\n",
 "        range(Param.num_classes),\n",
 "        size=num_vectors, \n",
-"        p = data_sampling_distribution()).reshape((1, num_vectors))\n",
+"        p = data_sampling_distribution()).reshape((num_vectors, 1))\n",
 "    list_of_vectors = C.Value.one_hot(indices, Param.num_classes)\n",
 "    return (list_of_vectors, indices.flatten())\n",
 "\n",
@@ -376,7 +376,7 @@
 "    vectors, indices = get_random_one_hot_data(Param.test_set_size)\n",
 "    total_cross_entropy = 0.0\n",
 "    arguments = (vectors)\n",
-"    z = softmax_input.eval(arguments)[0].reshape(Param.test_set_size, Param.num_classes)\n",
+"    z = softmax_input.eval(arguments).reshape(Param.test_set_size, Param.num_classes)\n",
 "\n",
 "    for i in range(len(indices)):\n",
 "        log_p = log_softmax(z[i], indices[i])\n",
@@ -168,7 +168,7 @@
 "from cntk import Trainer, Axis\n",
 "from cntk.io import MinibatchSource, CTFDeserializer, StreamDef, StreamDefs, INFINITELY_REPEAT\n",
 "from cntk.learners import momentum_sgd, momentum_as_time_constant_schedule, learning_rate_schedule, UnitType\n",
-"from cntk import input, cross_entropy_with_softmax, classification_error, sequence, past_value, future_value, element_select, \\\n",
+"from cntk import input, cross_entropy_with_softmax, classification_error, sequence, element_select, \\\n",
 "    alias, hardmax, placeholder, combine, parameter, plus, times\n",
 "from cntk.ops.functions import CloneMethod\n",
 "from cntk.layers import LSTM, Stabilizer\n",
@@ -499,7 +499,7 @@
 },
 "outputs": [],
 "source": [
-"def LSTM_layer(input, output_dim, recurrence_hook_h=past_value, recurrence_hook_c=past_value):\n",
+"def LSTM_layer(input, output_dim, recurrence_hook_h=sequence.past_value, recurrence_hook_c=sequence.past_value):\n",
 "    # we first create placeholders for the hidden state and cell state which we don't have yet\n",
 "    dh = placeholder(shape=(output_dim), dynamic_axes=input.dynamic_axes)\n",
 "    dc = placeholder(shape=(output_dim), dynamic_axes=input.dynamic_axes)\n",
@@ -531,7 +531,7 @@
 "\n",
 "We will use the LSTM recurrence that we defined just above. Remember that its function signature is:\n",
 "\n",
-"`def LSTM_layer(input, output_dim, recurrence_hook_h=past_value, recurrence_hook_c=past_value):`\n",
+"`def LSTM_layer(input, output_dim, recurrence_hook_h=sequence.past_value, recurrence_hook_c=sequence.past_value):`\n",
 "\n",
 "and it returns a tuple `(hidden_state, hidden_cell)`. We will complete the following four exercises below. If possible, try them out before looking at the answers.\n",
 "\n",
@@ -571,7 +571,7 @@
 "\n",
 "# 4.\n",
 "# Reverse the order of the input_sequence (this has been shown to help especially in machine translation)\n",
-"(encoder_output_h, encoder_output_c) = LSTM_layer(input_sequence, hidden_dim, future_value, future_value)"
+"(encoder_output_h, encoder_output_c) = LSTM_layer(input_sequence, hidden_dim, sequence.future_value, sequence.future_value)"
 ]
 },
 {
@@ -595,7 +595,7 @@
 },
 "outputs": [],
 "source": [
-"decoder_input = element_select(is_first_label, label_sentence_start_scattered, past_value(label_sequence))"
+"decoder_input = element_select(is_first_label, label_sentence_start_scattered, sequence.past_value(label_sequence))"
 ]
 },
 {
@@ -618,7 +618,7 @@
 "outputs": [],
 "source": [
 "(output_h, output_c) = LSTM_layer(input_sequence, hidden_dim,\n",
-"                                  recurrence_hook_h=past_value, recurrence_hook_c=past_value)"
+"                                  recurrence_hook_h=sequence.past_value, recurrence_hook_c=sequence.past_value)"
 ]
 },
 {
@@ -647,8 +647,8 @@
 "# 1.\n",
 "# Create the recurrence hooks for the decoder LSTM.\n",
 "\n",
-"recurrence_hook_h = lambda operand: element_select(is_first_label, thought_vector_broadcast_h, past_value(operand))\n",
-"recurrence_hook_c = lambda operand: element_select(is_first_label, thought_vector_broadcast_c, past_value(operand))\n",
+"recurrence_hook_h = lambda operand: element_select(is_first_label, thought_vector_broadcast_h, sequence.past_value(operand))\n",
+"recurrence_hook_c = lambda operand: element_select(is_first_label, thought_vector_broadcast_c, sequence.past_value(operand))\n",
 "\n",
 "# 2.\n",
 "# With your recurrence hooks, create the decoder.\n",
@@ -663,13 +663,13 @@
 "decoder_output_h = alias(decoder_input)\n",
 "for i in range(0, num_layers):\n",
 "    if (i > 0):\n",
-"        recurrence_hook_h = past_value\n",
-"        recurrence_hook_c = past_value\n",
+"        recurrence_hook_h = sequence.past_value\n",
+"        recurrence_hook_c = sequence.past_value\n",
 "    else:\n",
 "        recurrence_hook_h = lambda operand: element_select(\n",
-"            is_first_label, thought_vector_broadcast_h, past_value(operand))\n",
+"            is_first_label, thought_vector_broadcast_h, sequence.past_value(operand))\n",
 "        recurrence_hook_c = lambda operand: element_select(\n",
-"            is_first_label, thought_vector_broadcast_c, past_value(operand))\n",
+"            is_first_label, thought_vector_broadcast_c, sequence.past_value(operand))\n",
 "\n",
 "    (decoder_output_h, decoder_output_c) = LSTM_layer(decoder_output_h.output, hidden_dim,\n",
 "                                                      recurrence_hook_h, recurrence_hook_c)"
@@ -749,7 +749,7 @@
 "    encoder_output_h = stabilize(input_sequence)\n",
 "    for i in range(0, num_layers):\n",
 "        (encoder_output_h, encoder_output_c) = LSTM_layer(\n",
-"            encoder_output_h.output, hidden_dim, future_value, future_value)\n",
+"            encoder_output_h.output, hidden_dim, sequence.future_value, sequence.future_value)\n",
 "\n",
 "    # Prepare encoder output to be used in decoder\n",
 "    thought_vector_h = sequence.first(encoder_output_h)\n",
@@ -763,19 +763,19 @@
 "    # Decoder\n",
 "    decoder_history_hook = alias(label_sequence, name='decoder_history_hook') # copy label_sequence\n",
 "\n",
-"    decoder_input = element_select(is_first_label, label_sentence_start_scattered, past_value(\n",
+"    decoder_input = element_select(is_first_label, label_sentence_start_scattered, sequence.past_value(\n",
 "        decoder_history_hook))\n",
 "\n",
 "    decoder_output_h = stabilize(decoder_input)\n",
 "    for i in range(0, num_layers):\n",
 "        if (i > 0):\n",
-"            recurrence_hook_h = past_value\n",
-"            recurrence_hook_c = past_value\n",
+"            recurrence_hook_h = sequence.past_value\n",
+"            recurrence_hook_c = sequence.past_value\n",
 "        else:\n",
 "            recurrence_hook_h = lambda operand: element_select(\n",
-"                is_first_label, thought_vector_broadcast_h, past_value(operand))\n",
+"                is_first_label, thought_vector_broadcast_h, sequence.past_value(operand))\n",
 "            recurrence_hook_c = lambda operand: element_select(\n",
-"                is_first_label, thought_vector_broadcast_c, past_value(operand))\n",
+"                is_first_label, thought_vector_broadcast_c, sequence.past_value(operand))\n",
 "\n",
 "        (decoder_output_h, decoder_output_c) = LSTM_layer(\n",
 "            decoder_output_h.output, hidden_dim, recurrence_hook_h, recurrence_hook_c)\n",
@@ -936,7 +936,7 @@
 "outputs": [],
 "source": [
 "decoder_history_hook = alias(label_sequence, name='decoder_history_hook') # copy label_sequence\n",
-"decoder_input = element_select(is_first_label, label_sentence_start_scattered, past_value(decoder_history_hook))"
+"decoder_input = element_select(is_first_label, label_sentence_start_scattered, sequence.past_value(decoder_history_hook))"
 ]
 },
 {
@@ -493,8 +493,8 @@ def memoize(func):

 @memoize
 def _sparse_to_dense_network_cache(input_shape):
-    from cntk.ops import times, input
+    from cntk.ops import times, sequence

-    temp_input = input(input_shape)
+    temp_input = sequence.input(input_shape)
     eye_shape = input_shape[-1]
     return times(temp_input, np.eye(eye_shape))
@@ -14,7 +14,7 @@ import numpy as np
 from cntk import input, placeholder, combine, alias, sequence, parameter, constant
 from cntk.variables import Record, Constant, Parameter
 from cntk.axis import Axis
-from cntk.ops import times, slice, sigmoid, tanh, log, exp, softplus, past_value, future_value
+from cntk.ops import times, slice, sigmoid, tanh, log, exp, softplus
 from .typing import Signature
 from cntk.internal import _as_tuple
 from cntk.initializer import glorot_uniform
@@ -96,7 +96,7 @@ def ForwardDeclaration(name='forward_declaration'):
     >>> x = C.input(**Sequence[Tensor[2]])
     >>> ones_like_input = sequence.broadcast_as(1, x) # sequence of scalar ones of same length as input
     >>> out_fwd = ForwardDeclaration() # placeholder for the state variables
-    >>> out = past_value(out_fwd, initial_state=0) + ones_like_input
+    >>> out = sequence.past_value(out_fwd, initial_state=0) + ones_like_input
     >>> out_fwd.resolve_to(out)
     >>> length = sequence.last(out)
     >>> x0 = np.reshape(np.arange(6,dtype=np.float32),(1,3,2))
@@ -208,9 +208,9 @@ def _window(x, axis, begin, end, step, stride, initial_state=None):
     helper to expand a sequence into a window, splicing them along the given axis (which must already exist)
     '''
     shifted = [
-        past_value(x, initial_state=initial_state, time_step=-t) if t < 0 else
+        sequence.past_value(x, initial_state=initial_state, time_step=-t) if t < 0 else
         x if t == 0 else
-        future_value(x, initial_state=initial_state, time_step=t)
+        sequence.future_value(x, initial_state=initial_state, time_step=t)
         for t in range(begin, end, step)
     ]
     r = splice(*shifted, axis=axis)
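The comprehension above builds time-shifted copies of the input and splices them along an axis. A rough equivalent in user code for a 3-tap window (a sketch of the idea, not the helper itself; names are illustrative):

    import numpy as np
    import cntk as C

    x = C.sequence.input(shape=(1,))
    window = C.splice(C.sequence.past_value(x),    # t-1, zero-padded at the sequence start
                      x,                           # t
                      C.sequence.future_value(x))  # t+1, zero-padded at the sequence end

    x0 = [np.array([[1.0], [2.0], [3.0]], dtype=np.float32)]
    print(window.eval({x: x0}))  # each step holds [x[t-1], x[t], x[t+1]]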
@@ -7,7 +7,7 @@
 # sequence -- first/higher-order functions over sequences, like Recurrence()

 from ..variables import Record
-from ..ops import combine, past_value, future_value, splice, sequence
+from ..ops import combine, splice, sequence
 from .blocks import *
 from .blocks import _get_initial_state_or_default, _inject_name

@@ -193,7 +193,7 @@ def RecurrenceFrom(step_function, go_backwards=default_override_or(False), retur
     >>> decoder = RecurrenceFrom(LSTM(500)) # decoder starts from a data-dependent initial state, hence -From()
     >>> emit = Dense(30000)
     >>> h, c = encoder(embed(en)).outputs # LSTM encoder has two outputs (h, c)
-    >>> z = emit(decoder(h, c, past_value(fr))) # decoder takes encoder outputs as initial state
+    >>> z = emit(decoder(h, c, sequence.past_value(fr))) # decoder takes encoder outputs as initial state
     >>> loss = C.cross_entropy_with_softmax(z, fr)

     Args:
@@ -570,7 +570,7 @@ def UnfoldFrom(generator_function, until_predicate=None, length_increase=1, name

     # apply until_predicate if given
     if until_predicate is not None:
-        valid_frames = Recurrence(lambda h, x: (1-past_value(x)) * h, initial_state=1, name='valid_frames')(until_predicate(output))
+        valid_frames = Recurrence(lambda h, x: (1-sequence.past_value(x)) * h, initial_state=1, name='valid_frames')(until_predicate(output))
         output = sequence.gather(output, valid_frames, name='valid_output')

     return output
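The valid_frames recurrence computes a keep-until-first-stop-flag mask: h_t = (1 - x_{t-1}) * h_{t-1}, so the mask stays 1 through the step that emits the stop symbol and drops to 0 afterwards. The same recurrence in plain Python (illustrative):

    def valid_frames(stop_flags):
        h, prev, out = 1.0, 0.0, []   # prev models past_value(x) with initial state 0
        for x in stop_flags:
            h = (1.0 - prev) * h      # mirrors lambda h, x: (1 - past_value(x)) * h
            out.append(h)
            prev = x
        return out

    print(valid_frames([0, 0, 1, 0]))  # [1.0, 1.0, 1.0, 0.0]: the stop symbol itself is kept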
@@ -435,7 +435,7 @@ class TensorBoardProgressWriter(cntk_py.ProgressWriter):
         if freq is None:
             freq = sys.maxsize

-        super(TensorBoardProgressWriter, self).__init__(freq, 0, sys.maxsize, 0)
+        super(TensorBoardProgressWriter, self).__init__(freq, 0, sys.maxsize, 0, sys.maxsize, 0)

         # Only log either when rank is not specified or when rank is 0.
         self.writer = cntk_py.TensorBoardFileWriter(log_dir, model) if not rank else None
@@ -1633,6 +1633,8 @@ def element_select(flag, value_if_true, value_if_false, name=''):
 @typemap
 def future_value(x, initial_state=None, time_step=1, name=''):
     '''
+    DEPRECATED.
+
     This function returns the future value w.r.t. ``x``. It is most often used when
     creating RNNs. The resulting tensor has the same shape as the input but is
     the next logical sample. The ``time_step`` parameter is the number of steps
@@ -1674,20 +1676,18 @@ def future_value(x, initial_state=None, time_step=1, name=''):
     :class:`~cntk.ops.functions.Function`
     '''

-    from cntk.internal import sanitize_dtype_cntk
-    from ..cntk_py import Constant
-    from cntk.cntk_py import future_value
+    import warnings
+    warnings.warn('This will be removed in future versions. Please use '
+                  'sequence.future_value() instead.', DeprecationWarning)

-    if initial_state is None:
-        initial_state = Constant.scalar(sanitize_dtype_cntk(np.float32), 0.0)
-
-    x = sanitize_input(x)
-    return future_value(x, initial_state, time_step, name)
+    return sequence.future_value(x, initial_state, time_step, name)


 @typemap
 def past_value(x, initial_state=None, time_step=1, name=''):
     '''
+    DEPRECATED.
+
     This function returns the past value w.r.t. ``x``. It is most often used when
     creating RNNs. The resulting tensor has the same shape as the input but is
     the previous logical sample. The ``time_step`` parameter is the number of steps
@@ -1774,17 +1774,11 @@ def past_value(x, initial_state=None, time_step=1, name=''):
     :class:`~cntk.ops.functions.Function`
     '''

-    from cntk.internal import sanitize_dtype_cntk
-    from ..cntk_py import Constant
-    from cntk.cntk_py import past_value
+    import warnings
+    warnings.warn('This will be removed in future versions. Please use '
+                  'sequence.past_value() instead.', DeprecationWarning)

-    if initial_state is None:
-        initial_state = Constant.scalar(sanitize_dtype_cntk(np.float32), 0.0)
-    else:
-        initial_state = sanitize_input(initial_state)
-
-    x = sanitize_input(x)
-    return past_value(x, initial_state, time_step, name)
+    return sequence.past_value(x, initial_state, time_step, name)


 # TODO: does this belong into .sequence?
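The old top-level ops are kept as thin shims that warn and forward. A quick sanity check of the intended behavior (a sketch, assuming a CNTK build that includes this commit):

    import warnings
    import numpy as np
    import cntk as C

    x = C.sequence.input(shape=(1,))
    with warnings.catch_warnings(record=True) as caught:
        warnings.simplefilter('always')
        y = C.past_value(x)  # deprecated spelling still works...
    assert any(issubclass(w.category, DeprecationWarning) for w in caught)

    y2 = C.sequence.past_value(x)  # ...but this is the warning-free replacement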
@@ -38,9 +38,166 @@ def input(shape, dtype=default_override_or(np.float32), needs_gradient=False, is
 # sequence ops
 ##########################################################################

+@typemap
+def future_value(x, initial_state=None, time_step=1, name=''):
+    '''
+    This function returns the future value w.r.t. ``x``. It is most often used when
+    creating RNNs. The resulting tensor has the same shape as the input but is
+    the next logical sample. The ``time_step`` parameter is the number of steps
+    to look into the future and is 1 by default. If there is no future value (i.e.
+    the current sample is the last one in the tensor) then the ``initial_state``
+    value is returned.
+
+    The initial state can be a constant (scalar or tensor), a learnable tensor
+    or input data (which has a batch dimension, as needed for sequence-to-sequence models).
+
+    Example:
+        >>> x = C.sequence.input(shape=(3,2))
+        >>> # Create one sequence with 4 tensors of shape (3, 2)
+        >>> x0 = np.reshape(np.arange(24,dtype=np.float32),(1,4,3,2))
+        >>> y = C.sequence.future_value(x) # using initial state of 0 by default
+        >>> y.eval({x:x0})
+        [array([[[  6.,   7.],
+                 [  8.,   9.],
+                 [ 10.,  11.]],
+        <BLANKLINE>
+                [[ 12.,  13.],
+                 [ 14.,  15.],
+                 [ 16.,  17.]],
+        <BLANKLINE>
+                [[ 18.,  19.],
+                 [ 20.,  21.],
+                 [ 22.,  23.]],
+        <BLANKLINE>
+                [[  0.,   0.],
+                 [  0.,   0.],
+                 [  0.,   0.]]], dtype=float32)]
+
+    Args:
+        x: the tensor (or its name) from which the future value is obtained.
+        initial_state: tensor or scalar representing the initial value to be used when the input tensor is shifted in time.
+        time_step (int): the number of time steps to look into the future (default 1)
+        name (str, optional): the name of the Function instance in the network
+    Returns:
+        :class:`~cntk.ops.functions.Function`
+    '''
+
+    from cntk.internal import sanitize_dtype_cntk
+    from ...cntk_py import Constant
+    from cntk.cntk_py import future_value
+
+    if initial_state is None:
+        initial_state = Constant.scalar(sanitize_dtype_cntk(np.float32), 0.0)
+
+    x = sanitize_input(x)
+    return future_value(x, initial_state, time_step, name)
+
+
+@typemap
+def past_value(x, initial_state=None, time_step=1, name=''):
+    '''
+    This function returns the past value w.r.t. ``x``. It is most often used when
+    creating RNNs. The resulting tensor has the same shape as the input but is
+    the previous logical sample. The ``time_step`` parameter is the number of steps
+    to look into the past and is 1 by default. If there is no past value (i.e.
+    the current sample is the first one in the tensor) then the ``initial_state``
+    value is returned.
+
+    The initial state can be a constant (scalar or tensor), a learnable tensor
+    or input data (which has a batch dimension, as needed for sequence-to-sequence models).
+
+    Example:
+        >>> # create example input: one sequence with 4 tensors of shape (3, 2)
+        >>> from cntk.layers.typing import Tensor, Sequence
+        >>> x = C.sequence.input((3,2))
+        >>> x0 = np.reshape(np.arange(24,dtype=np.float32),(1,4,3,2))
+        >>> x0
+        array([[[[  0.,   1.],
+                 [  2.,   3.],
+                 [  4.,   5.]],
+        <BLANKLINE>
+                [[  6.,   7.],
+                 [  8.,   9.],
+                 [ 10.,  11.]],
+        <BLANKLINE>
+                [[ 12.,  13.],
+                 [ 14.,  15.],
+                 [ 16.,  17.]],
+        <BLANKLINE>
+                [[ 18.,  19.],
+                 [ 20.,  21.],
+                 [ 22.,  23.]]]], dtype=float32)
+
+        >>> # this demonstrates how past_value shifts the sequence by one, padding with initial_state
+        >>> y = C.sequence.past_value(x) # initial_state is 0 by default
+        >>> y.eval({x:x0})
+        [array([[[  0.,   0.],
+                 [  0.,   0.],
+                 [  0.,   0.]],
+        <BLANKLINE>
+                [[  0.,   1.],
+                 [  2.,   3.],
+                 [  4.,   5.]],
+        <BLANKLINE>
+                [[  6.,   7.],
+                 [  8.,   9.],
+                 [ 10.,  11.]],
+        <BLANKLINE>
+                [[ 12.,  13.],
+                 [ 14.,  15.],
+                 [ 16.,  17.]]], dtype=float32)]
+
+        >>> # here, we pass the initial_state as input data (e.g. sequence-to-sequence)
+        >>> s = C.input((3,2)) # not a sequence, e.g. a final encoder hidden state
+        >>> s0 = np.reshape(np.arange(6,dtype=np.float32)/2,(1,3,2))
+        >>> s0
+        array([[[ 0. ,  0.5],
+                [ 1. ,  1.5],
+                [ 2. ,  2.5]]], dtype=float32)
+        >>> y = C.sequence.past_value(x, initial_state=s)
+        >>> y.eval({x:x0, s:s0}) # same as the previous example except for the first time step
+        [array([[[  0. ,   0.5],
+                 [  1. ,   1.5],
+                 [  2. ,   2.5]],
+        <BLANKLINE>
+                [[  0. ,   1. ],
+                 [  2. ,   3. ],
+                 [  4. ,   5. ]],
+        <BLANKLINE>
+                [[  6. ,   7. ],
+                 [  8. ,   9. ],
+                 [ 10. ,  11. ]],
+        <BLANKLINE>
+                [[ 12. ,  13. ],
+                 [ 14. ,  15. ],
+                 [ 16. ,  17. ]]], dtype=float32)]
+
+    Args:
+        x: the tensor (or its name) from which the past value is obtained
+        initial_state: tensor or scalar representing the initial value to be used when the input tensor is shifted in time.
+        time_step (int): the number of time steps to look into the past (default 1)
+        name (str, optional): the name of the Function instance in the network
+
+    Returns:
+        :class:`~cntk.ops.functions.Function`
+    '''
+
+    from cntk.internal import sanitize_dtype_cntk
+    from ...cntk_py import Constant
+    from cntk.cntk_py import past_value
+
+    if initial_state is None:
+        initial_state = Constant.scalar(sanitize_dtype_cntk(np.float32), 0.0)
+    else:
+        initial_state = sanitize_input(initial_state)
+
+    x = sanitize_input(x)
+    return past_value(x, initial_state, time_step, name)
+
+
 def delay(x, initial_state=None, time_step=1, name=''):
     '''
-    This function combines :func:`~cntk.ops.past_value` and :func:`~cntk.ops.future_value` into a single function.
+    This function combines :func:`~cntk.ops.sequence.past_value` and :func:`~cntk.ops.sequence.future_value` into a single function.
     This is useful when the time_step is computed and can be positive, negative, or 0.

     Args:
@@ -49,7 +206,7 @@ def delay(x, initial_state=None, time_step=1, name=''):
         time_step (int): the number of time steps to look into the past, where negative values mean to look into the future, and 0 means a no-op (default 1).
         name (str, optional): the name of the Function instance in the network
     '''
-    from ...ops import alias, past_value, future_value, element_select, element_divide, placeholder, exp
+    from ...ops import alias, element_select, element_divide, placeholder, exp
     if time_step > 0:
         return past_value (x, time_step= time_step, initial_state=initial_state, name=name)
     elif time_step < 0:
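Since delay() only dispatches on the sign of time_step, all three branches can be seen with (illustrative values):

    import numpy as np
    import cntk as C

    x = C.sequence.input(shape=(1,))
    x0 = [np.array([[1.0], [2.0], [3.0]], dtype=np.float32)]

    print(C.sequence.delay(x, time_step=1).eval({x: x0}))   # past_value branch:   [0, 1, 2]
    print(C.sequence.delay(x, time_step=-1).eval({x: x0}))  # future_value branch: [2, 3, 0]
    print(C.sequence.delay(x, time_step=0).eval({x: x0}))   # alias (no-op):       [1, 2, 3]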
@@ -424,7 +581,7 @@ def reduce_max(x, name=''):
     Returns:
         The max value in the input sequence
     """
-    from ...ops import past_value, future_value, element_select, placeholder, greater
+    from ...ops import element_select, placeholder, greater
     m = placeholder(shape=(1,), dynamic_axes = x.dynamic_axes, name='max')
     o = element_select(greater(x, future_value(m)), x, future_value(m))
     rlt = o.replace_placeholders({m:sanitize_input(o)})
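The placeholder recurrence above propagates the running maximum backwards through the sequence via future_value. Assuming the helper is exposed as C.sequence.reduce_max (an assumption; the public name is not shown in this hunk), usage would look like:

    import numpy as np
    import cntk as C

    x = C.sequence.input(shape=(1,))
    m = C.sequence.reduce_max(x)  # assumed public entry point for the helper above
    x0 = [np.array([[3.0], [7.0], [5.0]], dtype=np.float32)]
    print(m.eval({x: x0}))  # the sequence maximum, 7.0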
@@ -13,7 +13,7 @@ import pytest
 from ..functions import *
 from ...train.trainer import *
 from ...initializer import glorot_uniform
-from .. import constant, parameter, input, placeholder, times, plus, past_value, sequence, as_composite, combine, convolution, splice, as_block
+from .. import constant, parameter, input, placeholder, times, plus, sequence, as_composite, combine, convolution, splice, as_block
 from ... import InferredDimension, gpu, cpu
 from .ops_test_utils import compare_lists_of_np_arrays, AA, cntk_device

@@ -190,7 +190,7 @@ def test_data_type_inference():
 def test_recurrence_shape_inference():
     i = sequence.input((2,))
     p = placeholder()
-    p_past = past_value(p)
+    p_past = sequence.past_value(p)
     p_past_plus_i = p_past + i

     p_past_plus_i.replace_placeholder(p_past_plus_i.output)
@@ -58,8 +58,7 @@ def test_op_future_value(input_size, time_step, initial_state, device_id, precis
     }
     init = parameter(init=AA(initial_state, dtype=dt), device=cntk_device(device_id))

-    from .. import future_value
-    input_op_input = future_value(a, init, time_step)
+    input_op_input = sequence.future_value(a, init, time_step)

     unittest_helper(input_op_input,
                     x, expected_forward, expected_backward,
@@ -95,8 +94,7 @@ def test_op_past_value(input_size, time_step, initial_state, device_id, precisio

     init = parameter(init=AA(initial_state, dtype=dt), device=cntk_device(device_id))

-    from .. import past_value
-    input_op_input = past_value(a, init, time_step)
+    input_op_input = sequence.past_value(a, init, time_step)

     unittest_helper(input_op_input,
                     x, expected_forward, expected_backward,
@@ -513,7 +513,7 @@ def test_op_broadcast_as(device_id, precision):


 def test_op_broadcast_as_in_loop(device_id):
-    from .. import sequence, placeholder, past_value, input
+    from .. import sequence, placeholder, input

     a_data = [AA([1]), AA([2]), AA([3])]
     b_data = [AA([[2]]), AA([[2], [3]]), AA([[2], [3], [4]])]
@@ -522,7 +522,7 @@ def test_op_broadcast_as_in_loop(device_id):
     b = sequence.input(shape=(1,), name='b')

     out_placeholder = placeholder()
-    out_delayed = past_value(out_placeholder, time_step=5)
+    out_delayed = sequence.past_value(out_placeholder, time_step=5)
     out_delayed_plus_b = out_delayed + b
     out = sequence.broadcast_as(a, out_delayed_plus_b)
     out.replace_placeholder(out)
@@ -135,7 +135,7 @@ def test_training_3d_sparse_sequence_with_recurrence(device_id):
     a_projection = times(a, w_i)

     p_o = C.placeholder()
-    h = C.past_value(p_o)
+    h = C.sequence.past_value(p_o)
     w_h = C.parameter(init=w_init_h, device=dev)
     h_projection = times(h, w_h)
     z = a_projection + h_projection
@@ -10,7 +10,7 @@ from cntk import *

 def test_outputs():
     fwd_state = placeholder("placeholder")
-    prev_state = past_value(fwd_state, name="prev_state")
+    prev_state = sequence.past_value(fwd_state, name="prev_state")
     z = abs(prev_state, "abs")
     output = z.output
     z = z.replace_placeholders({fwd_state: z.output})
@@ -71,7 +71,7 @@ introduced, overloaded operators can be applied to them to form an operator grap
     import cntk as C

     # Create an input with the shape (2,3,*)
-    >>> x = C.input((2,3), name='features')
+    >>> x = C.sequence.input((2,3), name='features')

     # Create a constant scalar with value 2
     >>> c = C.constant(value=2)
@@ -80,29 +80,29 @@ introduced, overloaded operators can be applied to them to form an operator grap
     >>> w = C.parameter((2,3))

     # Set up some test input data to check the operators.
-    # We specify a full batch having one element, which is a
+    # We specify a full batch having a sequence with one element, which is a
     # (2,3) matrix.
-    >>> test_input = [ np.asarray([[10,20,30],[40,50,60]]) ]
+    >>> test_input = [[ np.asarray([[10,20,30],[40,50,60]]) ]]

     # Elementwise multiplication operation
     >>> op = x * c

     # Evaluate the op using test_input
     >>> print(op.eval({ x: test_input }))
-    array([[[  20.,   40.,   60.],
-            [  80.,  100.,  120.]]], dtype=float32)
+    [array([[[  20.,   40.,   60.],
+            [  80.,  100.,  120.]]], dtype=float32)]

     # Same as above (2 will be converted to constant)
     >>> op2 = x * 2
     >>> print(op2.eval({ x: test_input }))
-    array([[[  20.,   40.,   60.],
-            [  80.,  100.,  120.]]], dtype=float32)
+    [array([[[  20.,   40.,   60.],
+            [  80.,  100.,  120.]]], dtype=float32)]

     # Elementwise multiplication of two 2x3 matrices
     >>> op3 = x * [[1,2,3], [4,5,6]]
     >>> print(op3.eval({ x: test_input}))
-    array([[[  10.,   40.,   90.],
-            [ 160.,  250.,  360.]]], dtype=float32)
+    [array([[[  10.,   40.,   90.],
+            [ 160.,  250.,  360.]]], dtype=float32)]


 Broadcasting
@@ -866,8 +866,8 @@ end with a zero:
 Notes
 ~~~~~

-This layer is a wrapper around the ``past_value()`` and
-``future_value()`` primitives.
+This layer is a wrapper around the ``sequence.past_value()`` and
+``sequence.future_value()`` primitives.

 Example
 ~~~~~~~
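For completeness, the layer this note refers to wraps the two primitives behind a single step parameter; a small usage sketch (assuming cntk.layers.Delay with its T step parameter):

    import numpy as np
    import cntk as C
    from cntk.layers import Delay

    x = C.sequence.input(shape=(1,))
    x0 = [np.array([[1.0], [2.0], [3.0]], dtype=np.float32)]

    print(Delay(T=1)(x).eval({x: x0}))   # wraps sequence.past_value:   [[0.], [1.], [2.]]
    print(Delay(T=-1)(x).eval({x: x0}))  # wraps sequence.future_value: [[2.], [3.], [0.]]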