diff --git a/DeepSpeech.py b/DeepSpeech.py
index f2f5ebcd..5183407e 100755
--- a/DeepSpeech.py
+++ b/DeepSpeech.py
@@ -44,7 +44,7 @@ def variable_on_cpu(name, shape, initializer):
 
 
 def create_overlapping_windows(batch_x):
-    batch_size = tf.shape(batch_x)[0]
+    batch_size = tf.shape(input=batch_x)[0]
     window_width = 2 * Config.n_context + 1
     num_channels = Config.n_input
@@ -55,7 +55,7 @@ def create_overlapping_windows(batch_x):
                              .reshape(window_width, num_channels, window_width * num_channels), tf.float32) # pylint: disable=bad-continuation
 
     # Create overlapping windows
-    batch_x = tf.nn.conv1d(batch_x, eye_filter, stride=1, padding='SAME')
+    batch_x = tf.nn.conv1d(input=batch_x, filters=eye_filter, stride=1, padding='SAME')
 
     # Remove dummy depth dimension and reshape into [batch_size, n_windows, window_width, n_input]
     batch_x = tf.reshape(batch_x, [batch_size, -1, window_width, num_channels])
@@ -65,8 +65,8 @@ def create_overlapping_windows(batch_x):
 
 def dense(name, x, units, dropout_rate=None, relu=True):
     with tfv1.variable_scope(name):
-        bias = variable_on_cpu('bias', [units], tf.zeros_initializer())
-        weights = variable_on_cpu('weights', [x.shape[-1], units], tf.contrib.layers.xavier_initializer())
+        bias = variable_on_cpu('bias', [units], tfv1.zeros_initializer())
+        weights = variable_on_cpu('weights', [x.shape[-1], units], tfv1.keras.initializers.VarianceScaling(scale=1.0, mode="fan_avg", distribution="uniform"))
 
     output = tf.nn.bias_add(tf.matmul(x, weights), bias)
@@ -147,7 +147,7 @@ def create_model(batch_x, seq_length, dropout, reuse=False, batch_size=None, pre
     # Input shape: [batch_size, n_steps, n_input + 2*n_input*n_context]
     if not batch_size:
-        batch_size = tf.shape(batch_x)[0]
+        batch_size = tf.shape(input=batch_x)[0]
 
     # Create overlapping feature windows if needed
     if overlap:
@@ -157,7 +157,7 @@ def create_model(batch_x, seq_length, dropout, reuse=False, batch_size=None, pre
     # This is done to prepare the batch for input into the first layer which expects a tensor of rank `2`.
 
     # Permute n_steps and batch_size
-    batch_x = tf.transpose(batch_x, [1, 0, 2, 3])
+    batch_x = tf.transpose(a=batch_x, perm=[1, 0, 2, 3])
     # Reshape to prepare input for first layer
     batch_x = tf.reshape(batch_x, [-1, Config.n_input + 2*Config.n_input*Config.n_context]) # (n_steps*batch_size, n_input + 2*n_input*n_context)
     layers['input_reshaped'] = batch_x
@@ -232,7 +232,7 @@ def calculate_mean_edit_distance_and_loss(iterator, dropout, reuse):
     non_finite_files = tf.gather(batch_filenames, tfv1.where(~tf.math.is_finite(total_loss)))
 
     # Calculate the average loss across the batch
-    avg_loss = tf.reduce_mean(total_loss)
+    avg_loss = tf.reduce_mean(input_tensor=total_loss)
 
     # Finally we return the average loss
     return avg_loss, non_finite_files
@@ -312,7 +312,7 @@ def get_tower_results(iterator, optimizer, dropout_rates):
             tower_non_finite_files.append(non_finite_files)
 
-    avg_loss_across_towers = tf.reduce_mean(tower_avg_losses, 0)
+    avg_loss_across_towers = tf.reduce_mean(input_tensor=tower_avg_losses, axis=0)
     tfv1.summary.scalar(name='step_loss', tensor=avg_loss_across_towers, collections=['step_summaries'])
 
     all_non_finite_files = tf.concat(tower_non_finite_files, axis=0)
@@ -346,7 +346,7 @@ def average_gradients(tower_gradients):
 
         # Average over the 'tower' dimension
         grad = tf.concat(grads, 0)
-        grad = tf.reduce_mean(grad, 0)
+        grad = tf.reduce_mean(input_tensor=grad, axis=0)
 
         # Create a gradient/variable tuple for the current variable with its average gradient
         grad_and_var = (grad, grad_and_vars[0][1])
@@ -369,11 +369,11 @@ def log_variable(variable, gradient=None):
     Furthermore it logs a histogram of its state and (if given) of an optimization gradient.
     '''
     name = variable.name.replace(':', '_')
-    mean = tf.reduce_mean(variable)
+    mean = tf.reduce_mean(input_tensor=variable)
     tfv1.summary.scalar(name='%s/mean' % name, tensor=mean)
-    tfv1.summary.scalar(name='%s/sttdev' % name, tensor=tf.sqrt(tf.reduce_mean(tf.square(variable - mean))))
-    tfv1.summary.scalar(name='%s/max' % name, tensor=tf.reduce_max(variable))
-    tfv1.summary.scalar(name='%s/min' % name, tensor=tf.reduce_min(variable))
+    tfv1.summary.scalar(name='%s/sttdev' % name, tensor=tf.sqrt(tf.reduce_mean(input_tensor=tf.square(variable - mean))))
+    tfv1.summary.scalar(name='%s/max' % name, tensor=tf.reduce_max(input_tensor=variable))
+    tfv1.summary.scalar(name='%s/min' % name, tensor=tf.reduce_min(input_tensor=variable))
     tfv1.summary.histogram(name=name, values=variable)
     if gradient is not None:
         if isinstance(gradient, tf.IndexedSlices):
@@ -667,7 +667,7 @@ def create_inference_graph(batch_size=1, n_steps=16, tflite=False):
         previous_state_c = tfv1.placeholder(tf.float32, [batch_size, Config.n_cell_dim], name='previous_state_c')
         previous_state_h = tfv1.placeholder(tf.float32, [batch_size, Config.n_cell_dim], name='previous_state_h')
 
-        previous_state = tf.contrib.rnn.LSTMStateTuple(previous_state_c, previous_state_h)
+        previous_state = tf.nn.rnn_cell.LSTMStateTuple(previous_state_c, previous_state_h)
 
         # One rate per layer
         no_dropout = [None] * 6
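Reviewer note on the dense() change above: tf.contrib.layers.xavier_initializer() samples from U(-limit, limit) with limit = sqrt(6 / (fan_in + fan_out)), and VarianceScaling(scale=1.0, mode="fan_avg", distribution="uniform") computes the same bound (sqrt(3 * scale / n) with n = (fan_in + fan_out) / 2), so the swap preserves the initialization exactly. A minimal sanity check, assuming TF 1.14 (the version this patch targets); the layer shape is illustrative, not taken from the patch:

import numpy as np
import tensorflow.compat.v1 as tfv1

fan_in, fan_out = 494, 2048  # illustrative dense-layer shape, not from the patch
xavier_limit = np.sqrt(6.0 / (fan_in + fan_out))

init = tfv1.keras.initializers.VarianceScaling(scale=1.0, mode="fan_avg",
                                               distribution="uniform")
with tfv1.Session() as session:
    weights = session.run(init([fan_in, fan_out]))

# Every sampled weight stays inside the Xavier/Glorot uniform bound.
assert np.abs(weights).max() <= xavier_limit
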
diff --git a/bin/graphdef_binary_to_text.py b/bin/graphdef_binary_to_text.py
index 6dd8ba08..f0932759 100755
--- a/bin/graphdef_binary_to_text.py
+++ b/bin/graphdef_binary_to_text.py
@@ -1,14 +1,20 @@
 #!/usr/bin/env python
 # -*- coding: utf-8 -*-
-import tensorflow as tf
+import tensorflow.compat.v1 as tfv1
 import sys
 
-# Load and export as string
-with tf.gfile.FastGFile(sys.argv[1], 'rb') as fin:
-    graph_def = tf.GraphDef()
-    graph_def.ParseFromString(fin.read())
+from google.protobuf import text_format
 
-    with tf.gfile.FastGFile(sys.argv[1] + 'txt', 'w') as fout:
-        from google.protobuf import text_format
-        fout.write(text_format.MessageToString(graph_def))
+
+def main():
+    # Load and export as string
+    with tfv1.gfile.FastGFile(sys.argv[1], 'rb') as fin:
+        graph_def = tfv1.GraphDef()
+        graph_def.ParseFromString(fin.read())
+
+    with tfv1.gfile.FastGFile(sys.argv[1] + 'txt', 'w') as fout:
+        fout.write(text_format.MessageToString(graph_def))
+
+
+if __name__ == '__main__':
+    main()
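Note on bin/graphdef_binary_to_text.py: the output path concatenates 'txt' with no dot on purpose, so an input named 'model.pb' produces 'model.pbtxt'. A round-trip sketch showing the text form is lossless; 'model.pb' here is a hypothetical path standing in for sys.argv[1], not something from the patch:

import tensorflow.compat.v1 as tfv1
from google.protobuf import text_format

# 'model.pb' is a stand-in for whatever frozen graph you pass on the command line.
with tfv1.gfile.FastGFile('model.pb', 'rb') as fin:
    graph_def = tfv1.GraphDef()
    graph_def.ParseFromString(fin.read())

text = text_format.MessageToString(graph_def)
roundtrip = text_format.Parse(text, tfv1.GraphDef())
assert roundtrip == graph_def  # proto equality: the text form loses nothing
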
diff --git a/bin/ops_in_graph.py b/bin/ops_in_graph.py
index 9078a91d..c7c5b487 100755
--- a/bin/ops_in_graph.py
+++ b/bin/ops_in_graph.py
@@ -1,11 +1,15 @@
 #!/usr/bin/env python
 # -*- coding: utf-8 -*-
-import tensorflow as tf
+import tensorflow.compat.v1 as tfv1
 import sys
 
-with tf.gfile.FastGFile(sys.argv[1], 'rb') as fin:
-    graph_def = tf.GraphDef()
-    graph_def.ParseFromString(fin.read())
+def main():
+    with tfv1.gfile.FastGFile(sys.argv[1], 'rb') as fin:
+        graph_def = tfv1.GraphDef()
+        graph_def.ParseFromString(fin.read())
 
-    print('\n'.join(sorted(set(n.op for n in graph_def.node))))
+    print('\n'.join(sorted(set(n.op for n in graph_def.node))))
+
+
+if __name__ == '__main__':
+    main()
diff --git a/evaluate.py b/evaluate.py
index 1f672a06..c86ebc1e 100755
--- a/evaluate.py
+++ b/evaluate.py
@@ -63,7 +63,7 @@ def evaluate(test_csvs, create_model, try_loading):
                              dropout=no_dropout)
 
     # Transpose to batch major and apply softmax for decoder
-    transposed = tf.nn.softmax(tf.transpose(logits, [1, 0, 2]))
+    transposed = tf.nn.softmax(tf.transpose(a=logits, perm=[1, 0, 2]))
 
     loss = tfv1.nn.ctc_loss(labels=batch_y,
                             inputs=logits,
diff --git a/util/config.py b/util/config.py
index 6204b997..b1b921c7 100644
--- a/util/config.py
+++ b/util/config.py
@@ -2,6 +2,7 @@ from __future__ import absolute_import, division, print_function
 
 import os
 import tensorflow as tf
+import tensorflow.compat.v1 as tfv1
 
 from attrdict import AttrDict
 from xdg import BaseDirectory as xdg
@@ -57,9 +58,9 @@ def initialize_globals():
         FLAGS.summary_dir = xdg.save_data_path(os.path.join('deepspeech', 'summaries'))
 
     # Standard session configuration that'll be used for all new sessions.
-    c.session_config = tf.ConfigProto(allow_soft_placement=True, log_device_placement=FLAGS.log_placement,
-                                      inter_op_parallelism_threads=FLAGS.inter_op_parallelism_threads,
-                                      intra_op_parallelism_threads=FLAGS.intra_op_parallelism_threads)
+    c.session_config = tfv1.ConfigProto(allow_soft_placement=True, log_device_placement=FLAGS.log_placement,
+                                        inter_op_parallelism_threads=FLAGS.inter_op_parallelism_threads,
+                                        intra_op_parallelism_threads=FLAGS.intra_op_parallelism_threads)
 
     c.alphabet = Alphabet(os.path.abspath(FLAGS.alphabet_config_path))
diff --git a/util/feeding.py b/util/feeding.py
index 1a928951..f36b381f 100644
--- a/util/feeding.py
+++ b/util/feeding.py
@@ -39,7 +39,7 @@ def samples_to_mfccs(samples, sample_rate):
     mfccs = contrib_audio.mfcc(spectrogram, sample_rate, dct_coefficient_count=Config.n_input)
     mfccs = tf.reshape(mfccs, [-1, Config.n_input])
 
-    return mfccs, tf.shape(mfccs)[0]
+    return mfccs, tf.shape(input=mfccs)[0]
 
 
 def audiofile_to_features(wav_filename):
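Note on the util/config.py change: ConfigProto only moved namespaces (tf.ConfigProto to tfv1.ConfigProto); the object is consumed unchanged wherever a session is created. A minimal sketch of that usage, with illustrative values standing in for the FLAGS.* reads in the patch:

import tensorflow.compat.v1 as tfv1

# Illustrative values; the real code reads these from FLAGS.
session_config = tfv1.ConfigProto(allow_soft_placement=True,
                                  log_device_placement=False,
                                  inter_op_parallelism_threads=0,  # 0 = let TF choose
                                  intra_op_parallelism_threads=0)

with tfv1.Session(config=session_config) as session:
    print(session.run(tfv1.constant('session config applied')))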