Merge pull request #3424 from mozilla/io-fixes

Fix I/O issues introduced in #3420
2020-11-18 08:07:10 +02:00 · 2020-11-18 08:07:10 +02:00 · ab1288ffde
--- a/.dockerignore
+++ b/.dockerignore
@ -0,0 +1,2 @@
+tensorflow/
+data/
--- a/bin/compare_samples.py
+++ b/bin/compare_samples.py
@ -15,8 +15,8 @@ def fail(message):


 def compare_samples():
-    sample1 = load_sample(CLI_ARGS.sample1)
-    sample2 = load_sample(CLI_ARGS.sample2)
+    sample1 = load_sample(CLI_ARGS.sample1).unpack()
+    sample2 = load_sample(CLI_ARGS.sample2).unpack()
    if sample1.audio_format != sample2.audio_format:
        fail('Samples differ on: audio-format ({} and {})'.format(sample1.audio_format, sample2.audio_format))
    if sample1.duration != sample2.duration:
--- a/training/deepspeech_training/train.py
+++ b/training/deepspeech_training/train.py
@ -811,7 +811,7 @@ def export():
        load_graph_for_evaluation(session)

        output_filename = FLAGS.export_file_name + '.pb'
-        if FLAGS.remove_remote_export:
+        if FLAGS.remove_export:
            if isdir_remote(FLAGS.export_dir):
                log_info('Removing old export')
                remove_remote(FLAGS.export_dir)
--- a/training/deepspeech_training/util/audio.py
+++ b/training/deepspeech_training/util/audio.py
@ -118,15 +118,19 @@ class Sample:
        self.audio_type = new_audio_type


-def _change_audio_type(sample_and_audio_type):
-    sample, audio_type, bitrate = sample_and_audio_type
+def _unpack_and_change_audio_type(sample_and_audio_type):
+    packed_sample, audio_type, bitrate = sample_and_audio_type
+    if hasattr(packed_sample, 'unpack'):  # inputs without unpack() are used as-is
+        sample = packed_sample.unpack()
+    else:
+        sample = packed_sample
    sample.change_audio_type(audio_type, bitrate=bitrate)
    return sample


-def change_audio_types(samples, audio_type=AUDIO_TYPE_PCM, bitrate=None, processes=None, process_ahead=None):
+def change_audio_types(packed_samples, audio_type=AUDIO_TYPE_PCM, bitrate=None, processes=None, process_ahead=None):
    with LimitingPool(processes=processes, process_ahead=process_ahead) as pool:
-        yield from pool.imap(_change_audio_type, map(lambda s: (s, audio_type, bitrate), samples))
+        yield from pool.imap(_unpack_and_change_audio_type, map(lambda s: (s, audio_type, bitrate), packed_samples))


 def get_audio_type_from_extension(ext):
--- a/training/deepspeech_training/util/augmentations.py
+++ b/training/deepspeech_training/util/augmentations.py
@ -152,7 +152,10 @@ def _init_augmentation_worker(preparation_context):

 def _load_and_augment_sample(timed_sample, context=None):
    sample, clock = timed_sample
-    realized_sample = sample.unpack()
+    if hasattr(sample, 'unpack'):
+        realized_sample = sample.unpack()
+    else:
+        realized_sample = sample
    return _augment_sample((realized_sample, clock), context)


--- a/training/deepspeech_training/util/taskcluster.py
+++ b/training/deepspeech_training/util/taskcluster.py
@ -14,8 +14,6 @@ import sys

 from pkg_resources import parse_version

-from .io import isdir_remote, open_remote, is_remote_path
-
 DEFAULT_SCHEMES = {
    'deepspeech': 'https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.deepspeech.native_client.%(branch_name)s.%(arch_string)s/artifacts/public/%(artifact_name)s',
    'tensorflow': 'https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.tensorflow.pip.%(branch_name)s.%(arch_string)s/artifacts/public/%(artifact_name)s'
@ -43,7 +41,7 @@ def maybe_download_tc(target_dir, tc_url, progress=True):

    assert target_dir is not None

-    if not is_remote_path(target_dir):
+    if not os.path.isdir(target_dir):
        try:
            os.makedirs(target_dir)
        except OSError as e:
@ -62,7 +60,7 @@ def maybe_download_tc(target_dir, tc_url, progress=True):
        print('File already exists: %s' % target_file)

    if is_gzip:
-        with open_remote(target_file, "r+b") as frw:
+        with open(target_file, "r+b") as frw:
            decompressed = gzip.decompress(frw.read())
            frw.seek(0)
            frw.write(decompressed)