зеркало из https://github.com/mozilla/DeepSpeech.git
Sort importer imports with isort
This commit is contained in:
Родитель
20b0ab17ea
Коммит
b7e6b8c3e6
|
@ -0,0 +1,4 @@
|
|||
[settings]
|
||||
line_length=80
|
||||
multi_line_output=3
|
||||
default_section=FIRSTPARTY
|
|
@ -6,11 +6,19 @@ Use "python3 build_sdb.py -h" for help
|
|||
from __future__ import absolute_import, division, print_function
|
||||
|
||||
import argparse
|
||||
|
||||
import progressbar
|
||||
|
||||
from deepspeech_training.util.audio import (
|
||||
AUDIO_TYPE_OPUS,
|
||||
AUDIO_TYPE_WAV,
|
||||
change_audio_types
|
||||
)
|
||||
from deepspeech_training.util.downloader import SIMPLE_BAR
|
||||
from deepspeech_training.util.audio import change_audio_types, AUDIO_TYPE_WAV, AUDIO_TYPE_OPUS
|
||||
from deepspeech_training.util.sample_collections import samples_from_files, DirectSDBWriter
|
||||
from deepspeech_training.util.sample_collections import (
|
||||
DirectSDBWriter,
|
||||
samples_from_files
|
||||
)
|
||||
|
||||
AUDIO_TYPE_LOOKUP = {
|
||||
'wav': AUDIO_TYPE_WAV,
|
||||
|
|
|
@ -1,9 +1,9 @@
|
|||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
import tensorflow.compat.v1 as tfv1
|
||||
import sys
|
||||
|
||||
import tensorflow.compat.v1 as tfv1
|
||||
from google.protobuf import text_format
|
||||
|
||||
|
||||
|
|
|
@ -3,9 +3,10 @@ from __future__ import absolute_import, division, print_function
|
|||
|
||||
import glob
|
||||
import os
|
||||
import pandas
|
||||
import tarfile
|
||||
|
||||
import pandas
|
||||
|
||||
from deepspeech_training.util.importers import get_importers_parser
|
||||
|
||||
COLUMN_NAMES = ['wav_filename', 'wav_filesize', 'transcript']
|
||||
|
|
|
@ -3,9 +3,10 @@ from __future__ import absolute_import, division, print_function
|
|||
|
||||
import glob
|
||||
import os
|
||||
import pandas
|
||||
import tarfile
|
||||
|
||||
import pandas
|
||||
|
||||
from deepspeech_training.util.importers import get_importers_parser
|
||||
|
||||
COLUMNNAMES = ['wav_filename', 'wav_filesize', 'transcript']
|
||||
|
|
|
@ -3,15 +3,22 @@ from __future__ import absolute_import, division, print_function
|
|||
|
||||
import csv
|
||||
import os
|
||||
import progressbar
|
||||
import sox
|
||||
import subprocess
|
||||
import tarfile
|
||||
|
||||
from glob import glob
|
||||
from multiprocessing import Pool
|
||||
from deepspeech_training.util.importers import validate_label_eng as validate_label, get_counter, get_imported_samples, print_import_report
|
||||
from deepspeech_training.util.downloader import maybe_download, SIMPLE_BAR
|
||||
|
||||
import progressbar
|
||||
import sox
|
||||
|
||||
from deepspeech_training.util.downloader import SIMPLE_BAR, maybe_download
|
||||
from deepspeech_training.util.importers import (
|
||||
get_counter,
|
||||
get_imported_samples,
|
||||
print_import_report
|
||||
)
|
||||
from deepspeech_training.util.importers import \
|
||||
validate_label_eng as validate_label
|
||||
|
||||
FIELDNAMES = ['wav_filename', 'wav_filesize', 'transcript']
|
||||
SAMPLE_RATE = 16000
|
||||
|
|
|
@ -10,16 +10,22 @@ from __future__ import absolute_import, division, print_function
|
|||
|
||||
import csv
|
||||
import os
|
||||
import progressbar
|
||||
import sox
|
||||
import subprocess
|
||||
import unicodedata
|
||||
|
||||
from multiprocessing import Pool
|
||||
from deepspeech_training.util.downloader import SIMPLE_BAR
|
||||
from deepspeech_training.util.text import Alphabet
|
||||
from deepspeech_training.util.importers import get_importers_parser, get_validate_label, get_counter, get_imported_samples, print_import_report
|
||||
|
||||
import progressbar
|
||||
import sox
|
||||
|
||||
from deepspeech_training.util.downloader import SIMPLE_BAR
|
||||
from deepspeech_training.util.importers import (
|
||||
get_counter,
|
||||
get_imported_samples,
|
||||
get_importers_parser,
|
||||
get_validate_label,
|
||||
print_import_report
|
||||
)
|
||||
from deepspeech_training.util.text import Alphabet
|
||||
|
||||
FIELDNAMES = ['wav_filename', 'wav_filesize', 'transcript']
|
||||
SAMPLE_RATE = 16000
|
||||
|
|
|
@ -1,20 +1,24 @@
|
|||
#!/usr/bin/env python
|
||||
from __future__ import absolute_import, division, print_function
|
||||
|
||||
# Prerequisite: Having the sph2pipe tool in your PATH:
|
||||
# https://www.ldc.upenn.edu/language-resources/tools/sphere-conversion-tools
|
||||
|
||||
import codecs
|
||||
import fnmatch
|
||||
import librosa
|
||||
import os
|
||||
import pandas
|
||||
import soundfile # <= Has an external dependency on libsndfile
|
||||
import subprocess
|
||||
import sys
|
||||
import unicodedata
|
||||
|
||||
from deepspeech_training.util.importers import validate_label_eng as validate_label
|
||||
import librosa
|
||||
import pandas
|
||||
import soundfile # <= Has an external dependency on libsndfile
|
||||
|
||||
from deepspeech_training.util.importers import \
|
||||
validate_label_eng as validate_label
|
||||
|
||||
# Prerequisite: Having the sph2pipe tool in your PATH:
|
||||
# https://www.ldc.upenn.edu/language-resources/tools/sphere-conversion-tools
|
||||
|
||||
|
||||
|
||||
def _download_and_preprocess_data(data_dir):
|
||||
# Assume data_dir contains extracted LDC2004S13, LDC2004T19, LDC2005S13, LDC2005T19
|
||||
|
|
|
@ -2,11 +2,12 @@
|
|||
from __future__ import absolute_import, division, print_function
|
||||
|
||||
import glob
|
||||
import numpy as np
|
||||
import os
|
||||
import pandas
|
||||
import tarfile
|
||||
|
||||
import numpy as np
|
||||
import pandas
|
||||
|
||||
from deepspeech_training.util.importers import get_importers_parser
|
||||
|
||||
COLUMN_NAMES = ['wav_filename', 'wav_filesize', 'transcript']
|
||||
|
|
|
@ -4,15 +4,18 @@ import csv
|
|||
import logging
|
||||
import math
|
||||
import os
|
||||
import pandas as pd
|
||||
import swifter
|
||||
import subprocess
|
||||
import urllib
|
||||
|
||||
from deepspeech_training.util.importers import get_importers_parser, get_validate_label
|
||||
from pathlib import Path
|
||||
|
||||
import pandas as pd
|
||||
from sox import Transformer
|
||||
|
||||
import swifter
|
||||
from deepspeech_training.util.importers import (
|
||||
get_importers_parser,
|
||||
get_validate_label
|
||||
)
|
||||
|
||||
__version__ = "0.1.0"
|
||||
_logger = logging.getLogger(__name__)
|
||||
|
|
|
@ -1,12 +1,14 @@
|
|||
#!/usr/bin/env python
|
||||
from __future__ import absolute_import, division, print_function
|
||||
|
||||
import pandas
|
||||
import os
|
||||
import sys
|
||||
|
||||
import pandas
|
||||
|
||||
from deepspeech_training.util.downloader import maybe_download
|
||||
|
||||
|
||||
def _download_and_preprocess_data(data_dir):
|
||||
# Conditionally download data
|
||||
LDC93S1_BASE = "LDC93S1"
|
||||
|
|
|
@ -4,17 +4,18 @@ from __future__ import absolute_import, division, print_function
|
|||
import codecs
|
||||
import fnmatch
|
||||
import os
|
||||
import pandas
|
||||
import progressbar
|
||||
import subprocess
|
||||
import sys
|
||||
import tarfile
|
||||
import unicodedata
|
||||
|
||||
from deepspeech_training.util.downloader import maybe_download
|
||||
import pandas
|
||||
import progressbar
|
||||
from sox import Transformer
|
||||
from tensorflow.python.platform import gfile
|
||||
|
||||
from deepspeech_training.util.downloader import maybe_download
|
||||
|
||||
SAMPLE_RATE = 16000
|
||||
|
||||
def _download_and_preprocess_data(data_dir):
|
||||
|
|
|
@ -4,20 +4,25 @@ from __future__ import absolute_import, division, print_function
|
|||
import argparse
|
||||
import csv
|
||||
import os
|
||||
import progressbar
|
||||
import re
|
||||
import sox
|
||||
import subprocess
|
||||
import unicodedata
|
||||
import zipfile
|
||||
|
||||
from deepspeech_training.util.downloader import maybe_download
|
||||
from deepspeech_training.util.downloader import SIMPLE_BAR
|
||||
from deepspeech_training.util.importers import get_importers_parser, get_validate_label, get_counter, get_imported_samples, print_import_report
|
||||
from deepspeech_training.util.text import Alphabet
|
||||
from glob import glob
|
||||
from multiprocessing import Pool
|
||||
|
||||
import progressbar
|
||||
import sox
|
||||
|
||||
from deepspeech_training.util.downloader import SIMPLE_BAR, maybe_download
|
||||
from deepspeech_training.util.importers import (
|
||||
get_counter,
|
||||
get_imported_samples,
|
||||
get_importers_parser,
|
||||
get_validate_label,
|
||||
print_import_report
|
||||
)
|
||||
from deepspeech_training.util.text import Alphabet
|
||||
|
||||
FIELDNAMES = ['wav_filename', 'wav_filesize', 'transcript']
|
||||
SAMPLE_RATE = 16000
|
||||
|
|
|
@ -4,18 +4,24 @@ from __future__ import absolute_import, division, print_function
|
|||
|
||||
import csv
|
||||
import os
|
||||
import progressbar
|
||||
import subprocess
|
||||
import tarfile
|
||||
import unicodedata
|
||||
|
||||
from deepspeech_training.util.downloader import maybe_download
|
||||
from deepspeech_training.util.downloader import SIMPLE_BAR
|
||||
from deepspeech_training.util.importers import get_importers_parser, get_validate_label, get_counter, get_imported_samples, print_import_report
|
||||
from deepspeech_training.util.text import Alphabet
|
||||
from glob import glob
|
||||
from multiprocessing import Pool
|
||||
|
||||
import progressbar
|
||||
|
||||
from deepspeech_training.util.downloader import SIMPLE_BAR, maybe_download
|
||||
from deepspeech_training.util.importers import (
|
||||
get_counter,
|
||||
get_imported_samples,
|
||||
get_importers_parser,
|
||||
get_validate_label,
|
||||
print_import_report
|
||||
)
|
||||
from deepspeech_training.util.text import Alphabet
|
||||
|
||||
FIELDNAMES = ['wav_filename', 'wav_filesize', 'transcript']
|
||||
SAMPLE_RATE = 16000
|
||||
MAX_SECS = 15
|
||||
|
|
|
@ -3,10 +3,11 @@ from __future__ import absolute_import, division, print_function
|
|||
|
||||
import glob
|
||||
import os
|
||||
import pandas
|
||||
import tarfile
|
||||
import wave
|
||||
|
||||
import pandas
|
||||
|
||||
from deepspeech_training.util.importers import get_importers_parser
|
||||
|
||||
COLUMN_NAMES = ['wav_filename', 'wav_filesize', 'transcript']
|
||||
|
|
|
@ -3,11 +3,12 @@ from __future__ import absolute_import, division, print_function
|
|||
|
||||
import glob
|
||||
import json
|
||||
import numpy as np
|
||||
import os
|
||||
import pandas
|
||||
import tarfile
|
||||
|
||||
import numpy as np
|
||||
import pandas
|
||||
|
||||
from deepspeech_training.util.importers import get_importers_parser
|
||||
|
||||
COLUMN_NAMES = ['wav_filename', 'wav_filesize', 'transcript']
|
||||
|
|
|
@ -3,20 +3,26 @@ from __future__ import absolute_import, division, print_function
|
|||
|
||||
import csv
|
||||
import os
|
||||
import progressbar
|
||||
import re
|
||||
import sox
|
||||
import subprocess
|
||||
import tarfile
|
||||
import unicodedata
|
||||
import zipfile
|
||||
|
||||
from deepspeech_training.util.downloader import maybe_download, SIMPLE_BAR
|
||||
from deepspeech_training.util.importers import get_importers_parser, get_validate_label, get_counter, get_imported_samples, print_import_report
|
||||
from deepspeech_training.util.text import Alphabet
|
||||
from glob import glob
|
||||
from multiprocessing import Pool
|
||||
|
||||
import progressbar
|
||||
import sox
|
||||
|
||||
from deepspeech_training.util.downloader import SIMPLE_BAR, maybe_download
|
||||
from deepspeech_training.util.importers import (
|
||||
get_counter,
|
||||
get_imported_samples,
|
||||
get_importers_parser,
|
||||
get_validate_label,
|
||||
print_import_report
|
||||
)
|
||||
from deepspeech_training.util.text import Alphabet
|
||||
|
||||
FIELDNAMES = ['wav_filename', 'wav_filesize', 'transcript']
|
||||
SAMPLE_RATE = 16000
|
||||
|
|
|
@ -6,19 +6,20 @@ from __future__ import absolute_import, division, print_function
|
|||
# from the deepspeech directory run with: ./bin/import_swb.py ./data/swb/
|
||||
import codecs
|
||||
import fnmatch
|
||||
import librosa
|
||||
import os
|
||||
import pandas
|
||||
import requests
|
||||
import soundfile # <= Has an external dependency on libsndfile
|
||||
import subprocess
|
||||
import sys
|
||||
import tarfile
|
||||
import unicodedata
|
||||
import wave
|
||||
|
||||
from deepspeech_training.util.importers import validate_label_eng as validate_label
|
||||
import librosa
|
||||
import pandas
|
||||
import requests
|
||||
import soundfile # <= Has an external dependency on libsndfile
|
||||
|
||||
from deepspeech_training.util.importers import \
|
||||
validate_label_eng as validate_label
|
||||
|
||||
# ARCHIVE_NAME refers to ISIP alignments from 01/29/03
|
||||
ARCHIVE_NAME = 'switchboard_word_alignments.tar.gz'
|
||||
|
|
|
@ -8,23 +8,25 @@ from __future__ import absolute_import, division, print_function
|
|||
import argparse
|
||||
import csv
|
||||
import os
|
||||
import progressbar
|
||||
import random
|
||||
import re
|
||||
import shutil
|
||||
import sox
|
||||
import sys
|
||||
import tarfile
|
||||
import unicodedata
|
||||
import wave
|
||||
import xml.etree.cElementTree as ET
|
||||
|
||||
from glob import glob
|
||||
from collections import Counter
|
||||
from glob import glob
|
||||
from multiprocessing.pool import ThreadPool
|
||||
|
||||
import progressbar
|
||||
import sox
|
||||
|
||||
from deepspeech_training.util.downloader import SIMPLE_BAR, maybe_download
|
||||
from deepspeech_training.util.importers import \
|
||||
validate_label_eng as validate_label
|
||||
from deepspeech_training.util.text import Alphabet
|
||||
from deepspeech_training.util.importers import validate_label_eng as validate_label
|
||||
from deepspeech_training.util.downloader import maybe_download, SIMPLE_BAR
|
||||
|
||||
SWC_URL = "https://www2.informatik.uni-hamburg.de/nats/pub/SWC/SWC_{language}.tar"
|
||||
SWC_ARCHIVE = "SWC_{language}.tar"
|
||||
|
|
|
@ -1,17 +1,18 @@
|
|||
#!/usr/bin/env python
|
||||
from __future__ import absolute_import, division, print_function
|
||||
|
||||
import pandas
|
||||
import sys
|
||||
import tarfile
|
||||
import unicodedata
|
||||
import wave
|
||||
|
||||
from glob import glob
|
||||
from os import makedirs, path, remove, rmdir
|
||||
|
||||
import pandas
|
||||
from sox import Transformer
|
||||
from deepspeech_training.util.downloader import maybe_download
|
||||
from tensorflow.python.platform import gfile
|
||||
|
||||
from deepspeech_training.util.downloader import maybe_download
|
||||
from deepspeech_training.util.stm import parse_stm_file
|
||||
|
||||
|
||||
|
|
|
@ -11,13 +11,15 @@
|
|||
'''
|
||||
|
||||
import errno
|
||||
import fnmatch
|
||||
import os
|
||||
from os import path
|
||||
import subprocess
|
||||
import sys
|
||||
import tarfile
|
||||
import fnmatch
|
||||
from os import path
|
||||
|
||||
import pandas as pd
|
||||
import subprocess
|
||||
|
||||
|
||||
def clean(word):
|
||||
# LC ALL & strip punctuation which is not required
|
||||
|
|
|
@ -3,18 +3,23 @@ from __future__ import absolute_import, division, print_function
|
|||
|
||||
import csv
|
||||
import os
|
||||
import progressbar
|
||||
import re
|
||||
import sox
|
||||
import subprocess
|
||||
import unidecode
|
||||
import zipfile
|
||||
|
||||
from deepspeech_training.util.downloader import maybe_download
|
||||
from deepspeech_training.util.downloader import SIMPLE_BAR
|
||||
from deepspeech_training.util.importers import get_importers_parser, get_validate_label, get_counter, get_imported_samples, print_import_report
|
||||
from multiprocessing import Pool
|
||||
|
||||
import progressbar
|
||||
import sox
|
||||
|
||||
import unidecode
|
||||
from deepspeech_training.util.downloader import SIMPLE_BAR, maybe_download
|
||||
from deepspeech_training.util.importers import (
|
||||
get_counter,
|
||||
get_imported_samples,
|
||||
get_importers_parser,
|
||||
get_validate_label,
|
||||
print_import_report
|
||||
)
|
||||
|
||||
FIELDNAMES = ['wav_filename', 'wav_filesize', 'transcript']
|
||||
SAMPLE_RATE = 16000
|
||||
|
|
|
@ -8,15 +8,17 @@ from __future__ import absolute_import, division, print_function
|
|||
import argparse
|
||||
import csv
|
||||
import os
|
||||
import progressbar
|
||||
import tarfile
|
||||
import unicodedata
|
||||
import wave
|
||||
import xml.etree.cElementTree as ET
|
||||
|
||||
from collections import Counter
|
||||
from deepspeech_training.util.downloader import maybe_download, SIMPLE_BAR
|
||||
from deepspeech_training.util.importers import validate_label_eng as validate_label
|
||||
|
||||
import progressbar
|
||||
|
||||
from deepspeech_training.util.downloader import SIMPLE_BAR, maybe_download
|
||||
from deepspeech_training.util.importers import \
|
||||
validate_label_eng as validate_label
|
||||
from deepspeech_training.util.text import Alphabet
|
||||
|
||||
TUDA_VERSION = 'v2'
|
||||
|
|
|
@ -4,17 +4,21 @@
|
|||
# as per https://homepages.inf.ed.ac.uk/jyamagis/page3/page58/page58.html
|
||||
from __future__ import absolute_import, division, print_function
|
||||
|
||||
import librosa
|
||||
import os
|
||||
import progressbar
|
||||
import random
|
||||
import re
|
||||
|
||||
from deepspeech_training.util.downloader import maybe_download, SIMPLE_BAR
|
||||
from deepspeech_training.util.importers import get_counter, get_imported_samples, print_import_report
|
||||
from multiprocessing import Pool
|
||||
from zipfile import ZipFile
|
||||
|
||||
import librosa
|
||||
import progressbar
|
||||
|
||||
from deepspeech_training.util.downloader import SIMPLE_BAR, maybe_download
|
||||
from deepspeech_training.util.importers import (
|
||||
get_counter,
|
||||
get_imported_samples,
|
||||
print_import_report
|
||||
)
|
||||
|
||||
SAMPLE_RATE = 16000
|
||||
MAX_SECS = 10
|
||||
|
|
|
@ -3,20 +3,21 @@ from __future__ import absolute_import, division, print_function
|
|||
|
||||
import codecs
|
||||
import os
|
||||
import pandas
|
||||
import re
|
||||
import tarfile
|
||||
import threading
|
||||
import unicodedata
|
||||
|
||||
from bs4 import BeautifulSoup
|
||||
from deepspeech_training.util.downloader import maybe_download
|
||||
from glob import glob
|
||||
from multiprocessing.pool import ThreadPool
|
||||
from os import makedirs, path
|
||||
|
||||
import pandas
|
||||
from bs4 import BeautifulSoup
|
||||
from six.moves import urllib
|
||||
from tensorflow.python.platform import gfile
|
||||
|
||||
from deepspeech_training.util.downloader import maybe_download
|
||||
|
||||
"""The number of jobs to run in parallel"""
|
||||
NUM_PARALLEL = 8
|
||||
|
||||
|
@ -188,7 +189,3 @@ def _generate_dataset(data_dir, data_set):
|
|||
|
||||
if __name__=="__main__":
|
||||
_download_and_preprocess_data(sys.argv[1])
|
||||
|
||||
|
||||
|
||||
|
||||
|
|
|
@ -1,9 +1,11 @@
|
|||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
import tensorflow.compat.v1 as tfv1
|
||||
import sys
|
||||
|
||||
import tensorflow.compat.v1 as tfv1
|
||||
|
||||
|
||||
def main():
|
||||
with tfv1.gfile.FastGFile(sys.argv[1], 'rb') as fin:
|
||||
graph_def = tfv1.GraphDef()
|
||||
|
|
|
@ -10,7 +10,10 @@ import random
|
|||
import sys
|
||||
|
||||
from deepspeech_training.util.audio import AUDIO_TYPE_PCM
|
||||
from deepspeech_training.util.sample_collections import samples_from_file, LabeledSample
|
||||
from deepspeech_training.util.sample_collections import (
|
||||
LabeledSample,
|
||||
samples_from_file
|
||||
)
|
||||
|
||||
|
||||
def play_sample(samples, index):
|
||||
|
|
Загрузка…
Ссылка в новой задаче