зеркало из https://github.com/mozilla/TTS.git
Turkish cleaner and data preprocessor
This commit is contained in:
Родитель
fd4e6d0245
Коммит
fa795347a9
|
@ -187,3 +187,21 @@ def libri_tts(root_path, meta_files=None):
|
|||
for item in items:
|
||||
assert os.path.exists(item[1]), f" [!] wav file is not exist - {item[1]}"
|
||||
return items
|
||||
|
||||
|
||||
def custom_turkish(root_path, meta_file):
    """Load items of a single-speaker Turkish dataset from a metadata file.

    Each non-blank line of the metadata file is split on '|'. The first
    column is the wav file stem (looked up as `<root_path>/wavs/<stem>.wav`)
    and the second column is the transcription. Records whose wav file does
    not exist on disk are skipped and counted; the count is printed once the
    whole file has been read.

    Args:
        root_path (str): dataset root folder containing the `wavs` folder.
        meta_file (str): metadata file path, relative to `root_path`.

    Returns:
        list: one `[text, wav_file, speaker_name]` entry per usable record.

    Raises:
        IndexError: if a line with an existing wav file has no second
            '|'-separated column.
    """
    txt_file = os.path.join(root_path, meta_file)
    items = []
    speaker_name = "turkish-female"
    skipped_files = []
    with open(txt_file, 'r', encoding='utf-8') as ttf:
        for line in ttf:
            # Blank lines (e.g. a trailing empty line) are not records; without
            # this guard they resolve to "wavs/.wav" and inflate the skip count.
            if not line.strip():
                continue
            cols = line.split('|')
            wav_file = os.path.join(root_path, 'wavs', cols[0].strip() + '.wav')
            if not os.path.exists(wav_file):
                skipped_files.append(wav_file)
                continue
            text = cols[1].strip()
            items.append([text, wav_file, speaker_name])
    print(f" [!] {len(skipped_files)} files skipped. They are not exist...")
    return items
|
||||
|
|
|
@ -91,6 +91,14 @@ def transliteration_cleaners(text):
|
|||
return text
|
||||
|
||||
|
||||
# TODO: elaborate it
|
||||
def basic_turkish_cleaners(text):
    '''Pipeline for Turkish text.

    Maps the Turkish capital letters "I" and "İ" to their lowercase forms
    "ı" and "i", then applies the module's `lowercase` and
    `collapse_whitespace` helpers.

    Args:
        text (str): raw input text.

    Returns:
        str: the cleaned text.
    '''
    # Turkish casing differs from the default Unicode mapping: "I" lowers to
    # dotless "ı" and "İ" lowers to plain "i". A generic `str.lower()` would
    # instead yield "i" for "I" and "i" + combining dot (U+0307) for "İ", so
    # both letters are handled explicitly before the generic lowercase step.
    text = text.replace("I", "ı").replace("İ", "i")
    text = lowercase(text)
    text = collapse_whitespace(text)
    # Return the result like the other cleaner pipelines do; without this the
    # function would always evaluate to None.
    return text
|
||||
|
||||
|
||||
def english_cleaners(text):
|
||||
'''Pipeline for English text, including number and abbreviation expansion.'''
|
||||
text = convert_to_ascii(text)
|
||||
|
|
Загрузка…
Ссылка в новой задаче