зеркало из https://github.com/mozilla/TTS.git
Merge branch 'dev' of github.com:mozilla/TTS into stylemel_in_testing
This commit is contained in:
Коммит
11f9edd849
12
config.json
12
config.json
|
@ -40,12 +40,12 @@
|
|||
"windowing": false, // Enables attention windowing. Used only in eval mode.
|
||||
"memory_size": 5, // ONLY TACOTRON - memory queue size used to queue network predictions to feed autoregressive connection. Useful if r < 5.
|
||||
"attention_norm": "sigmoid", // softmax or sigmoid. Suggested to use softmax for Tacotron2 and sigmoid for Tacotron.
|
||||
"prenet_type": "original", // ONLY TACOTRON2 - "original" or "bn".
|
||||
"prenet_dropout": true, // ONLY TACOTRON2 - enable/disable dropout at prenet.
|
||||
"use_forward_attn": true, // ONLY TACOTRON2 - if it uses forward attention. In general, it aligns faster.
|
||||
"forward_attn_mask": false,
|
||||
"transition_agent": false, // ONLY TACOTRON2 - enable/disable transition agent of forward attention.
|
||||
"location_attn": false, // ONLY TACOTRON2 - enable_disable location sensitive attention. It is enabled for TACOTRON by default.
|
||||
"prenet_type": "original", // "original" or "bn".
|
||||
"prenet_dropout": true, // enable/disable dropout at prenet.
|
||||
"use_forward_attn": true, // enable/disable forward attention. In general, it aligns faster.
|
||||
"forward_attn_mask": false, // Apply forward attention mask at inference to prevent bad modes. Try it if your model does not align well.
|
||||
"transition_agent": false, // enable/disable transition agent of forward attention.
|
||||
"location_attn": false, // enable_disable location sensitive attention.
|
||||
"loss_masking": true, // enable / disable loss masking against the sequence padding.
|
||||
"enable_eos_bos_chars": false, // enable/disable beginning of sentence and end of sentence chars.
|
||||
"stopnet": true, // Train stopnet predicting the end of synthesis.
|
||||
|
|
|
@ -39,13 +39,13 @@
|
|||
"warmup_steps": 4000, // Noam decay steps to increase the learning rate from 0 to "lr"
|
||||
"memory_size": 5, // ONLY TACOTRON - memory queue size used to queue network predictions to feed autoregressive connection. Useful if r < 5.
|
||||
"attention_norm": "sigmoid", // softmax or sigmoid. Suggested to use softmax for Tacotron2 and sigmoid for Tacotron.
|
||||
"prenet_type": "original", // ONLY TACOTRON2 - "original" or "bn".
|
||||
"prenet_dropout": true, // ONLY TACOTRON2 - enable/disable dropout at prenet.
|
||||
"prenet_type": "original", // "original" or "bn".
|
||||
"prenet_dropout": true, // enable/disable dropout at prenet.
|
||||
"windowing": false, // Enables attention windowing. Used only in eval mode.
|
||||
"use_forward_attn": false, // ONLY TACOTRON2 - if it uses forward attention. In general, it aligns faster.
|
||||
"use_forward_attn": false, // enable/disable forward attention. In general, it aligns faster.
|
||||
"forward_attn_mask": false,
|
||||
"transition_agent": false, // ONLY TACOTRON2 - enable/disable transition agent of forward attention.
|
||||
"location_attn": true, // ONLY TACOTRON2 - enable_disable location sensitive attention. It is enabled for TACOTRON by default.
|
||||
"transition_agent": false, // enable/disable transition agent of forward attention.
|
||||
"location_attn": true, // enable_disable location sensitive attention.
|
||||
"loss_masking": true, // enable / disable loss masking against the sequence padding.
|
||||
"enable_eos_bos_chars": false, // enable/disable beginning of sentence and end of sentence chars.
|
||||
"stopnet": true, // Train stopnet predicting the end of synthesis.
|
||||
|
|
|
@ -42,10 +42,10 @@
|
|||
"attention_norm": "sigmoid", // softmax or sigmoid. Suggested to use softmax for Tacotron2 and sigmoid for Tacotron.
|
||||
"prenet_type": "original", // "original" or "bn".
|
||||
"prenet_dropout": true, // enable/disable dropout at prenet.
|
||||
"use_forward_attn": true, // if it uses forward attention. In general, it aligns faster.
|
||||
"use_forward_attn": true, // enable/disable forward attention. In general, it aligns faster.
|
||||
"forward_attn_mask": false, // Apply forward attention mask af inference to prevent bad modes. Try it if your model does not align well.
|
||||
"transition_agent": true, // enable/disable transition agent of forward attention.
|
||||
"location_attn": false, // enable_disable location sensitive attention. It is enabled for TACOTRON by default.
|
||||
"location_attn": false, // enable_disable location sensitive attention.
|
||||
"loss_masking": true, // enable / disable loss masking against the sequence padding.
|
||||
"enable_eos_bos_chars": false, // enable/disable beginning of sentence and end of sentence chars.
|
||||
"stopnet": true, // Train stopnet predicting the end of synthesis.
|
||||
|
|
|
@ -39,12 +39,12 @@
|
|||
"warmup_steps": 4000, // Noam decay steps to increase the learning rate from 0 to "lr"
|
||||
"memory_size": 5, // ONLY TACOTRON - memory queue size used to queue network predictions to feed autoregressive connection. Useful if r < 5.
|
||||
"attention_norm": "sigmoid", // softmax or sigmoid. Suggested to use softmax for Tacotron2 and sigmoid for Tacotron.
|
||||
"prenet_type": "original", // ONLY TACOTRON2 - "original" or "bn".
|
||||
"prenet_dropout": true, // ONLY TACOTRON2 - enable/disable dropout at prenet.
|
||||
"use_forward_attn": true, // ONLY TACOTRON2 - if it uses forward attention. In general, it aligns faster.
|
||||
"prenet_type": "original", // "original" or "bn".
|
||||
"prenet_dropout": true, // enable/disable dropout at prenet.
|
||||
"use_forward_attn": true, // enable/disable forward attention. In general, it aligns faster.
|
||||
"forward_attn_mask": false, // Apply forward attention mask af inference to prevent bad modes. Try it if your model does not align well.
|
||||
"transition_agent": false, // ONLY TACOTRON2 - enable/disable transition agent of forward attention.
|
||||
"location_attn": false, // ONLY TACOTRON2 - enable_disable location sensitive attention. It is enabled for TACOTRON by default.
|
||||
"transition_agent": false, // enable/disable transition agent of forward attention.
|
||||
"location_attn": false, // enable_disable location sensitive attention. It is enabled for TACOTRON by default.
|
||||
"loss_masking": true, // enable / disable loss masking against the sequence padding.
|
||||
"enable_eos_bos_chars": false, // enable/disable beginning of sentence and end of sentence chars.
|
||||
"stopnet": true, // Train stopnet predicting the end of synthesis.
|
||||
|
|
|
@ -40,12 +40,12 @@
|
|||
"windowing": false, // Enables attention windowing. Used only in eval mode.
|
||||
"memory_size": 5, // ONLY TACOTRON - memory queue size used to queue network predictions to feed autoregressive connection. Useful if r < 5.
|
||||
"attention_norm": "sigmoid", // softmax or sigmoid. Suggested to use softmax for Tacotron2 and sigmoid for Tacotron.
|
||||
"prenet_type": "original", // ONLY TACOTRON2 - "original" or "bn".
|
||||
"prenet_dropout": true, // ONLY TACOTRON2 - enable/disable dropout at prenet.
|
||||
"use_forward_attn": false, // ONLY TACOTRON2 - if it uses forward attention. In general, it aligns faster.
|
||||
"transition_agent": false, // ONLY TACOTRON2 - enable/disable transition agent of forward attention.
|
||||
"forward_attn_mask": false,
|
||||
"location_attn": true, // ONLY TACOTRON2 - enable_disable location sensitive attention. It is enabled for TACOTRON by default.
|
||||
"prenet_type": "original", // "original" or "bn".
|
||||
"prenet_dropout": true, // enable/disable dropout at prenet.
|
||||
"use_forward_attn": false, // enable/disable forward attention. In general, it aligns faster.
|
||||
"transition_agent": false, // enable/disable transition agent of forward attention.
|
||||
"forward_attn_mask": false, // Apply forward attention mask at inference to prevent bad modes. Try it if your model does not align well.
|
||||
"location_attn": true, // enable_disable location sensitive attention. It is enabled for TACOTRON by default.
|
||||
"loss_masking": true, // enable / disable loss masking against the sequence padding.
|
||||
"enable_eos_bos_chars": false, // enable/disable beginning of sentence and end of sentence chars.
|
||||
"stopnet": true, // Train stopnet predicting the end of synthesis.
|
||||
|
|
|
@ -42,8 +42,8 @@
|
|||
"attention_norm": "sigmoid", // softmax or sigmoid. Suggested to use softmax for Tacotron2 and sigmoid for Tacotron.
|
||||
"prenet_type": "original", // "original" or "bn".
|
||||
"prenet_dropout": true, // enable/disable dropout at prenet.
|
||||
"use_forward_attn": true, // if it uses forward attention. In general, it aligns faster.
|
||||
"forward_attn_mask": false, // Apply forward attention mask af inference to prevent bad modes. Try it if your model does not align well.
|
||||
"use_forward_attn": true, // enable/disable forward attention. In general, it aligns faster.
|
||||
"forward_attn_mask": false, // Apply forward attention mask at inference to prevent bad modes. Try it if your model does not align well.
|
||||
"transition_agent": false, // enable/disable transition agent of forward attention.
|
||||
"location_attn": false, // enable_disable location sensitive attention. It is enabled for TACOTRON by default.
|
||||
"loss_masking": true, // enable / disable loss masking against the sequence padding.
|
||||
|
|
|
@ -75,21 +75,19 @@ def mailabs(root_path, meta_files=None):
|
|||
speaker_regex = re.compile("by_book/(male|female)/(?P<speaker_name>[^/]+)/")
|
||||
if meta_files is None:
|
||||
csv_files = glob(root_path+"/**/metadata.csv", recursive=True)
|
||||
folders = [os.path.dirname(f) for f in csv_files]
|
||||
else:
|
||||
csv_files = meta_files
|
||||
folders = [f.strip().split("by_book")[1][1:] for f in csv_files]
|
||||
# meta_files = [f.strip() for f in meta_files.split(",")]
|
||||
items = []
|
||||
for idx, csv_file in enumerate(csv_files):
|
||||
for csv_file in csv_files:
|
||||
txt_file = os.path.join(root_path, csv_file)
|
||||
folder = os.path.dirname(txt_file)
|
||||
# determine speaker based on folder structure...
|
||||
speaker_name_match = speaker_regex.search(csv_file)
|
||||
speaker_name_match = speaker_regex.search(txt_file)
|
||||
if speaker_name_match is None:
|
||||
continue
|
||||
speaker_name = speaker_name_match.group("speaker_name")
|
||||
print(" | > {}".format(csv_file))
|
||||
folder = folders[idx]
|
||||
txt_file = os.path.join(root_path, csv_file)
|
||||
with open(txt_file, 'r') as ttf:
|
||||
for line in ttf:
|
||||
cols = line.split('|')
|
||||
|
|
Загрузка…
Ссылка в новой задаче