зеркало из https://github.com/mozilla/TTS.git
Merge branch 'dev' of github.com:mozilla/TTS into stylemel_in_testing
This commit is contained in:
Коммит
11f9edd849
12
config.json
12
config.json
|
@ -40,12 +40,12 @@
|
||||||
"windowing": false, // Enables attention windowing. Used only in eval mode.
|
"windowing": false, // Enables attention windowing. Used only in eval mode.
|
||||||
"memory_size": 5, // ONLY TACOTRON - memory queue size used to queue network predictions to feed autoregressive connection. Useful if r < 5.
|
"memory_size": 5, // ONLY TACOTRON - memory queue size used to queue network predictions to feed autoregressive connection. Useful if r < 5.
|
||||||
"attention_norm": "sigmoid", // softmax or sigmoid. Suggested to use softmax for Tacotron2 and sigmoid for Tacotron.
|
"attention_norm": "sigmoid", // softmax or sigmoid. Suggested to use softmax for Tacotron2 and sigmoid for Tacotron.
|
||||||
"prenet_type": "original", // ONLY TACOTRON2 - "original" or "bn".
|
"prenet_type": "original", // "original" or "bn".
|
||||||
"prenet_dropout": true, // ONLY TACOTRON2 - enable/disable dropout at prenet.
|
"prenet_dropout": true, // enable/disable dropout at prenet.
|
||||||
"use_forward_attn": true, // ONLY TACOTRON2 - if it uses forward attention. In general, it aligns faster.
|
"use_forward_attn": true, // enable/disable forward attention. In general, it aligns faster.
|
||||||
"forward_attn_mask": false,
|
"forward_attn_mask": false, // Apply forward attention mask at inference to prevent bad modes. Try it if your model does not align well.
|
||||||
"transition_agent": false, // ONLY TACOTRON2 - enable/disable transition agent of forward attention.
|
"transition_agent": false, // enable/disable transition agent of forward attention.
|
||||||
"location_attn": false, // ONLY TACOTRON2 - enable_disable location sensitive attention. It is enabled for TACOTRON by default.
|
"location_attn": false, // enable/disable location sensitive attention.
|
||||||
"loss_masking": true, // enable / disable loss masking against the sequence padding.
|
"loss_masking": true, // enable / disable loss masking against the sequence padding.
|
||||||
"enable_eos_bos_chars": false, // enable/disable beginning of sentence and end of sentence chars.
|
"enable_eos_bos_chars": false, // enable/disable beginning of sentence and end of sentence chars.
|
||||||
"stopnet": true, // Train stopnet predicting the end of synthesis.
|
"stopnet": true, // Train stopnet predicting the end of synthesis.
|
||||||
|
|
|
@ -39,13 +39,13 @@
|
||||||
"warmup_steps": 4000, // Noam decay steps to increase the learning rate from 0 to "lr"
|
"warmup_steps": 4000, // Noam decay steps to increase the learning rate from 0 to "lr"
|
||||||
"memory_size": 5, // ONLY TACOTRON - memory queue size used to queue network predictions to feed autoregressive connection. Useful if r < 5.
|
"memory_size": 5, // ONLY TACOTRON - memory queue size used to queue network predictions to feed autoregressive connection. Useful if r < 5.
|
||||||
"attention_norm": "sigmoid", // softmax or sigmoid. Suggested to use softmax for Tacotron2 and sigmoid for Tacotron.
|
"attention_norm": "sigmoid", // softmax or sigmoid. Suggested to use softmax for Tacotron2 and sigmoid for Tacotron.
|
||||||
"prenet_type": "original", // ONLY TACOTRON2 - "original" or "bn".
|
"prenet_type": "original", // "original" or "bn".
|
||||||
"prenet_dropout": true, // ONLY TACOTRON2 - enable/disable dropout at prenet.
|
"prenet_dropout": true, // enable/disable dropout at prenet.
|
||||||
"windowing": false, // Enables attention windowing. Used only in eval mode.
|
"windowing": false, // Enables attention windowing. Used only in eval mode.
|
||||||
"use_forward_attn": false, // ONLY TACOTRON2 - if it uses forward attention. In general, it aligns faster.
|
"use_forward_attn": false, // enable/disable forward attention. In general, it aligns faster.
|
||||||
"forward_attn_mask": false,
|
"forward_attn_mask": false,
|
||||||
"transition_agent": false, // ONLY TACOTRON2 - enable/disable transition agent of forward attention.
|
"transition_agent": false, // enable/disable transition agent of forward attention.
|
||||||
"location_attn": true, // ONLY TACOTRON2 - enable_disable location sensitive attention. It is enabled for TACOTRON by default.
|
"location_attn": true, // enable/disable location sensitive attention.
|
||||||
"loss_masking": true, // enable / disable loss masking against the sequence padding.
|
"loss_masking": true, // enable / disable loss masking against the sequence padding.
|
||||||
"enable_eos_bos_chars": false, // enable/disable beginning of sentence and end of sentence chars.
|
"enable_eos_bos_chars": false, // enable/disable beginning of sentence and end of sentence chars.
|
||||||
"stopnet": true, // Train stopnet predicting the end of synthesis.
|
"stopnet": true, // Train stopnet predicting the end of synthesis.
|
||||||
|
|
|
@ -42,10 +42,10 @@
|
||||||
"attention_norm": "sigmoid", // softmax or sigmoid. Suggested to use softmax for Tacotron2 and sigmoid for Tacotron.
|
"attention_norm": "sigmoid", // softmax or sigmoid. Suggested to use softmax for Tacotron2 and sigmoid for Tacotron.
|
||||||
"prenet_type": "original", // "original" or "bn".
|
"prenet_type": "original", // "original" or "bn".
|
||||||
"prenet_dropout": true, // enable/disable dropout at prenet.
|
"prenet_dropout": true, // enable/disable dropout at prenet.
|
||||||
"use_forward_attn": true, // if it uses forward attention. In general, it aligns faster.
|
"use_forward_attn": true, // enable/disable forward attention. In general, it aligns faster.
|
||||||
"forward_attn_mask": false, // Apply forward attention mask at inference to prevent bad modes. Try it if your model does not align well.
|
"forward_attn_mask": false, // Apply forward attention mask at inference to prevent bad modes. Try it if your model does not align well.
|
||||||
"transition_agent": true, // enable/disable transition agent of forward attention.
|
"transition_agent": true, // enable/disable transition agent of forward attention.
|
||||||
"location_attn": false, // enable_disable location sensitive attention. It is enabled for TACOTRON by default.
|
"location_attn": false, // enable/disable location sensitive attention.
|
||||||
"loss_masking": true, // enable / disable loss masking against the sequence padding.
|
"loss_masking": true, // enable / disable loss masking against the sequence padding.
|
||||||
"enable_eos_bos_chars": false, // enable/disable beginning of sentence and end of sentence chars.
|
"enable_eos_bos_chars": false, // enable/disable beginning of sentence and end of sentence chars.
|
||||||
"stopnet": true, // Train stopnet predicting the end of synthesis.
|
"stopnet": true, // Train stopnet predicting the end of synthesis.
|
||||||
|
|
|
@ -39,12 +39,12 @@
|
||||||
"warmup_steps": 4000, // Noam decay steps to increase the learning rate from 0 to "lr"
|
"warmup_steps": 4000, // Noam decay steps to increase the learning rate from 0 to "lr"
|
||||||
"memory_size": 5, // ONLY TACOTRON - memory queue size used to queue network predictions to feed autoregressive connection. Useful if r < 5.
|
"memory_size": 5, // ONLY TACOTRON - memory queue size used to queue network predictions to feed autoregressive connection. Useful if r < 5.
|
||||||
"attention_norm": "sigmoid", // softmax or sigmoid. Suggested to use softmax for Tacotron2 and sigmoid for Tacotron.
|
"attention_norm": "sigmoid", // softmax or sigmoid. Suggested to use softmax for Tacotron2 and sigmoid for Tacotron.
|
||||||
"prenet_type": "original", // ONLY TACOTRON2 - "original" or "bn".
|
"prenet_type": "original", // "original" or "bn".
|
||||||
"prenet_dropout": true, // ONLY TACOTRON2 - enable/disable dropout at prenet.
|
"prenet_dropout": true, // enable/disable dropout at prenet.
|
||||||
"use_forward_attn": true, // ONLY TACOTRON2 - if it uses forward attention. In general, it aligns faster.
|
"use_forward_attn": true, // enable/disable forward attention. In general, it aligns faster.
|
||||||
"forward_attn_mask": false, // Apply forward attention mask at inference to prevent bad modes. Try it if your model does not align well.
|
"forward_attn_mask": false, // Apply forward attention mask at inference to prevent bad modes. Try it if your model does not align well.
|
||||||
"transition_agent": false, // ONLY TACOTRON2 - enable/disable transition agent of forward attention.
|
"transition_agent": false, // enable/disable transition agent of forward attention.
|
||||||
"location_attn": false, // ONLY TACOTRON2 - enable_disable location sensitive attention. It is enabled for TACOTRON by default.
|
"location_attn": false, // enable/disable location sensitive attention. It is enabled for TACOTRON by default.
|
||||||
"loss_masking": true, // enable / disable loss masking against the sequence padding.
|
"loss_masking": true, // enable / disable loss masking against the sequence padding.
|
||||||
"enable_eos_bos_chars": false, // enable/disable beginning of sentence and end of sentence chars.
|
"enable_eos_bos_chars": false, // enable/disable beginning of sentence and end of sentence chars.
|
||||||
"stopnet": true, // Train stopnet predicting the end of synthesis.
|
"stopnet": true, // Train stopnet predicting the end of synthesis.
|
||||||
|
|
|
@ -40,12 +40,12 @@
|
||||||
"windowing": false, // Enables attention windowing. Used only in eval mode.
|
"windowing": false, // Enables attention windowing. Used only in eval mode.
|
||||||
"memory_size": 5, // ONLY TACOTRON - memory queue size used to queue network predictions to feed autoregressive connection. Useful if r < 5.
|
"memory_size": 5, // ONLY TACOTRON - memory queue size used to queue network predictions to feed autoregressive connection. Useful if r < 5.
|
||||||
"attention_norm": "sigmoid", // softmax or sigmoid. Suggested to use softmax for Tacotron2 and sigmoid for Tacotron.
|
"attention_norm": "sigmoid", // softmax or sigmoid. Suggested to use softmax for Tacotron2 and sigmoid for Tacotron.
|
||||||
"prenet_type": "original", // ONLY TACOTRON2 - "original" or "bn".
|
"prenet_type": "original", // "original" or "bn".
|
||||||
"prenet_dropout": true, // ONLY TACOTRON2 - enable/disable dropout at prenet.
|
"prenet_dropout": true, // enable/disable dropout at prenet.
|
||||||
"use_forward_attn": false, // ONLY TACOTRON2 - if it uses forward attention. In general, it aligns faster.
|
"use_forward_attn": false, // enable/disable forward attention. In general, it aligns faster.
|
||||||
"transition_agent": false, // ONLY TACOTRON2 - enable/disable transition agent of forward attention.
|
"transition_agent": false, // enable/disable transition agent of forward attention.
|
||||||
"forward_attn_mask": false,
|
"forward_attn_mask": false, // Apply forward attention mask at inference to prevent bad modes. Try it if your model does not align well.
|
||||||
"location_attn": true, // ONLY TACOTRON2 - enable_disable location sensitive attention. It is enabled for TACOTRON by default.
|
"location_attn": true, // enable/disable location sensitive attention. It is enabled for TACOTRON by default.
|
||||||
"loss_masking": true, // enable / disable loss masking against the sequence padding.
|
"loss_masking": true, // enable / disable loss masking against the sequence padding.
|
||||||
"enable_eos_bos_chars": false, // enable/disable beginning of sentence and end of sentence chars.
|
"enable_eos_bos_chars": false, // enable/disable beginning of sentence and end of sentence chars.
|
||||||
"stopnet": true, // Train stopnet predicting the end of synthesis.
|
"stopnet": true, // Train stopnet predicting the end of synthesis.
|
||||||
|
|
|
@ -42,8 +42,8 @@
|
||||||
"attention_norm": "sigmoid", // softmax or sigmoid. Suggested to use softmax for Tacotron2 and sigmoid for Tacotron.
|
"attention_norm": "sigmoid", // softmax or sigmoid. Suggested to use softmax for Tacotron2 and sigmoid for Tacotron.
|
||||||
"prenet_type": "original", // "original" or "bn".
|
"prenet_type": "original", // "original" or "bn".
|
||||||
"prenet_dropout": true, // enable/disable dropout at prenet.
|
"prenet_dropout": true, // enable/disable dropout at prenet.
|
||||||
"use_forward_attn": true, // if it uses forward attention. In general, it aligns faster.
|
"use_forward_attn": true, // enable/disable forward attention. In general, it aligns faster.
|
||||||
"forward_attn_mask": false, // Apply forward attention mask af inference to prevent bad modes. Try it if your model does not align well.
|
"forward_attn_mask": false, // Apply forward attention mask at inference to prevent bad modes. Try it if your model does not align well.
|
||||||
"transition_agent": false, // enable/disable transition agent of forward attention.
|
"transition_agent": false, // enable/disable transition agent of forward attention.
|
||||||
"location_attn": false, // enable/disable location sensitive attention. It is enabled for TACOTRON by default.
|
"location_attn": false, // enable/disable location sensitive attention. It is enabled for TACOTRON by default.
|
||||||
"loss_masking": true, // enable / disable loss masking against the sequence padding.
|
"loss_masking": true, // enable / disable loss masking against the sequence padding.
|
||||||
|
|
|
@ -75,21 +75,19 @@ def mailabs(root_path, meta_files=None):
|
||||||
speaker_regex = re.compile("by_book/(male|female)/(?P<speaker_name>[^/]+)/")
|
speaker_regex = re.compile("by_book/(male|female)/(?P<speaker_name>[^/]+)/")
|
||||||
if meta_files is None:
|
if meta_files is None:
|
||||||
csv_files = glob(root_path+"/**/metadata.csv", recursive=True)
|
csv_files = glob(root_path+"/**/metadata.csv", recursive=True)
|
||||||
folders = [os.path.dirname(f) for f in csv_files]
|
|
||||||
else:
|
else:
|
||||||
csv_files = meta_files
|
csv_files = meta_files
|
||||||
folders = [f.strip().split("by_book")[1][1:] for f in csv_files]
|
|
||||||
# meta_files = [f.strip() for f in meta_files.split(",")]
|
# meta_files = [f.strip() for f in meta_files.split(",")]
|
||||||
items = []
|
items = []
|
||||||
for idx, csv_file in enumerate(csv_files):
|
for csv_file in csv_files:
|
||||||
|
txt_file = os.path.join(root_path, csv_file)
|
||||||
|
folder = os.path.dirname(txt_file)
|
||||||
# determine speaker based on folder structure...
|
# determine speaker based on folder structure...
|
||||||
speaker_name_match = speaker_regex.search(csv_file)
|
speaker_name_match = speaker_regex.search(txt_file)
|
||||||
if speaker_name_match is None:
|
if speaker_name_match is None:
|
||||||
continue
|
continue
|
||||||
speaker_name = speaker_name_match.group("speaker_name")
|
speaker_name = speaker_name_match.group("speaker_name")
|
||||||
print(" | > {}".format(csv_file))
|
print(" | > {}".format(csv_file))
|
||||||
folder = folders[idx]
|
|
||||||
txt_file = os.path.join(root_path, csv_file)
|
|
||||||
with open(txt_file, 'r') as ttf:
|
with open(txt_file, 'r') as ttf:
|
||||||
for line in ttf:
|
for line in ttf:
|
||||||
cols = line.split('|')
|
cols = line.split('|')
|
||||||
|
|
Загрузка…
Ссылка в новой задаче