diff --git a/config.json b/config.json index d8af7a9..f0615e4 100644 --- a/config.json +++ b/config.json @@ -1,6 +1,6 @@ { - "model_name": "TTS-weight-decay", - "model_description": "Weight decay as in FastAI", + "model_name": "TTS-sigmoid", + "model_description": "Net outputting Sigmoid unit", "audio_processor": "audio", "num_mels": 80, "num_freq": 1025, diff --git a/layers/tacotron.py b/layers/tacotron.py index 6ab06d7..835abde 100644 --- a/layers/tacotron.py +++ b/layers/tacotron.py @@ -374,6 +374,7 @@ class Decoder(nn.Module): decoder_output = decoder_input # predict mel vectors from decoder vectors output = self.proj_to_mel(decoder_output) + output = torch.sigmoid(output) stop_input = output # predict stop token stop_token, stopnet_rnn_hidden = self.stopnet( diff --git a/models/tacotron.py b/models/tacotron.py index cd023a9..8a215b9 100644 --- a/models/tacotron.py +++ b/models/tacotron.py @@ -23,7 +23,9 @@ class Tacotron(nn.Module): self.encoder = Encoder(embedding_dim) self.decoder = Decoder(256, mel_dim, r) self.postnet = PostCBHG(mel_dim) - self.last_linear = nn.Linear(self.postnet.cbhg.gru_features * 2, linear_dim) + self.last_linear = nn.Sequential( + nn.Linear(self.postnet.cbhg.gru_features * 2, linear_dim), + nn.Sigmoid()) def forward(self, characters, mel_specs=None, mask=None): B = characters.size(0)