remove redundant files

2018-05-10 16:36:07 -07:00 · 2018-05-10 16:36:07 -07:00 · 7c40455edd
--- a/models/tacotron.py
+++ b/models/tacotron.py
@ -1,6 +1,5 @@
 # coding: utf-8
 import torch
-from torch.autograd import Variable
 from torch import nn
 from TTS.utils.text.symbols import symbols
 from TTS.layers.tacotron import Prenet, Encoder, Decoder, CBHG
--- a/module.py
+++ b/module.py
@ -1,312 +0,0 @@
-import torch
-from torch.autograd import Variable
-import torch.nn as nn
-import torch.nn.functional as F
-from collections import OrderedDict
-import numpy as np
-
-use_cuda = torch.cuda.is_available()
-
-
-class SeqLinear(nn.Module):
-    """
-    Linear layer for sequences
-    """
-
-    def __init__(self, input_size, output_size, time_dim=2):
-        """
-        :param input_size: dimension of input
-        :param output_size: dimension of output
-        :param time_dim: index of time dimension
-        """
-        super(SeqLinear, self).__init__()
-        self.input_size = input_size
-        self.output_size = output_size
-        self.time_dim = time_dim
-        self.linear = nn.Linear(input_size, output_size)
-
-    def forward(self, input_):
-        """
-
-        :param input_: sequences
-        :return: outputs
-        """
-        batch_size = input_.size()[0]
-        if self.time_dim == 2:
-            input_ = input_.transpose(1, 2).contiguous()
-        input_ = input_.view(-1, self.input_size)
-
-        out = self.linear(input_).view(batch_size, -1, self.output_size)
-
-        if self.time_dim == 2:
-            out = out.contiguous().transpose(1, 2)
-
-        return out
-
-
-class Prenet(nn.Module):
-    """
-    Prenet before passing through the network
-    """
-
-    def __init__(self, input_size, hidden_size, output_size):
-        """
-
-        :param input_size: dimension of input
-        :param hidden_size: dimension of hidden unit
-        :param output_size: dimension of output
-        """
-        super(Prenet, self).__init__()
-        self.input_size = input_size
-        self.output_size = output_size
-        self.hidden_size = hidden_size
-        self.layer = nn.Sequential(OrderedDict([
-            ('fc1', SeqLinear(self.input_size, self.hidden_size)),
-            ('relu1', nn.ReLU()),
-            ('dropout1', nn.Dropout(0.5)),
-            ('fc2', SeqLinear(self.hidden_size, self.output_size)),
-            ('relu2', nn.ReLU()),
-            ('dropout2', nn.Dropout(0.5)),
-        ]))
-
-    def forward(self, input_):
-
-        out = self.layer(input_)
-
-        return out
-
-
-class CBHG(nn.Module):
-    """
-    CBHG Module
-    """
-
-    def __init__(self, hidden_size, K=16, projection_size=128, num_gru_layers=2, max_pool_kernel_size=2, is_post=False):
-        """
-
-        :param hidden_size: dimension of hidden unit
-        :param K: # of convolution banks
-        :param projection_size: dimension of projection unit
-        :param num_gru_layers: # of layers of GRUcell
-        :param max_pool_kernel_size: max pooling kernel size
-        :param is_post: whether post processing or not
-        """
-        super(CBHG, self).__init__()
-        self.hidden_size = hidden_size
-        self.num_gru_layers = num_gru_layers
-        self.projection_size = projection_size
-        self.convbank_list = nn.ModuleList()
-        self.convbank_list.append(nn.Conv1d(in_channels=projection_size,
-                                            out_channels=hidden_size,
-                                            kernel_size=1,
-                                            padding=int(np.floor(1 / 2))))
-
-        for i in range(2, K + 1):
-            self.convbank_list.append(nn.Conv1d(in_channels=hidden_size,
-                                                out_channels=hidden_size,
-                                                kernel_size=i,
-                                                padding=int(np.floor(i / 2))))
-
-        self.batchnorm_list = nn.ModuleList()
-        for i in range(1, K + 1):
-            self.batchnorm_list.append(nn.BatchNorm1d(hidden_size))
-
-        convbank_outdim = hidden_size * K
-        if is_post:
-            self.conv_projection_1 = nn.Conv1d(in_channels=convbank_outdim,
-                                               out_channels=hidden_size * 2,
-                                               kernel_size=3,
-                                               padding=int(np.floor(3 / 2)))
-            self.conv_projection_2 = nn.Conv1d(in_channels=hidden_size * 2,
-                                               out_channels=projection_size,
-                                               kernel_size=3,
-                                               padding=int(np.floor(3 / 2)))
-            self.batchnorm_proj_1 = nn.BatchNorm1d(hidden_size * 2)
-
-        else:
-            self.conv_projection_1 = nn.Conv1d(in_channels=convbank_outdim,
-                                               out_channels=hidden_size,
-                                               kernel_size=3,
-                                               padding=int(np.floor(3 / 2)))
-            self.conv_projection_2 = nn.Conv1d(in_channels=hidden_size,
-                                               out_channels=projection_size,
-                                               kernel_size=3,
-                                               padding=int(np.floor(3 / 2)))
-            self.batchnorm_proj_1 = nn.BatchNorm1d(hidden_size)
-
-        self.batchnorm_proj_2 = nn.BatchNorm1d(projection_size)
-
-        self.max_pool = nn.MaxPool1d(max_pool_kernel_size, stride=1, padding=1)
-        self.highway = Highwaynet(self.projection_size)
-        self.gru = nn.GRU(self.projection_size, self.hidden_size, num_layers=2,
-                          batch_first=True,
-                          bidirectional=True)
-
-    def _conv_fit_dim(self, x, kernel_size=3):
-        if kernel_size % 2 == 0:
-            return x[:, :, :-1]
-        else:
-            return x
-
-    def forward(self, input_):
-
-        input_ = input_.contiguous()
-        batch_size = input_.size()[0]
-
-        convbank_list = list()
-        convbank_input = input_
-
-        # Convolution bank filters
-        for k, (conv, batchnorm) in enumerate(zip(self.convbank_list, self.batchnorm_list)):
-            convbank_input = F.relu(batchnorm(self._conv_fit_dim(
-                conv(convbank_input), k + 1).contiguous()))
-            convbank_list.append(convbank_input)
-
-        # Concatenate all features
-        conv_cat = torch.cat(convbank_list, dim=1)
-
-        # Max pooling
-        conv_cat = self.max_pool(conv_cat)[:, :, :-1]
-
-        # Projection
-        conv_projection = F.relu(self.batchnorm_proj_1(
-            self._conv_fit_dim(self.conv_projection_1(conv_cat))))
-        conv_projection = self.batchnorm_proj_2(self._conv_fit_dim(
-            self.conv_projection_2(conv_projection))) + input_
-
-        # Highway networks
-        highway = self.highway.forward(conv_projection)
-        highway = torch.transpose(highway, 1, 2)
-
-        # Bidirectional GRU
-        if use_cuda:
-            init_gru = Variable(torch.zeros(
-                2 * self.num_gru_layers, batch_size, self.hidden_size)).cuda()
-        else:
-            init_gru = Variable(torch.zeros(
-                2 * self.num_gru_layers, batch_size, self.hidden_size))
-
-        self.gru.flatten_parameters()
-        out, _ = self.gru(highway, init_gru)
-
-        return out
-
-
-class Highwaynet(nn.Module):
-    """
-    Highway network
-    """
-
-    def __init__(self, num_units, num_layers=4):
-        """
-
-        :param num_units: dimension of hidden unit
-        :param num_layers: # of highway layers
-        """
-        super(Highwaynet, self).__init__()
-        self.num_units = num_units
-        self.num_layers = num_layers
-        self.gates = nn.ModuleList()
-        self.linears = nn.ModuleList()
-        for _ in range(self.num_layers):
-            self.linears.append(SeqLinear(num_units, num_units))
-            self.gates.append(SeqLinear(num_units, num_units))
-
-    def forward(self, input_):
-
-        out = input_
-
-        # highway gated function
-        for fc1, fc2 in zip(self.linears, self.gates):
-
-            h = F.relu(fc1.forward(out))
-            t = F.sigmoid(fc2.forward(out))
-
-            c = 1. - t
-            out = h * t + out * c
-
-        return out
-
-
-class AttentionDecoder(nn.Module):
-    """
-    Decoder with attention mechanism (Vinyals et al.)
-    """
-
-    def __init__(self, num_units, num_mels, outputs_per_step):
-        """
-
-        :param num_units: dimension of hidden units
-        """
-        super(AttentionDecoder, self).__init__()
-        self.num_units = num_units
-        self.num_mels = num_mels
-        self.outputs_per_step = outputs_per_step
-
-        self.v = nn.Linear(num_units, 1, bias=False)
-        self.W1 = nn.Linear(num_units, num_units, bias=False)
-        self.W2 = nn.Linear(num_units, num_units, bias=False)
-
-        self.attn_grucell = nn.GRUCell(num_units // 2, num_units)
-        self.gru1 = nn.GRUCell(num_units, num_units)
-        self.gru2 = nn.GRUCell(num_units, num_units)
-
-        self.attn_projection = nn.Linear(num_units * 2, num_units)
-        self.out = nn.Linear(num_units, num_mels * outputs_per_step)
-
-    def forward(self, decoder_input, memory, attn_hidden, gru1_hidden, gru2_hidden):
-
-        memory_len = memory.size()[1]
-        batch_size = memory.size()[0]
-
-        # Get keys
-        keys = self.W1(memory.contiguous().view(-1, self.num_units))
-        keys = keys.view(-1, memory_len, self.num_units)
-
-        # Get hidden state (query) passed through GRUcell
-        d_t = self.attn_grucell(decoder_input, attn_hidden)
-
-        # Duplicate query with same dimension of keys for matrix operation (Speed up)
-        d_t_duplicate = self.W2(d_t).unsqueeze(1).expand_as(memory)
-
-        # Calculate attention score and get attention weights
-        attn_weights = self.v(
-            F.tanh(keys + d_t_duplicate).view(-1, self.num_units)).view(-1, memory_len, 1)
-        attn_weights = attn_weights.squeeze(2)
-        attn_weights = F.softmax(attn_weights, dim=0)
-
-        # Concatenate with original query
-        d_t_prime = torch.bmm(attn_weights.view(
-            [batch_size, 1, -1]), memory).squeeze(1)
-
-        # Residual GRU
-        gru1_input = self.attn_projection(torch.cat([d_t, d_t_prime], 1))
-        gru1_hidden = self.gru1(gru1_input, gru1_hidden)
-        gru2_input = gru1_input + gru1_hidden
-
-        gru2_hidden = self.gru2(gru2_input, gru2_hidden)
-        bf_out = gru2_input + gru2_hidden
-
-        # Output
-        output = self.out(bf_out).view(-1, self.num_mels,
-                                       self.outputs_per_step)
-
-        return output, d_t, gru1_hidden, gru2_hidden
-
-    def inithidden(self, batch_size):
-        if use_cuda:
-            attn_hidden = Variable(torch.zeros(
-                batch_size, self.num_units), requires_grad=False).cuda()
-            gru1_hidden = Variable(torch.zeros(
-                batch_size, self.num_units), requires_grad=False).cuda()
-            gru2_hidden = Variable(torch.zeros(
-                batch_size, self.num_units), requires_grad=False).cuda()
-        else:
-            attn_hidden = Variable(torch.zeros(
-                batch_size, self.num_units), requires_grad=False)
-            gru1_hidden = Variable(torch.zeros(
-                batch_size, self.num_units), requires_grad=False)
-            gru2_hidden = Variable(torch.zeros(
-                batch_size, self.num_units), requires_grad=False)
-
-        return attn_hidden, gru1_hidden, gru2_hidden
--- a/synthesis.py
+++ b/synthesis.py
@ -1,92 +0,0 @@
-# -*- coding: utf-8 -*-
-
-from network import *
-from data import inv_spectrogram, find_endpoint, save_wav, spectrogram
-import numpy as np
-import argparse
-import os
-import sys
-import io
-from text import text_to_sequence
-
-use_cuda = torch.cuda.is_available()
-
-
-def main(args):
-
-    # Make model
-    if use_cuda:
-        model = nn.DataParallel(Tacotron().cuda())
-
-    # Load checkpoint
-    try:
-        checkpoint = torch.load(os.path.join(
-            hp.checkpoint_path, 'checkpoint_%d.pth.tar' % args.restore_step))
-        model.load_state_dict(checkpoint['model'])
-        print("\n--------model restored at step %d--------\n" %
-              args.restore_step)
-
-    except:
-        raise FileNotFoundError("\n------------Model not exists------------\n")
-
-    # Evaluation
-    model = model.eval()
-
-    # Make result folder if not exists
-    if not os.path.exists(hp.output_path):
-        os.mkdir(hp.output_path)
-
-    # Sentences for generation
-    sentences = [
-        "I try my best to translate text to speech. But I know I need more work",
-        "The new Firefox, Fast for good.",
-        "Technology is continually providing us with new ways to create and publish stories.",
-        "For these stories to achieve their full impact, it requires tool.",
-        "I am allien and I am here to destron your world."
-    ]
-
-    # Synthesis and save to wav files
-    for i, text in enumerate(sentences):
-        wav = generate(model, text)
-        path = os.path.join(hp.output_path, 'result_%d_%d.wav' %
-                            (args.restore_step, i + 1))
-        with open(path, 'wb') as f:
-            f.write(wav)
-
-        f.close()
-        print("save wav file at step %d ..." % (i + 1))
-
-
-def generate(model, text):
-
-    # Text to index sequence
-    cleaner_names = [x.strip() for x in hp.cleaners.split(',')]
-    seq = np.expand_dims(np.asarray(text_to_sequence(
-        text, cleaner_names), dtype=np.int32), axis=0)
-
-    # Provide [GO] Frame
-    mel_input = np.zeros([seq.shape[0], hp.num_mels, 1], dtype=np.float32)
-
-    # Variables
-    characters = Variable(torch.from_numpy(seq).type(
-        torch.cuda.LongTensor), volatile=True).cuda()
-    mel_input = Variable(torch.from_numpy(mel_input).type(
-        torch.cuda.FloatTensor), volatile=True).cuda()
-
-    # Spectrogram to wav
-    _, linear_output = model.forward(characters, mel_input)
-    wav = inv_spectrogram(linear_output[0].data.cpu().numpy())
-    wav = wav[:find_endpoint(wav)]
-    out = io.BytesIO()
-    save_wav(wav, out)
-
-    return out.getvalue()
-
-
-if __name__ == '__main__':
-    parser = argparse.ArgumentParser()
-    parser.add_argument('--restore_step', type=int,
-                        help='Global step to restore checkpoint', default=0)
-    parser.add_argument('--batch_size', type=int, help='Batch size', default=1)
-    args = parser.parse_args()
-    main(args)