зеркало из https://github.com/mozilla/TTS.git
split train and validation steps
This commit is contained in:
Родитель
793563b586
Коммит
021ac3978d
|
@ -20,11 +20,10 @@
|
|||
"griffin_lim_iters": 60,
|
||||
"power": 1.5,
|
||||
|
||||
"num_loader_workers": 32,
|
||||
"num_loader_workers": 16,
|
||||
|
||||
"checkpoint": false,
|
||||
"save_step": 69,
|
||||
"data_path": "/data/shared/KeithIto/LJSpeech-1.0",
|
||||
"data_path": "/run/shm/erogol/LJSpeech-1.0",
|
||||
"output_path": "result",
|
||||
"log_dir": "/home/erogol/projects/TTS/logs/"
|
||||
}
|
||||
|
|
|
@ -16,16 +16,15 @@ class LJSpeechDataset(Dataset):
|
|||
text_cleaner, num_mels, min_level_db, frame_shift_ms,
|
||||
frame_length_ms, preemphasis, ref_level_db, num_freq, power):
|
||||
|
||||
f = open(csv_file, "r")
|
||||
self.frames = [line.split('|') for line in f]
|
||||
f.close()
|
||||
with open(csv_file, "r") as f:
|
||||
self.frames = [line.split('|') for line in f]
|
||||
self.frames = self.frames[:256]
|
||||
self.root_dir = root_dir
|
||||
self.outputs_per_step = outputs_per_step
|
||||
self.sample_rate = sample_rate
|
||||
self.cleaners = text_cleaner
|
||||
self.ap = AudioProcessor(sample_rate, num_mels, min_level_db, frame_shift_ms,
|
||||
frame_length_ms, preemphasis, ref_level_db, num_freq, power
|
||||
)
|
||||
frame_length_ms, preemphasis, ref_level_db, num_freq, power)
|
||||
print(" > Reading LJSpeech from - {}".format(root_dir))
|
||||
print(" | > Number of instances : {}".format(len(self.frames)))
|
||||
|
||||
|
@ -41,11 +40,11 @@ class LJSpeechDataset(Dataset):
|
|||
|
||||
def __getitem__(self, idx):
|
||||
wav_name = os.path.join(self.root_dir,
|
||||
self.frames.ix[idx, 0]) + '.wav'
|
||||
self.frames[idx][0]) + '.wav'
|
||||
text = self.frames[idx][1]
|
||||
text = np.asarray(text_to_sequence(text, [self.cleaners]), dtype=np.int32)
|
||||
wav = np.asarray(self.load_wav(wav_name)[0], dtype=np.float32)
|
||||
sample = {'text': text, 'wav': wav, 'item_idx': self.frames.ix[idx, 0]}
|
||||
sample = {'text': text, 'wav': wav, 'item_idx': self.frames[idx][0]}
|
||||
return sample
|
||||
|
||||
def get_dummy_data(self):
|
||||
|
|
483
train.py
483
train.py
|
@ -27,36 +27,265 @@ from utils.visual import plot_alignment, plot_spectrogram
|
|||
from datasets.LJSpeech import LJSpeechDataset
|
||||
from models.tacotron import Tacotron
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
# Module-level setup: everything below runs at import time and defines the
# globals (`use_cuda`, `args`, `c`, `OUT_PATH`, `tb`, ...) that the training
# and evaluation functions in this file read.
# ---------------------------------------------------------------------------

# True when a CUDA device is usable; guards every `.cuda()` transfer.
use_cuda = torch.cuda.is_available()

# Command-line interface.
parser = argparse.ArgumentParser()
parser.add_argument('--restore_step', type=int,
                    help='Global step to restore checkpoint', default=0)
# NOTE(review): the default is the int 0 although the declared type is str;
# kept unchanged because code outside this view may test it for falsiness.
parser.add_argument('--restore_path', type=str,
                    help='Folder path to checkpoints', default=0)
parser.add_argument('--config_path', type=str,
                    help='path to config file for training',)
args = parser.parse_args()

# setup output paths and read configs
c = load_config(args.config_path)
_ = os.path.dirname(os.path.realpath(__file__))
OUT_PATH = os.path.join(_, c.output_path)
OUT_PATH = create_experiment_folder(OUT_PATH)
CHECKPOINT_PATH = os.path.join(OUT_PATH, 'checkpoints')
# Keep a copy of the exact config next to the experiment outputs.
shutil.copyfile(args.config_path, os.path.join(OUT_PATH, 'config.json'))

# save config to tmp place to be loaded by subsequent modules.
# The file name is derived from the process id.
file_name = str(os.getpid())
tmp_path = os.path.join("/tmp/", file_name+'_tts')
# Use a context manager so the handle is flushed and closed right away
# instead of being left open until garbage collection.
with open(tmp_path, "wb") as tmp_file:
    pickle.dump(c, tmp_file)

# setup tensorboard (event files are written into the experiment folder)
LOG_DIR = OUT_PATH
tb = SummaryWriter(LOG_DIR)
|
||||
|
||||
|
||||
def signal_handler(signal, frame):
    """Handle SIGINT (Ctrl+C): clean up the experiment folder and quit.

    Both arguments are supplied by the signal machinery and are not used.
    The clean-up itself is delegated to ``remove_experiment_folder`` with
    the module-level ``OUT_PATH``; the process then exits with status 1.
    """
    # Tell the user why the run is stopping.
    print(" !! Pressed Ctrl+C !!")

    # Delegate folder clean-up to the project helper.
    remove_experiment_folder(OUT_PATH)

    # Non-zero status marks the run as interrupted.
    sys.exit(1)
|
||||
|
||||
|
||||
def train(model, criterion, data_loader, optimizer, epoch):
    """Run one training epoch and log its statistics to TensorBoard.

    Relies on the module-level ``c`` (config), ``args`` (CLI arguments),
    ``tb`` (summary writer), ``use_cuda`` and ``OUT_PATH``.

    Args:
        model: network whose ``forward(text, mel, input_lengths=...)``
            returns ``(mel_output, linear_output, alignments)``.
        criterion: loss callable applied to ``(prediction, target)``.
        data_loader: loader yielding batches indexed as
            ``(text, text_lengths, linear_spec, mel_spec)``; its
            ``dataset`` must expose the audio processor as ``.ap``.
        optimizer: optimizer whose parameter groups receive the scheduled
            learning rate before every update.
        epoch: index of the current epoch, used to derive the global step.

    Returns:
        Tuple ``(avg_linear_loss, current_step)``: the epoch average of the
        linear loss as tracked by the progress bar, and the last global
        step reached. If no update step completed, the average is
        ``float('inf')``.
    """
    model = model.train()
    epoch_time = 0
    completed_steps = 0
    # Defined up front so the return value exists even for an empty loader.
    current_step = args.restore_step + epoch * len(data_loader)
    # The audio processor lives on the dataset behind the loader.
    ap = data_loader.dataset.ap

    print(" | > Epoch {}/{}".format(epoch, c.epochs))
    progbar = Progbar(len(data_loader.dataset) / c.batch_size)
    # Number of leading frequency bins (the share of the spectrum below
    # 3000 Hz relative to Nyquist) that get an extra loss term.
    n_priority_freq = int(3000 / (c.sample_rate * 0.5) * c.num_freq)

    for num_iter, data in enumerate(data_loader):
        start_time = time.time()

        # setup input data
        text_input = data[0]
        text_lengths = data[1]
        linear_input = data[2]
        mel_input = data[3]

        current_step = num_iter + args.restore_step + \
            epoch * len(data_loader) + 1

        # setup lr
        current_lr = lr_decay(c.lr, current_step, c.warmup_steps)
        for params_group in optimizer.param_groups:
            params_group['lr'] = current_lr

        optimizer.zero_grad()

        # convert inputs to variables
        text_input_var = Variable(text_input)
        mel_spec_var = Variable(mel_input)
        linear_spec_var = Variable(linear_input, volatile=True)

        # sort sequence by length for curriculum learning
        # TODO: might be unnecessary
        sorted_lengths, indices = torch.sort(
            text_lengths.view(-1), dim=0, descending=True)
        input_lengths_var = Variable(sorted_lengths.long())
        text_input_var = text_input_var[indices]
        mel_spec_var = mel_spec_var[indices]
        linear_spec_var = linear_spec_var[indices]

        # dispatch data to GPU (lengths included, so CPU-only runs work too)
        if use_cuda:
            text_input_var = text_input_var.cuda()
            mel_spec_var = mel_spec_var.cuda()
            linear_spec_var = linear_spec_var.cuda()
            input_lengths_var = input_lengths_var.cuda()

        # forward pass
        mel_output, linear_output, alignments = model.forward(
            text_input_var, mel_spec_var, input_lengths=input_lengths_var)

        # loss computation
        mel_loss = criterion(mel_output, mel_spec_var)
        linear_loss = 0.5 * criterion(linear_output, linear_spec_var) \
            + 0.5 * criterion(linear_output[:, :, :n_priority_freq],
                              linear_spec_var[:, :, :n_priority_freq])
        loss = mel_loss + linear_loss

        # backpass and check the grad norm
        loss.backward()
        grad_norm, skip_flag = check_update(model, 0.5, 100)
        if skip_flag:
            optimizer.zero_grad()
            print(" | > Iteration skipped!!")
            continue
        optimizer.step()
        completed_steps += 1

        step_time = time.time() - start_time
        epoch_time += step_time

        # update
        progbar.update(num_iter+1, values=[('total_loss', loss.data[0]),
                                           ('linear_loss', linear_loss.data[0]),
                                           ('mel_loss', mel_loss.data[0]),
                                           ('grad_norm', grad_norm)])

        # Plot Training Iter Stats
        tb.add_scalar('TrainIterLoss/TotalLoss', loss.data[0], current_step)
        tb.add_scalar('TrainIterLoss/LinearLoss', linear_loss.data[0],
                      current_step)
        tb.add_scalar('TrainIterLoss/MelLoss', mel_loss.data[0], current_step)
        tb.add_scalar('Params/LearningRate', optimizer.param_groups[0]['lr'],
                      current_step)
        tb.add_scalar('Params/GradNorm', grad_norm, current_step)
        tb.add_scalar('Time/StepTime', step_time, current_step)

        if current_step % c.save_step == 0:
            if c.checkpoint:
                # save model
                save_checkpoint(model, optimizer, linear_loss.data[0],
                                OUT_PATH, current_step, epoch)

            # Diagnostic visualizations
            const_spec = linear_output[0].data.cpu().numpy()
            gt_spec = linear_spec_var[0].data.cpu().numpy()

            const_spec = plot_spectrogram(const_spec, ap)
            gt_spec = plot_spectrogram(gt_spec, ap)
            tb.add_image('Visual/Reconstruction', const_spec, current_step)
            tb.add_image('Visual/GroundTruth', gt_spec, current_step)

            align_img = alignments[0].data.cpu().numpy()
            align_img = plot_alignment(align_img)
            tb.add_image('Visual/Alignment', align_img, current_step)

            # Sample audio
            audio_signal = linear_output[0].data.cpu().numpy()
            ap.griffin_lim_iters = 60
            audio_signal = ap.inv_spectrogram(audio_signal.T)
            try:
                tb.add_audio('SampleAudio', audio_signal, current_step,
                             sample_rate=c.sample_rate)
            except Exception:
                # Best effort only; `Exception` (not a bare except) so the
                # SystemExit raised by the Ctrl+C handler still propagates.
                print("\n > Error at audio signal on TB!!")
                print(audio_signal.max())
                print(audio_signal.min())

    if completed_steps == 0:
        # Empty loader or every iteration skipped: nothing to average.
        print(" | > No training step completed in this epoch!!")
        return float('inf'), current_step

    # Progress bar keeps [running sum, count] per tracked value.
    avg_linear_loss = np.mean(
        progbar.sum_values['linear_loss'][0] /
        max(1, progbar.sum_values['linear_loss'][1]))
    avg_mel_loss = np.mean(
        progbar.sum_values['mel_loss'][0] /
        max(1, progbar.sum_values['mel_loss'][1]))
    avg_total_loss = avg_mel_loss + avg_linear_loss

    # Plot Training Epoch Stats (epoch averages, not the last batch's loss)
    tb.add_scalar('TrainEpochLoss/TotalLoss', avg_total_loss, current_step)
    tb.add_scalar('TrainEpochLoss/LinearLoss', avg_linear_loss, current_step)
    tb.add_scalar('TrainEpochLoss/MelLoss', avg_mel_loss, current_step)
    tb.add_scalar('Time/EpochTime', epoch_time, epoch)

    return avg_linear_loss, current_step
|
||||
|
||||
|
||||
def evaluate(model, criterion, data_loader, current_step):
    """Compute validation losses and log them to TensorBoard.

    Relies on the module-level ``c`` (config), ``tb`` (summary writer) and
    ``use_cuda``. No optimizer step is taken.

    Args:
        model: network whose ``forward(text, mel)`` returns
            ``(mel_output, linear_output, alignments)``.
        criterion: loss callable applied to ``(prediction, target)``.
        data_loader: validation loader yielding batches indexed as
            ``(text, text_lengths, linear_spec, mel_spec)``; its
            ``dataset`` must expose the audio processor as ``.ap``.
        current_step: global training step used as the x-axis for all
            scalars, images and audio written here.

    Returns:
        The average linear loss over the validation batches, or
        ``float('inf')`` when the loader produced no batch.
    """
    # Validation must not run with training-time layer behaviour (e.g.
    # dropout); the training loop switches back with `model.train()`.
    # NOTE(review): confirm the model's forward pass does not change its
    # decoding strategy based on `model.training`.
    model = model.eval()

    print("\n | > Validation")
    # Number of leading frequency bins (the share of the spectrum below
    # 3000 Hz relative to Nyquist) that get an extra loss term.
    n_priority_freq = int(3000 / (c.sample_rate * 0.5) * c.num_freq)
    progbar = Progbar(len(data_loader.dataset) / c.batch_size)
    ap = data_loader.dataset.ap

    num_batches = 0
    for num_iter, data in enumerate(data_loader):
        # setup input data
        text_input = data[0]
        linear_input = data[2]
        mel_input = data[3]

        # convert inputs to variables
        text_input_var = Variable(text_input)
        mel_spec_var = Variable(mel_input)
        linear_spec_var = Variable(linear_input, volatile=True)

        # dispatch data to GPU
        if use_cuda:
            text_input_var = text_input_var.cuda()
            mel_spec_var = mel_spec_var.cuda()
            linear_spec_var = linear_spec_var.cuda()

        # forward pass
        mel_output, linear_output, alignments = model.forward(
            text_input_var, mel_spec_var)

        # loss computation
        mel_loss = criterion(mel_output, mel_spec_var)
        linear_loss = 0.5 * criterion(linear_output, linear_spec_var) \
            + 0.5 * criterion(linear_output[:, :, :n_priority_freq],
                              linear_spec_var[:, :, :n_priority_freq])
        loss = mel_loss + linear_loss

        # update
        progbar.update(num_iter+1, values=[('total_loss', loss.data[0]),
                                           ('linear_loss', linear_loss.data[0]),
                                           ('mel_loss', mel_loss.data[0])])
        num_batches += 1

    if num_batches == 0:
        # With drop_last=True a validation set smaller than one batch
        # yields nothing; report it instead of failing on undefined names.
        print(" | > Validation loader produced no batches!!")
        return float('inf')

    # Diagnostic visualizations for one random item of the last batch;
    # the index is bounded by the real batch size, not the configured one.
    idx = np.random.randint(linear_output.size(0))
    const_spec = linear_output[idx].data.cpu().numpy()
    gt_spec = linear_spec_var[idx].data.cpu().numpy()

    const_spec = plot_spectrogram(const_spec, ap)
    gt_spec = plot_spectrogram(gt_spec, ap)
    tb.add_image('ValVisual/Reconstruction', const_spec, current_step)
    tb.add_image('ValVisual/GroundTruth', gt_spec, current_step)

    align_img = alignments[idx].data.cpu().numpy()
    align_img = plot_alignment(align_img)
    tb.add_image('ValVisual/ValidationAlignment', align_img, current_step)

    # Sample audio
    audio_signal = linear_output[idx].data.cpu().numpy()
    ap.griffin_lim_iters = 60
    audio_signal = ap.inv_spectrogram(audio_signal.T)
    try:
        tb.add_audio('ValSampleAudio', audio_signal, current_step,
                     sample_rate=c.sample_rate)
    except Exception:
        # Best effort only; `Exception` (not a bare except) so the
        # SystemExit raised by the Ctrl+C handler still propagates.
        print("\n > Error at audio signal on TB!!")
        print(audio_signal.max())
        print(audio_signal.min())

    # compute average losses; the progress bar keeps [running sum, count]
    avg_linear_loss = np.mean(
        progbar.sum_values['linear_loss'][0] /
        max(1, progbar.sum_values['linear_loss'][1]))
    avg_mel_loss = np.mean(
        progbar.sum_values['mel_loss'][0] /
        max(1, progbar.sum_values['mel_loss'][1]))
    avg_total_loss = avg_mel_loss + avg_linear_loss

    # Plot Learning Stats
    tb.add_scalar('ValEpochLoss/TotalLoss', avg_total_loss, current_step)
    tb.add_scalar('ValEpochLoss/LinearLoss', avg_linear_loss, current_step)
    tb.add_scalar('ValEpochLoss/MelLoss', avg_mel_loss, current_step)
    return avg_linear_loss
|
||||
|
||||
|
||||
def main(args):
|
||||
|
||||
# setup output paths and read configs
|
||||
c = load_config(args.config_path)
|
||||
_ = os.path.dirname(os.path.realpath(__file__))
|
||||
OUT_PATH = os.path.join(_, c.output_path)
|
||||
OUT_PATH = create_experiment_folder(OUT_PATH)
|
||||
CHECKPOINT_PATH = os.path.join(OUT_PATH, 'checkpoints')
|
||||
shutil.copyfile(args.config_path, os.path.join(OUT_PATH, 'config.json'))
|
||||
|
||||
# save config to tmp place to be loaded by subsequent modules.
|
||||
file_name = str(os.getpid())
|
||||
tmp_path = os.path.join("/tmp/", file_name+'_tts')
|
||||
pickle.dump(c, open(tmp_path, "wb"))
|
||||
|
||||
# setup tensorboard
|
||||
LOG_DIR = OUT_PATH
|
||||
tb = SummaryWriter(LOG_DIR)
|
||||
|
||||
# Ctrl+C handler to remove empty experiment folder
|
||||
def signal_handler(signal, frame):
|
||||
print(" !! Pressed Ctrl+C !!")
|
||||
remove_experiment_folder(OUT_PATH)
|
||||
sys.exit(1)
|
||||
signal.signal(signal.SIGINT, signal_handler)
|
||||
|
||||
# Setup the dataset
|
||||
dataset = LJSpeechDataset(os.path.join(c.data_path, 'metadata.csv'),
|
||||
train_dataset = LJSpeechDataset(os.path.join(c.data_path, 'metadata_train.csv'),
|
||||
os.path.join(c.data_path, 'wavs'),
|
||||
c.r,
|
||||
c.sample_rate,
|
||||
|
@ -71,27 +300,42 @@ def main(args):
|
|||
c.power
|
||||
)
|
||||
|
||||
dataloader = DataLoader(dataset, batch_size=c.batch_size,
|
||||
shuffle=True, collate_fn=dataset.collate_fn,
|
||||
train_loader = DataLoader(train_dataset, batch_size=c.batch_size,
|
||||
shuffle=True, collate_fn=train_dataset.collate_fn,
|
||||
drop_last=True, num_workers=c.num_loader_workers,
|
||||
pin_memory=True)
|
||||
|
||||
val_dataset = LJSpeechDataset(os.path.join(c.data_path, 'metadata_val.csv'),
|
||||
os.path.join(c.data_path, 'wavs'),
|
||||
c.r,
|
||||
c.sample_rate,
|
||||
c.text_cleaner,
|
||||
c.num_mels,
|
||||
c.min_level_db,
|
||||
c.frame_shift_ms,
|
||||
c.frame_length_ms,
|
||||
c.preemphasis,
|
||||
c.ref_level_db,
|
||||
c.num_freq,
|
||||
c.power
|
||||
)
|
||||
|
||||
val_loader = DataLoader(val_dataset, batch_size=c.batch_size,
|
||||
shuffle=True, collate_fn=val_dataset.collate_fn,
|
||||
drop_last=True, num_workers= 4,
|
||||
pin_memory=True)
|
||||
|
||||
# setup the model
|
||||
model = Tacotron(c.embedding_size,
|
||||
c.hidden_size,
|
||||
c.num_mels,
|
||||
c.num_freq,
|
||||
c.r)
|
||||
|
||||
# plot model on tensorboard
|
||||
dummy_input = dataset.get_dummy_data()
|
||||
|
||||
## TODO: onnx does not support RNN fully yet
|
||||
# model_proto_path = os.path.join(OUT_PATH, "model.proto")
|
||||
# onnx.export(model, dummy_input, model_proto_path, verbose=True)
|
||||
# tb.add_graph_onnx(model_proto_path)
|
||||
|
||||
optimizer = optim.Adam(model.parameters(), lr=c.lr)
|
||||
|
||||
if use_cuda:
|
||||
criterion = nn.L1Loss().cuda()
|
||||
else:
|
||||
criterion = nn.L1Loss()
|
||||
|
||||
if args.restore_step:
|
||||
checkpoint = torch.load(os.path.join(
|
||||
|
@ -118,169 +362,20 @@ def main(args):
|
|||
|
||||
num_params = count_parameters(model)
|
||||
print(" | > Model has {} parameters".format(num_params))
|
||||
|
||||
model = model.train()
|
||||
|
||||
|
||||
if not os.path.exists(CHECKPOINT_PATH):
|
||||
os.mkdir(CHECKPOINT_PATH)
|
||||
|
||||
if use_cuda:
|
||||
criterion = nn.L1Loss().cuda()
|
||||
else:
|
||||
criterion = nn.L1Loss()
|
||||
|
||||
n_priority_freq = int(3000 / (c.sample_rate * 0.5) * c.num_freq)
|
||||
|
||||
#lr_scheduler = ReduceLROnPlateau(optimizer, factor=c.lr_decay,
|
||||
# patience=c.lr_patience, verbose=True)
|
||||
epoch_time = 0
|
||||
|
||||
if 'best_loss' not in locals():
|
||||
best_loss = float('inf')
|
||||
|
||||
for epoch in range(0, c.epochs):
|
||||
|
||||
print("\n | > Epoch {}/{}".format(epoch, c.epochs))
|
||||
progbar = Progbar(len(dataset) / c.batch_size)
|
||||
|
||||
for num_iter, data in enumerate(dataloader):
|
||||
start_time = time.time()
|
||||
|
||||
text_input = data[0]
|
||||
text_lengths = data[1]
|
||||
linear_input = data[2]
|
||||
mel_input = data[3]
|
||||
|
||||
current_step = num_iter + args.restore_step + epoch * len(dataloader) + 1
|
||||
|
||||
# setup lr
|
||||
current_lr = lr_decay(c.lr, current_step, c.warmup_steps)
|
||||
for params_group in optimizer.param_groups:
|
||||
params_group['lr'] = current_lr
|
||||
|
||||
optimizer.zero_grad()
|
||||
|
||||
# Add a single frame of zeros to Mel Specs for better end detection
|
||||
#try:
|
||||
# mel_input = np.concatenate((np.zeros(
|
||||
# [c.batch_size, 1, c.num_mels], dtype=np.float32),
|
||||
# mel_input[:, 1:, :]), axis=1)
|
||||
#except:
|
||||
# raise TypeError("not same dimension")
|
||||
|
||||
# convert inputs to variables
|
||||
text_input_var = Variable(text_input)
|
||||
mel_spec_var = Variable(mel_input)
|
||||
linear_spec_var = Variable(linear_input, volatile=True)
|
||||
|
||||
# sort sequence by length.
|
||||
# TODO: might be unnecessary
|
||||
sorted_lengths, indices = torch.sort(
|
||||
text_lengths.view(-1), dim=0, descending=True)
|
||||
sorted_lengths = sorted_lengths.long().numpy()
|
||||
|
||||
text_input_var = text_input_var[indices]
|
||||
mel_spec_var = mel_spec_var[indices]
|
||||
linear_spec_var = linear_spec_var[indices]
|
||||
|
||||
if use_cuda:
|
||||
text_input_var = text_input_var.cuda()
|
||||
mel_spec_var = mel_spec_var.cuda()
|
||||
linear_spec_var = linear_spec_var.cuda()
|
||||
|
||||
mel_output, linear_output, alignments =\
|
||||
model.forward(text_input_var, mel_spec_var,
|
||||
input_lengths= torch.autograd.Variable(torch.cuda.LongTensor(sorted_lengths)))
|
||||
|
||||
mel_loss = criterion(mel_output, mel_spec_var)
|
||||
#linear_loss = torch.abs(linear_output - linear_spec_var)
|
||||
#linear_loss = 0.5 * \
|
||||
#torch.mean(linear_loss) + 0.5 * \
|
||||
#torch.mean(linear_loss[:, :n_priority_freq, :])
|
||||
linear_loss = 0.5 * criterion(linear_output, linear_spec_var) \
|
||||
+ 0.5 * criterion(linear_output[:, :, :n_priority_freq],
|
||||
linear_spec_var[: ,: ,:n_priority_freq])
|
||||
loss = mel_loss + linear_loss
|
||||
|
||||
loss.backward()
|
||||
grad_norm, skip_flag = check_update(model, 0.5, 100)
|
||||
if skip_flag:
|
||||
optimizer.zero_grad()
|
||||
print(" | > Iteration skipped!!")
|
||||
continue
|
||||
optimizer.step()
|
||||
|
||||
step_time = time.time() - start_time
|
||||
epoch_time += step_time
|
||||
|
||||
progbar.update(num_iter+1, values=[('total_loss', loss.data[0]),
|
||||
('linear_loss', linear_loss.data[0]),
|
||||
('mel_loss', mel_loss.data[0]),
|
||||
('grad_norm', grad_norm)])
|
||||
|
||||
# Plot Learning Stats
|
||||
tb.add_scalar('Loss/TotalLoss', loss.data[0], current_step)
|
||||
tb.add_scalar('Loss/LinearLoss', linear_loss.data[0],
|
||||
current_step)
|
||||
tb.add_scalar('Loss/MelLoss', mel_loss.data[0], current_step)
|
||||
tb.add_scalar('Params/LearningRate', optimizer.param_groups[0]['lr'],
|
||||
current_step)
|
||||
tb.add_scalar('Params/GradNorm', grad_norm, current_step)
|
||||
tb.add_scalar('Time/StepTime', step_time, current_step)
|
||||
|
||||
align_img = alignments[0].data.cpu().numpy()
|
||||
align_img = plot_alignment(align_img)
|
||||
tb.add_image('Attn/Alignment', align_img, current_step)
|
||||
|
||||
if current_step % c.save_step == 0:
|
||||
|
||||
if c.checkpoint:
|
||||
# save model
|
||||
save_checkpoint(model, optimizer, linear_loss.data[0],
|
||||
OUT_PATH, current_step, epoch)
|
||||
|
||||
# Diagnostic visualizations
|
||||
const_spec = linear_output[0].data.cpu().numpy()
|
||||
gt_spec = linear_spec_var[0].data.cpu().numpy()
|
||||
|
||||
const_spec = plot_spectrogram(const_spec, dataset.ap)
|
||||
gt_spec = plot_spectrogram(gt_spec, dataset.ap)
|
||||
tb.add_image('Spec/Reconstruction', const_spec, current_step)
|
||||
tb.add_image('Spec/GroundTruth', gt_spec, current_step)
|
||||
|
||||
align_img = alignments[0].data.cpu().numpy()
|
||||
align_img = plot_alignment(align_img)
|
||||
tb.add_image('Attn/Alignment', align_img, current_step)
|
||||
|
||||
# Sample audio
|
||||
audio_signal = linear_output[0].data.cpu().numpy()
|
||||
dataset.ap.griffin_lim_iters = 60
|
||||
audio_signal = dataset.ap.inv_spectrogram(audio_signal.T)
|
||||
try:
|
||||
tb.add_audio('SampleAudio', audio_signal, current_step,
|
||||
sample_rate=c.sample_rate)
|
||||
except:
|
||||
print("\n > Error at audio signal on TB!!")
|
||||
print(audio_signal.max())
|
||||
print(audio_signal.min())
|
||||
|
||||
|
||||
# average loss after the epoch
|
||||
avg_epoch_loss = np.mean(
|
||||
progbar.sum_values['linear_loss'][0] / max(1, progbar.sum_values['linear_loss'][1]))
|
||||
best_loss = save_best_model(model, optimizer, avg_epoch_loss,
|
||||
train_loss, current_step = train(model, criterion, train_loader, optimizer, epoch)
|
||||
val_loss = evaluate(model, criterion, val_loader, current_step)
|
||||
best_loss = save_best_model(model, optimizer, val_loss,
|
||||
best_loss, OUT_PATH,
|
||||
current_step, epoch)
|
||||
|
||||
tb.add_scalar('Time/EpochTime', epoch_time, epoch)
|
||||
epoch_time = 0
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
parser = argparse.ArgumentParser()
|
||||
parser.add_argument('--restore_step', type=int,
|
||||
help='Global step to restore checkpoint', default=0)
|
||||
parser.add_argument('--restore_path', type=str,
|
||||
help='Folder path to checkpoints', default=0)
|
||||
parser.add_argument('--config_path', type=str,
|
||||
help='path to config file for training',)
|
||||
args = parser.parse_args()
|
||||
signal.signal(signal.SIGINT, signal_handler)
|
||||
main(args)
|
||||
|
|
Загрузка…
Ссылка в новой задаче