sandbox/dan2: merge various changes from trunk, plus some changes to enable efficient GPU-based training after 'mix-up'.

git-svn-id: https://svn.code.sf.net/p/kaldi/code/sandbox/dan2@3108 5e6a8d80-dfce-4ca6-a32a-6e07a63d50c8
Author: Dan Povey, 2013-10-23 04:35:25 +00:00
Parent: 2545c1576e
Commit: 0b13df94de
62 changed files with 1423 additions and 493 deletions

egs/swbd/s5b/local/run_nnet2.sh (new executable file, 24 lines)

@ -0,0 +1,24 @@
#!/bin/bash
# This runs on the 100 hour subset.
. cmd.sh
( # TODO: change 5a to 5a2.
if [ ! -f exp/nnet5a2/final.mdl ]; then
steps/nnet2/train_tanh.sh --stage 215 \
--mix-up 8000 \
--initial-learning-rate 0.01 --final-learning-rate 0.001 \
--num-jobs-nnet 16 --num-hidden-layers 4 \
--hidden-layer-dim 1024 \
--cmd "$decode_cmd" \
data/train_100k_nodup data/lang exp/tri4a exp/nnet5a2 || exit 1;
fi
for lm_suffix in tg fsh_tgpr; do
steps/decode_nnet_cpu.sh --cmd "$decode_cmd" --nj 30 \
--config conf/decode.config --transform-dir exp/tri4a/decode_eval2000_sw1_${lm_suffix} \
exp/tri4a/graph_sw1_${lm_suffix} data/eval2000 exp/nnet5a2/decode_eval2000_sw1_${lm_suffix} &
done
)


@ -1,17 +1,19 @@
#!/bin/bash
# This runs on the 100 hour subset. This version of the recipe runs on GPUs.
# We assume you have 8 GPU machines. You have to use --num-threads 1 so it will
# use the version of the code that can use GPUs.
# We assume the queue is set up as in JHU (or as in the "Kluster" project
# on Sourceforge) where "gpu" is a consumable resource that you can set to
# number of GPU cards a machine has.
# We assume you have 8 GPU cards. You have to use --num-threads 1 so it will
# use the version of the code that can use GPUs (the -parallel training code
# cannot use GPUs unless we make further modifications as the CUDA model assumes
# a single thread per GPU context, and we're not currently set up to create multiple
# GPU contexts. We assume the queue is set up as in JHU (or
# as in the "Kluster" project on Sourceforge) where "gpu" is a consumable
# resource that you can set to number of GPU cards a machine has.
. cmd.sh
(
if [ ! -f exp/nnet5b/final.mdl ]; then
steps/nnet2/train_tanh.sh --cmd "$decode_cmd -l gpu=1" --parallel-opts "" --stage 0 \
steps/nnet2/train_tanh.sh --cmd "$decode_cmd -l gpu=1" --parallel-opts "" --stage 253 \
--num-threads 1 \
--mix-up 8000 \
--initial-learning-rate 0.01 --final-learning-rate 0.001 \
@ -26,4 +28,3 @@
exp/tri4a/graph_sw1_${lm_suffix} data/eval2000 exp/nnet5b/decode_eval2000_sw1_${lm_suffix} &
done
)


@ -0,0 +1,29 @@
#!/bin/bash
# This runs on the 100 hour subset. This version of the recipe runs on GPUs.
# We assume you have 8 GPU cards. You have to use --num-threads 1 so it will
# use the version of the code that can use GPUs (the -parallel training code
# cannot use GPUs unless we make further modifications as the CUDA model assumes
# a single thread per GPU context, and we're not currently set up to create multiple
# GPU contexts. We assume the queue is set up as in JHU (or
# as in the "Kluster" project on Sourceforge) where "gpu" is a consumable
# resource that you can set to number of GPU cards a machine has.
. cmd.sh
(
if [ ! -f exp/nnet5b/final.mdl ]; then
steps/nnet2/train_tanh.sh --cmd "$decode_cmd -l gpu=1" --parallel-opts "" --io-opts "-tc 5 -l gpu=0" --stage -3 \
--num-threads 1 --minibatch-size 512 --max-change 40.0 --mix-up 8000 \
--initial-learning-rate 0.01 --final-learning-rate 0.001 \
--num-jobs-nnet 8 --num-hidden-layers 4 \
--hidden-layer-dim 1024 \
data/train_100k_nodup data/lang exp/tri4a exp/nnet5b2 || exit 1;
fi
for lm_suffix in tg fsh_tgpr; do
steps/decode_nnet_cpu.sh --cmd "$decode_cmd" --nj 30 \
--config conf/decode.config --transform-dir exp/tri4a/decode_eval2000_sw1_${lm_suffix} \
exp/tri4a/graph_sw1_${lm_suffix} data/eval2000 exp/nnet5b2/decode_eval2000_sw1_${lm_suffix} &
done
)


@ -6,7 +6,7 @@
(
if [ ! -f exp/nnet5a/final.mdl ]; then
steps/train_nnet_cpu.sh \
steps/train_nnet_cpu.sh --stage 103 \
--mix-up 8000 \
--initial-learning-rate 0.01 --final-learning-rate 0.001 \
--num-jobs-nnet 16 --num-hidden-layers 4 \


@ -62,7 +62,7 @@ max_change=10.0
mix_up=0 # Number of components to mix up to (should be > #tree leaves, if
# specified.)
num_threads=16
parallel_opts="-pe smp $num_threads" # using a smallish #threads by default, out of stability concerns.
parallel_opts="-pe smp 16 -l ram_free=1G,mem_free=1G" # by default we use 16 threads; this lets the queue know.
# note: parallel_opts doesn't automatically get adjusted if you adjust num-threads.
cleanup=true
egs_dir=
@ -105,8 +105,9 @@ if [ $# != 4 ]; then
echo " --num-threads <num-threads|16> # Number of parallel threads per job (will affect results"
echo " # as well as speed; may interact with batch size; if you increase"
echo " # this, you may want to decrease the batch size."
echo " --parallel-opts <opts|\"-pe smp 16\"> # extra options to pass to e.g. queue.pl for processes that"
echo " # use multiple threads."
echo " --parallel-opts <opts|\"-pe smp 16 -l ram_free=1G,mem_free=1G\"> # extra options to pass to e.g. queue.pl for processes that"
echo " # use multiple threads... note, you might have to reduce mem_free,ram_free"
echo " # versus your defaults, because it gets multiplied by the -pe smp argument."
echo " --io-opts <opts|\"-tc 10\"> # Options given to e.g. queue.pl for jobs that do a lot of I/O."
echo " --minibatch-size <minibatch-size|128> # Size of minibatch to process (note: product with --num-threads"
echo " # should not get too large, e.g. >2k)."


@ -50,7 +50,7 @@ num_hidden_layers=3
stage=-5
io_opts="-tc 5" # for jobs with a lot of I/O, limits the number running at one time.
io_opts="-tc 5" # for jobs with a lot of I/O, limits the number running at one time. These don't
splice_width=4 # meaning +- 4 frames on each side for second LDA
randprune=4.0 # speeds up LDA.
alpha=4.0
@ -58,7 +58,7 @@ max_change=10.0
mix_up=0 # Number of components to mix up to (should be > #tree leaves, if
# specified.)
num_threads=16
parallel_opts="-pe smp $num_threads" # using a smallish #threads by default, out of stability concerns.
parallel_opts="-pe smp 16 -l ram_free=1G,mem_free=1G" # by default we use 16 threads; this lets the queue know.
# note: parallel_opts doesn't automatically get adjusted if you adjust num-threads.
cleanup=true
egs_dir=
@ -101,8 +101,9 @@ if [ $# != 4 ]; then
echo " --num-threads <num-threads|16> # Number of parallel threads per job (will affect results"
echo " # as well as speed; may interact with batch size; if you increase"
echo " # this, you may want to decrease the batch size."
echo " --parallel-opts <opts|\"-pe smp 16\"> # extra options to pass to e.g. queue.pl for processes that"
echo " # use multiple threads."
echo " --parallel-opts <opts|\"-pe smp 16 -l ram_free=1G,mem_free=1G\"> # extra options to pass to e.g. queue.pl for processes that"
echo " # use multiple threads... note, you might have to reduce mem_free,ram_free"
echo " # versus your defaults, because it gets multiplied by the -pe smp argument."
echo " --io-opts <opts|\"-tc 10\"> # Options given to e.g. queue.pl for jobs that do a lot of I/O."
echo " --minibatch-size <minibatch-size|128> # Size of minibatch to process (note: product with --num-threads"
echo " # should not get too large, e.g. >2k)."
@ -170,7 +171,7 @@ if [ $stage -le -3 ] && [ -z "$egs_dir" ]; then
echo "$0: calling get_egs.sh"
[ ! -z $spk_vecs_dir ] && spk_vecs_opt="--spk-vecs-dir $spk_vecs_dir";
steps/nnet2/get_egs.sh $spk_vecs_opt --samples-per-iter $samples_per_iter --num-jobs-nnet $num_jobs_nnet \
--splice-width $splice_width --stage $get_egs_stage --cmd "$cmd" $egs_opts \
--splice-width $splice_width --stage $get_egs_stage --cmd "$cmd" $egs_opts --io-opts "$io_opts" \
$data $lang $alidir $dir || exit 1;
fi


@ -9,6 +9,14 @@ SUBDIRS = base matrix util feat tree thread gmm tied transform sgmm \
fstext hmm lm decoder lat cudamatrix nnet \
bin fstbin gmmbin fgmmbin tiedbin sgmmbin featbin \
nnetbin latbin sgmm2 sgmm2bin nnet2 nnet2bin kwsbin
MEMTESTDIRS = base matrix util feat tree thread gmm tied transform sgmm \
fstext hmm lm decoder lat nnet \
bin fstbin gmmbin fgmmbin tiedbin sgmmbin featbin \
nnetbin latbin sgmm2 sgmm2bin nnet-cpu nnet-cpubin kwsbin
CUDAMEMTESTDIR = cudamatrix
SUBDIRS_LIB = $(filter-out %bin, $(SUBDIRS))
@ -97,8 +105,11 @@ ext_test: $(addsuffix /test, $(EXT_SUBDIRS))
%/test: % mklibdir
$(MAKE) -C $< test
cudavalgrind:
-for x in $(CUDAMEMTESTDIR); do $(MAKE) -C $$x valgrind || { echo "valgrind on $$x failed"; exit 1; }; done
valgrind:
-for x in $(SUBDIRS); do $(MAKE) -C $$x valgrind || { echo "valgrind on $$x failed"; exit 1; }; done
-for x in $(MEMTESTDIRS); do $(MAKE) -C $$x valgrind || { echo "valgrind on $$x failed"; exit 1; }; done
depend: $(addsuffix /depend, $(SUBDIRS))


@ -12,7 +12,7 @@ LDFLAGS += $(CUDA_LDFLAGS)
LDLIBS += $(CUDA_LDLIBS)
TESTFILES = cu-vector-test cu-matrix-test cu-math-test cu-test cu-sp-matrix-test cu-packed-matrix-test cu-tp-matrix-test \
cu-block-matrix-test cu-matrix-speed-test cu-vector-speed-test cu-sp-matrix-speed-test
cu-block-matrix-test cu-matrix-speed-test cu-vector-speed-test cu-sp-matrix-speed-test cu-array-test
OBJFILES = cu-device.o cu-math.o cu-matrix.o cu-packed-matrix.o cu-sp-matrix.o \


@ -1,6 +1,7 @@
// cudamatrix/cu-array-inl.h
// Copyright 2009-2012 Karel Vesely
// 2013 Johns Hopkins University (author: Daniel Povey)
// See ../../COPYING for clarification regarding multiple authors
//
@ -113,7 +114,7 @@ void CuArray<T>::CopyToVec(std::vector<T> *dst) const {
if (CuDevice::Instantiate().Enabled()) {
Timer tim;
CU_SAFE_CALL(cudaMemcpy(&dst->front(), Data(), dim_*sizeof(T), cudaMemcpyDeviceToHost));
CuDevice::Instantiate().AccuProfile("CuArray::CopyToVecD2H",tim.Elapsed());
CuDevice::Instantiate().AccuProfile("CuArray::CopyToVecD2H", tim.Elapsed());
} else
#endif
{
@ -129,7 +130,7 @@ void CuArray<T>::SetZero() {
if (CuDevice::Instantiate().Enabled()) {
Timer tim;
CU_SAFE_CALL(cudaMemset(data_, 0, dim_ * sizeof(T)));
CuDevice::Instantiate().AccuProfile("CuArray::SetZero",tim.Elapsed());
CuDevice::Instantiate().AccuProfile("CuArray::SetZero", tim.Elapsed());
} else
#endif
{
@ -184,6 +185,24 @@ inline void CuArray<int32>::Set(const int32 &value) {
}
}
template<typename T>
void CuArray<T>::CopyFromArray(const CuArray<T> &src) {
this->Resize(src.Dim(), kUndefined);
if (dim_ == 0) return;
#if HAVE_CUDA == 1
if (CuDevice::Instantiate().Enabled()) {
Timer tim;
CU_SAFE_CALL(cudaMemcpy(this->data_, src.data_, dim_ * sizeof(T),
cudaMemcpyDeviceToDevice));
CuDevice::Instantiate().AccuProfile(__func__, tim.Elapsed());
} else
#endif
{
memcpy(this->data_, src.data_, dim_ * sizeof(T));
}
}
} // namespace kaldi
#endif


@ -0,0 +1,124 @@
// cudamatrix/cu-array-test.cc
// Copyright 2013 Johns Hopkins University (author: Daniel Povey)
// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABILITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.
#include <iostream>
#include <vector>
#include <cstdlib>
#include "base/kaldi-common.h"
#include "util/common-utils.h"
#include "cudamatrix/cu-array.h"
using namespace kaldi;
namespace kaldi {
template<class T>
static void UnitTestCuArray() {
for (int32 i = 0; i < 30; i++) {
int32 size = rand() % 5;
size = size * size * size; // Have a good distribution of sizes, including >256.
int32 size2 = rand() % 4;
std::vector<T> vec(size);
std::vector<T> garbage_vec(size2); // We just use garbage_vec to make sure
// we sometimes resize from empty,
// sometimes not.
int32 byte_size = size * sizeof(T);
std::vector<char> rand_c(byte_size);
for (size_t i = 0; i < byte_size; i++)
rand_c[i] = rand() % 256;
if (!vec.empty()) {
std::memcpy((void*)&(vec[0]), (void*)&(rand_c[0]),
byte_size);
}
{ // test constructor from vector and CopyToVec.
CuArray<T> cu_vec(vec);
std::vector<T> vec2;
cu_vec.CopyToVec(&vec2);
KALDI_ASSERT(vec2 == vec);
}
{ // test assignment operator from CuArray.
CuArray<T> cu_vec(vec);
CuArray<T> cu_vec2(garbage_vec);
cu_vec2 = cu_vec;
std::vector<T> vec2;
cu_vec2.CopyToVec(&vec2);
KALDI_ASSERT(vec2 == vec);
KALDI_ASSERT(cu_vec2.Dim() == int32(vec2.size())); // test Dim()
}
{ // test resize with resize_type = kSetZero.
CuArray<T> cu_vec(vec);
cu_vec.Resize(size, kSetZero);
std::vector<T> vec2(vec);
if (!vec2.empty())
std::memset(&(vec2[0]), 0, vec2.size() * sizeof(T));
std::vector<T> vec3;
cu_vec.CopyToVec(&vec3);
KALDI_ASSERT(vec2 == vec3); // testing equality of zero arrays.
}
if (sizeof(T) == sizeof(int32) && size > 0) { // test Set for type int32, or same size.
CuArray<T> cu_vec(vec);
cu_vec.Set(vec[0]);
for (size_t i = 1; i < vec.size(); i++) vec[i] = vec[0];
std::vector<T> vec2;
cu_vec.CopyToVec(&vec2);
KALDI_ASSERT(vec2 == vec);
}
}
}
} // namespace kaldi
int main() {
for (int32 loop = 0; loop < 2; loop++) {
#if HAVE_CUDA == 1
if (loop == 0)
CuDevice::Instantiate().SelectGpuId(-1); // -1 means no GPU
else
CuDevice::Instantiate().SelectGpuId(-2); // -2 .. automatic selection
#endif
//kaldi::UnitTestCuArray<float>();
kaldi::UnitTestCuArray<double>();
kaldi::UnitTestCuArray<int32>();
kaldi::UnitTestCuArray<std::pair<int32, int32> >();
if (loop == 0)
KALDI_LOG << "Tests without GPU use succeeded.\n";
else
KALDI_LOG << "Tests with GPU use (if available) succeeded.\n";
}
#if HAVE_CUDA == 1
CuDevice::Instantiate().PrintProfile();
#endif
return 0;
}


@ -49,7 +49,10 @@ class CuArray {
/// Constructor from CPU-based int vector
explicit CuArray<T>(const std::vector<T> &src):
dim_(0), data_(NULL) { CopyFromVec(src); }
explicit CuArray<T>(const CuArray<T> &src):
dim_(0), data_(NULL) { CopyFromArray(src); }
/// Destructor
~CuArray() { Destroy(); }
@ -73,6 +76,9 @@ class CuArray {
/// and any constructors or assignment operators are not called.
void CopyFromVec(const std::vector<T> &src);
/// This function resizes if needed.
void CopyFromArray(const CuArray<T> &src);
/// This function resizes *dst if needed. On resize of "dst", the STL vector
/// may call copy-constructors, initializers, and assignment operators for
/// existing objects (which will be overwritten), but the copy from GPU to CPU
@ -88,6 +94,14 @@ class CuArray {
/// assignment operators or destructors are not called. This is NOT IMPLEMENTED
/// YET except for T == int32 (the current implementation will just crash).
void Set(const T &value);
CuArray<T> &operator= (const CuArray<T> &in) {
this->CopyFromArray(in); return *this;
}
CuArray<T> &operator= (const std::vector<T> &in) {
this->CopyFromVec(in); return *this;
}
private:
MatrixIndexT dim_; ///< dimension of the vector
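
The new copy constructor and assignment operators give CuArray value semantics; a minimal sketch of the resulting usage (hypothetical values, assuming only the interface shown above):

#include <vector>
#include "base/kaldi-common.h"
#include "cudamatrix/cu-array.h"

int main() {
  using namespace kaldi;
  std::vector<int32> v(3);
  v[0] = 1; v[1] = 2; v[2] = 3;
  CuArray<int32> a(v);    // existing constructor: host -> device copy (if a GPU is active)
  CuArray<int32> b(a);    // new copy constructor; calls CopyFromArray()
  b = v;                  // new operator= from std::vector; calls CopyFromVec()
  std::vector<int32> v2;
  b.CopyToVec(&v2);       // round trip back to the host
  KALDI_ASSERT(v2 == v);
  return 0;
}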


@ -165,6 +165,7 @@ void CuBlockMatrix<Real>::Write(std::ostream &os, bool binary) const {
WriteToken(os, binary, "</CuBlockMatrix>");
}
template<class Real>
void CuBlockMatrix<Real>::Read(std::istream &is, bool binary) {
Destroy();


@ -44,6 +44,7 @@ namespace kaldi {
'primary' home remains on the CPU.. what we mean by this is that
while the data remains on the GPU, the "primary" version of the
Matrix object that holds the pointers will remain on the CPU.
We just copy it over to the GPU whenever it is changed.
*/
template<typename Real>
@ -94,10 +95,15 @@ class CuBlockMatrix {
/// Copies elements within the block structure from matrix M, discarding others.
/// Note: this has not been impelemented in a very efficient way, it's used only
/// Note: this has not been implemented in a very efficient way, it's used only
/// for testing.
void CopyFromMat(const CuMatrix<Real> &M);
/// Normalizes the columns of *this so that each one sums to one.
/// On error (e.g. inf's), will set the column to a constant value that
/// sums to one.
void NormalizeColumns();
void Swap(CuBlockMatrix *other);
protected:


@ -79,7 +79,7 @@ void CuDevice::SelectGpuId(int32 gpu_id, bool abort_on_error) {
// Check that we have a gpu available
int32 n_gpu = 0;
cudaGetDeviceCount(&n_gpu);
if(n_gpu == 0 && gpu_id == -2) {
if(n_gpu == 0) {
// If we do automatic selection and no GPU is found, we run on a CPU
if (abort_on_error) {
KALDI_ERR << "No CUDA capable GPU was detected";
@ -89,16 +89,6 @@ void CuDevice::SelectGpuId(int32 gpu_id, bool abort_on_error) {
return;
}
}
if(n_gpu == 0) {
if (abort_on_error) {
KALDI_ERR << "No CUDA capable GPU was detected.";
} else {
KALDI_WARN << "No CUDA capable GPU detected, while explicitly asked for gpu-id '"
<< gpu_id << "'.CUDA will NOT be used!!!";
active_gpu_id_ = -2;
return;
}
}
// Now we know that there is a GPU in the system,
// and we don't want to have it disabled.
@ -390,7 +380,7 @@ void CuDevice::PrintProfile() {
for(it = profile_map_.begin(); it != profile_map_.end(); ++it)
pairs.push_back(std::make_pair(it->second, it->first));
std::sort(pairs.begin(), pairs.end());
size_t max_print = 15, start_pos = (pairs.size() > max_print ?
size_t max_print = 15, start_pos = (pairs.size() <= max_print ?
0 : pairs.size() - max_print);
for (size_t i = start_pos; i < pairs.size(); i++)
os << pairs[i].second << "\t" << pairs[i].first << "s\n";


@ -148,6 +148,9 @@ void cudaF_comp_obj_deriv(dim3 Gr,dim3 Bl, MatrixElement<float>* x, int s, const
void cudaF_transpose_matrix(dim3 Gr, dim3 Bl, float* mat, MatrixDim d);
void cudaF_sy_add_tr2(dim3 Gr, dim3 Bl, float alpha, float beta, const float* T, MatrixDim tdim,
float *S, MatrixDim sdim);
void cudaF_sum_column_ranges(dim3 Gr, dim3 Bl, float *data, MatrixDim dim,
const float *src_data, MatrixDim src_dim,
const Int32Pair *indices);
/*********************************************************
@ -277,6 +280,10 @@ void cudaD_comp_obj_deriv(dim3 Gr,dim3 Bl, MatrixElement<double>* x, int s, cons
void cudaD_transpose_matrix(dim3 Gr, dim3 Bl, double* mat, MatrixDim d);
void cudaD_sy_add_tr2(dim3 Gr, dim3 Bl, double alpha, double beta, const double* T, MatrixDim tdim,
double *S, MatrixDim sdim);
void cudaD_sum_column_ranges(dim3 Gr, dim3 Bl, double *data, MatrixDim dim,
const double *src_data, MatrixDim src_dim,
const Int32Pair *indices);
} // extern "C"


@ -1297,6 +1297,64 @@ static void _block_add_mat_mat(CuBlockMatrixData *B_cu_data, int num_blocks,
}
template<typename Real>
__global__
static void _blockadd_mat_blockmat_trans(Real *data, MatrixDim dim, const Real *A_data, int A_num_rows, int A_num_cols,
int A_row_stride, int A_col_stride, const CuBlockMatrixData *B_cu_data,
int B_num_blocks, Real alpha, Real beta) {
int i = blockIdx.x * blockDim.x + threadIdx.x; // row-index into "data"
int j = blockIdx.y * blockDim.y + threadIdx.y; // block-index into B.
if (i >= A_num_rows || j >= B_num_blocks) return;
const CuBlockMatrixData &cu_data = B_cu_data[j];
// BT means B transposed.
int BT_row_start = cu_data.col_offset,
BT_col_start = cu_data.row_offset,
BT_num_rows = cu_data.matrix_dim.cols,
BT_num_cols = cu_data.matrix_dim.rows,
BT_col_stride = cu_data.matrix_dim.stride;
const Real *B_data = static_cast<Real*>(cu_data.matrix_data); // Cast from void;
// we avoided a bunch of hassle by doing this (relates to Ansi-C requirement).
for (int k = 0; k < BT_num_cols; k++) {
const Real *this_BT_col = B_data + k * BT_col_stride;
const Real *this_A_row = A_data + i * A_row_stride + BT_row_start * A_col_stride;
// this_A_row points to the element A[i][BT_row_start], it's really just
// part of this row of A.
Real sum = 0.0;
for (int l = 0; l < BT_num_rows; l++) // l indexes rows of B.
sum += this_BT_col[l] * this_A_row[l * A_col_stride];
int index = i * dim.stride + (k + BT_col_start);
data[index] = alpha * sum + beta * data[index];
}
}
// Since this is a newer kernel, x is the row-index and y is the
// column-index.
template<typename Real>
__global__
static void _sum_column_ranges(Real *data, MatrixDim dim,
const Real *src_data,
MatrixDim src_dim,
const Int32Pair *indices) {
int row = blockIdx.x * blockDim.x + threadIdx.x;
int col = blockIdx.y * blockDim.y + threadIdx.y;
if (row >= dim.rows || col >= dim.cols)
return;
int dest_index = row * dim.stride + col,
src_start_index = row * src_dim.stride + indices[col].first,
src_end_index = row * src_dim.stride + indices[col].second;
Real sum = 0.0;
for (int index = src_start_index; index < src_end_index; index++)
sum += src_data[index];
data[dest_index] = sum;
}
template<typename Real>
__global__
static void _soft_hinge(Real*y, const Real*x, MatrixDim d, int src_stride) {
@ -2047,6 +2105,11 @@ void cudaF_copy_col_from_mat_fd(int Gr, int Bl, float* v, int col, const float*
_copy_col_from_mat_fd<<<Gr,Bl>>>(v,col,mat,dmat,dim);
}
void cudaF_sum_column_ranges(dim3 Gr, dim3 Bl, float *data, MatrixDim dim,
const float *src_data, MatrixDim src_dim,
const Int32Pair *indices) {
_sum_column_ranges<<<Gr,Bl>>>(data, dim, src_data, src_dim, indices);
}
@ -2407,6 +2470,11 @@ void cudaD_copy_rows_from_vec(dim3 Gr, dim3 Bl, double *mat_out, MatrixDim d_out
_copy_rows_from_vec<<<Gr,Bl>>>(mat_out, d_out, v_in);
}
void cudaD_sum_column_ranges(dim3 Gr, dim3 Bl, double *data, MatrixDim dim,
const double *src_data, MatrixDim src_dim,
const Int32Pair *indices) {
_sum_column_ranges<<<Gr,Bl>>>(data, dim, src_data, src_dim, indices);
}
/* Some conversion kernels for which it's more convenient to not name them F or D. */


@ -207,7 +207,12 @@ inline void cuda_take_lower(dim3 Gr, dim3 Bl, const float* x, float* y, MatrixDi
inline void cuda_take_upper(dim3 Gr, dim3 Bl, const float* x, float* y, MatrixDim d_in) { cudaF_take_upper(Gr,Bl,x,y,d_in); }
inline void cuda_take_mean(dim3 Gr, dim3 Bl, const float* x, float* y, MatrixDim d_in) { cudaF_take_mean(Gr,Bl,x,y,d_in); }
inline void cuda_comp_obj_deriv(dim3 Gr, dim3 Bl, MatrixElement<float>* x, int32 size, const float* z, MatrixDim d, float* z2, MatrixDim d2, float* t) {cudaF_comp_obj_deriv(Gr,Bl,x,size,z,d,z2,d2,t); }
inline void cuda_comp_obj_deriv(dim3 Gr, dim3 Bl, MatrixElement<double>* x, int32 size, const double* z, MatrixDim d, double* z2, MatrixDim d2, double* t) {cudaD_comp_obj_deriv(Gr,Bl,x,size,z,d,z2,d2,t); }
inline void cuda_sum_column_ranges(dim3 Gr, dim3 Bl, float *data, MatrixDim dim,
const float *src_data, MatrixDim src_dim,
const Int32Pair *indices) {
cudaF_sum_column_ranges(Gr, Bl, data, dim, src_data, src_dim, indices);
}
// double versions
@ -347,6 +352,11 @@ inline void cuda_copy_from_sp(int Gr, int Bl, const double* x, double* y, int d_
inline void cuda_take_lower(dim3 Gr, dim3 Bl, const double* x, double* y, MatrixDim d_in) { cudaD_take_lower(Gr,Bl,x,y,d_in); }
inline void cuda_take_upper(dim3 Gr, dim3 Bl, const double* x, double* y, MatrixDim d_in) { cudaD_take_upper(Gr,Bl,x,y,d_in); }
inline void cuda_take_mean(dim3 Gr, dim3 Bl, const double* x, double* y, MatrixDim d_in) { cudaD_take_mean(Gr,Bl,x,y,d_in); }
inline void cuda_comp_obj_deriv(dim3 Gr, dim3 Bl, MatrixElement<double>* x, int32 size, const double* z, MatrixDim d, double* z2, MatrixDim d2, double* t) {cudaD_comp_obj_deriv(Gr,Bl,x,size,z,d,z2,d2,t); }
inline void cuda_sum_column_ranges(dim3 Gr, dim3 Bl, double *data, MatrixDim dim,
const double *src_data, MatrixDim src_dim, const Int32Pair *indices) {
cudaD_sum_column_ranges(Gr, Bl, data, dim, src_data, src_dim, indices);
}
// Also include some template-friendly wrappers of cublas functions:


@ -67,6 +67,7 @@ void Splice(const CuMatrix<Real> &src,
/// The matrices src and tgt must have the same dimensions and
/// the dimension of copy_from_indices must equal the number of columns
/// in the src matrix. As a result, tgt(i, j) == src(i, copy_from_indices[j]).
/// Also see CuMatrix::CopyCols(), which is more general.
template<typename Real>
void Copy(const CuMatrix<Real> &src,
const CuArray<int32> &copy_from_indices,


@ -340,6 +340,47 @@ template<typename Real> void UnitTestCuMatrixCopyCross2() {
}
}
template<typename Real>
static void UnitTestCuMatrixSumColumnRanges() {
for (MatrixIndexT p = 0; p < 10; p++) {
MatrixIndexT num_cols1 = 10 + rand() % 10,
num_cols2 = 10 + rand() % 10,
num_rows = 10 + rand() % 10;
Matrix<Real> src(num_rows, num_cols1);
Matrix<Real> dst(num_rows, num_cols2);
std::vector<Int32Pair> indices(num_cols2);
for (MatrixIndexT i = 0; i < num_cols2; i++) {
indices[i].first = rand() % num_cols1;
int32 headroom = num_cols1 - indices[i].first,
size = (rand() % headroom) + 1;
indices[i].second = indices[i].first + size;
KALDI_ASSERT(indices[i].second >= indices[i].first &&
indices[i].second <= num_cols1 &&
indices[i].first >= 0);
// In the test we allow second == first.
}
src.SetRandn();
// Simple computation:
for (MatrixIndexT i = 0; i < num_rows; i++) {
for (MatrixIndexT j = 0; j < num_cols2; j++) {
int32 start = indices[j].first, end = indices[j].second;
Real sum = 0.0;
for (MatrixIndexT j2 = start; j2 < end; j2++)
sum += src(i, j2);
dst(i, j) = sum;
}
}
CuMatrix<Real> cu_src(src);
CuMatrix<Real> cu_dst(num_rows, num_cols2, kUndefined);
CuArray<Int32Pair> indices_tmp(indices);
cu_dst.SumColumnRanges(cu_src, indices_tmp);
Matrix<Real> dst2(cu_dst);
AssertEqual(dst, dst2);
}
}
template<typename Real>
static void UnitTestCuMatrixCopyCols() {
for (MatrixIndexT p = 0; p < 10; p++) {
@ -353,8 +394,13 @@ static void UnitTestCuMatrixCopyCols() {
std::vector<int32> reorder(num_cols2);
for (int32 i = 0; i < num_cols2; i++)
reorder[i] = -1 + (rand() % (num_cols1 + 1));
N.CopyCols(M, reorder);
if (rand() % 2 == 0) {
N.CopyCols(M, reorder);
} else {
CuArray<int32> cuda_reorder(reorder);
N.CopyCols(M, cuda_reorder);
}
for (int32 i = 0; i < num_rows; i++)
for (int32 j = 0; j < num_cols2; j++)
@ -1640,6 +1686,7 @@ template<typename Real> void CudaMatrixUnitTest() {
UnitTestCuMatrixCopyFromTp<Real>();
UnitTestCuMatrixAddMatTp<Real>();
UnitTestCuMatrixCopyCols<Real>();
UnitTestCuMatrixSumColumnRanges<Real>();
UnitTestCuMatrixCopyRows<Real>();
UnitTestCuMatrixCopyRowsFromVec<Real>();
UnitTestCuMatrixAddTpMat<Real>();


@ -1622,6 +1622,7 @@ void VectorBase<float>::CopyRowsFromMat(const CuMatrixBase<float> &mat);
template
void VectorBase<double>::CopyRowsFromMat(const CuMatrixBase<double> &mat);
template<typename Real>
void CuMatrixBase<Real>::CopyCols(const CuMatrixBase<Real> &src,
const std::vector<MatrixIndexT> &reorder) {
@ -1650,6 +1651,30 @@ void CuMatrixBase<Real>::CopyCols(const CuMatrixBase<Real> &src,
}
}
template<typename Real>
void CuMatrixBase<Real>::CopyCols(const CuMatrixBase<Real> &src,
const CuArray<MatrixIndexT> &reorder) {
#if HAVE_CUDA == 1
if (CuDevice::Instantiate().Enabled()) {
KALDI_ASSERT(reorder.Dim() == NumCols());
KALDI_ASSERT(NumRows() == src.NumRows());
Timer tim;
dim3 dimBlock(CU2DBLOCK, CU2DBLOCK);
// This kernel, as it is newer has the (x,y) dims as (rows,cols).
dim3 dimGrid(n_blocks(NumRows(), CU2DBLOCK), n_blocks(NumCols(), CU2DBLOCK));
cuda_copy_cols(dimGrid, dimBlock, data_, src.Data(), reorder.Data(), Dim(), src.Stride());
CU_SAFE_CALL(cudaGetLastError());
CuDevice::Instantiate().AccuProfile(__func__, tim.Elapsed());
} else
#endif
{
std::vector<MatrixIndexT> reorder_cpu;
reorder.CopyToVec(&reorder_cpu);
Mat().CopyCols(src.Mat(), reorder_cpu);
}
}
template<typename Real>
void CuMatrixBase<Real>::CopyRows(const CuMatrixBase<Real> &src,
const std::vector<MatrixIndexT> &reorder) {
@ -1678,6 +1703,46 @@ void CuMatrixBase<Real>::CopyRows(const CuMatrixBase<Real> &src,
}
}
template<typename Real>
void CuMatrixBase<Real>::SumColumnRanges(const CuMatrixBase<Real> &src,
const CuArray<Int32Pair> &indices) {
KALDI_ASSERT(static_cast<MatrixIndexT>(indices.Dim()) == NumCols());
KALDI_ASSERT(NumRows() == src.NumRows());
if (NumRows() == 0) return;
#if HAVE_CUDA == 1
if (CuDevice::Instantiate().Enabled()) {
Timer tim;
dim3 dimBlock(CU2DBLOCK, CU2DBLOCK);
// This kernel, as it is newer has the (x,y) dims as (rows,cols).
dim3 dimGrid(n_blocks(NumRows(), CU2DBLOCK), n_blocks(NumCols(), CU2DBLOCK));
cuda_sum_column_ranges(dimGrid, dimBlock, data_, Dim(), src.Data(), src.Dim(), indices.Data());
CU_SAFE_CALL(cudaGetLastError());
CuDevice::Instantiate().AccuProfile(__func__, tim.Elapsed());
} else
#endif
{ // Implement here for the CPU..
int32 num_rows = this->num_rows_, num_cols = this->num_cols_,
this_stride = this->stride_, src_stride = src.stride_;
Real *data = this->data_;
const Real *src_data = src.data_;
const Int32Pair *indices_data = indices.Data();
for (int32 row = 0; row < num_rows; row++) {
for (int32 col = 0; col < num_cols; col++) {
int32 start_col = indices_data[col].first,
end_col = indices_data[col].second;
Real sum = 0.0;
for (int32 src_col = start_col; src_col < end_col; src_col++)
sum += src_data[row * src_stride + src_col];
data[row * this_stride + col] = sum;
}
}
}
}
template<typename Real>
void CuMatrixBase<Real>::CopyLowerToUpper() {
KALDI_ASSERT(num_cols_ == num_rows_);


@ -91,6 +91,11 @@ class CuMatrixBase {
void CopyCols(const CuMatrixBase<Real> &src,
const std::vector<MatrixIndexT> &indices);
/// Version of CopyCols that takes CuArray argument.
void CopyCols(const CuMatrixBase<Real> &src,
const CuArray<MatrixIndexT> &indices);
/// Copies row r from row indices[r] of src.
/// As a special case, if indices[i] == -1, sets row i to zero.
/// "indices".size() must equal this->NumRows(),
@ -100,6 +105,13 @@ class CuMatrixBase {
const std::vector<MatrixIndexT> &indices);
/// For each row r of this and for each column c, sets (*this)(r, c) to the
/// sum \sum_j src(r, j), where j ranges from indices[c].first through
/// indices[c].second - 1.
void SumColumnRanges(const CuMatrixBase<Real> &src,
const CuArray<Int32Pair> &indices);
friend Real TraceMatMat<Real>(const CuMatrixBase<Real> &A,
const CuMatrixBase<Real> &B,
MatrixTransposeType trans);
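
Concretely, SumColumnRanges() sums contiguous column ranges of src into the columns of *this. A minimal sketch (hypothetical values, using only the constructors exercised by the unit test earlier in this commit): with a 1 x 4 row [1 2 3 4] and ranges [0,2) and [1,4), the result is [3 9].

#include <vector>
#include "base/kaldi-common.h"
#include "matrix/kaldi-matrix.h"
#include "cudamatrix/cu-matrix.h"
#include "cudamatrix/cu-array.h"

int main() {
  using namespace kaldi;
  Matrix<BaseFloat> src_cpu(1, 4);
  src_cpu(0, 0) = 1; src_cpu(0, 1) = 2; src_cpu(0, 2) = 3; src_cpu(0, 3) = 4;
  CuMatrix<BaseFloat> src(src_cpu);
  std::vector<Int32Pair> ranges(2);
  ranges[0].first = 0; ranges[0].second = 2;  // columns [0,2): 1 + 2 = 3
  ranges[1].first = 1; ranges[1].second = 4;  // columns [1,4): 2 + 3 + 4 = 9
  CuArray<Int32Pair> indices(ranges);
  CuMatrix<BaseFloat> dst(1, 2, kUndefined);
  dst.SumColumnRanges(src, indices);          // dst is now [3 9]
  return 0;
}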


@ -81,6 +81,10 @@ extern "C" {
// decided to make this a void* pointer.
} CuBlockMatrixData;
typedef struct Int32Pair {
int32_cuda first;
int32_cuda second;
} Int32Pair;
}
#endif


@ -221,6 +221,7 @@ class CuVector: public CuVectorBase<Real> {
CuVector(MatrixIndexT dim, MatrixResizeType t = kSetZero) { Resize(dim, t); }
CuVector(const CuVectorBase<Real> &v);
CuVector(const VectorBase<Real> &v);
explicit CuVector(const CuVector<Real> &v) : CuVectorBase<Real>() {
Resize(v.Dim(), kUndefined);


@ -226,12 +226,14 @@ template<class IntType> class LatticeStringRepository {
typedef unordered_set<const Entry*, EntryKey, EntryEqual> SetType;
void RebuildHelper(const Entry *to_add, SetType *tmp_set) {
if (to_add == NULL) return;
else {
while(true) {
if (to_add == NULL) return;
typename SetType::iterator iter = tmp_set->find(to_add);
if (iter == tmp_set->end()) { // not in tmp_set.
tmp_set->insert(to_add);
RebuildHelper(to_add->parent, tmp_set); // make sure parent there.
to_add = to_add->parent; // and loop.
} else {
return;
}
}
}
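
The change above replaces a tail-recursive walk up the parent chain with a loop, so long Entry chains (long strings) can no longer overflow the stack. The same transformation in isolation, on a hypothetical simplified Entry type:

#include <set>

struct Entry { const Entry *parent; };

// Before: Walk(e->parent, s) recursed once per ancestor.
// After: the tail call becomes iteration.
void Walk(const Entry *e, std::set<const Entry*> *s) {
  while (e != NULL && s->insert(e).second)  // stop at NULL or an already-seen node
    e = e->parent;
}

int main() {
  Entry a = { NULL }, b = { &a };
  std::set<const Entry*> seen;
  Walk(&b, &seen);  // inserts b, then a
  return 0;
}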


@ -88,7 +88,7 @@ template<class Label, class StringId> class StringRepository {
else if (id>=single_symbol_start) {
v->resize(1); (*v)[0] = id - single_symbol_start;
} else {
assert(id>=string_start && id < static_cast<StringId>(vec_.size()));
assert(id >= string_start && id < static_cast<StringId>(vec_.size()));
*v = *(vec_[id]);
}
}


@ -10,7 +10,7 @@ LDLIBS += $(CUDA_LDLIBS)
TESTFILES = nnet-test
OBJFILES = nnet-nnet.o nnet-component.o nnet-loss.o nnet-cache.o \
nnet-cache-tgtmat.o nnet-loss-prior.o nnet-pdf-prior.o
nnet-cache-tgtmat.o nnet-cache-conf.o nnet-loss-prior.o nnet-pdf-prior.o
LIBNAME = kaldi-nnet


@ -1,6 +1,6 @@
// nnet/nnet-activation.h
// Copyright 2011 Karel Vesely
// Copyright 2011-2013 Brno University of Technology (author: Karel Vesely)
// See ../../COPYING for clarification regarding multiple authors
//
@ -30,15 +30,14 @@ namespace nnet1 {
class Softmax : public Component {
public:
Softmax(int32 dim_in, int32 dim_out, Nnet *nnet)
: Component(dim_in, dim_out, nnet)
Softmax(int32 dim_in, int32 dim_out)
: Component(dim_in, dim_out)
{ }
~Softmax()
{ }
ComponentType GetType() const {
return kSoftmax;
}
Component* Copy() const { return new Softmax(*this); }
ComponentType GetType() const { return kSoftmax; }
void PropagateFnc(const CuMatrix<BaseFloat> &in, CuMatrix<BaseFloat> *out) {
// y = e^x_j/sum_j(e^x_j)
@ -60,15 +59,14 @@ class Softmax : public Component {
class Sigmoid : public Component {
public:
Sigmoid(int32 dim_in, int32 dim_out, Nnet *nnet)
: Component(dim_in, dim_out, nnet)
Sigmoid(int32 dim_in, int32 dim_out)
: Component(dim_in, dim_out)
{ }
~Sigmoid()
{ }
ComponentType GetType() const {
return kSigmoid;
}
Component* Copy() const { return new Sigmoid(*this); }
ComponentType GetType() const { return kSigmoid; }
void PropagateFnc(const CuMatrix<BaseFloat> &in, CuMatrix<BaseFloat> *out) {
// y = 1/(1+e^-x)
@ -86,15 +84,14 @@ class Sigmoid : public Component {
class Tanh : public Component {
public:
Tanh(int32 dim_in, int32 dim_out, Nnet *nnet)
: Component(dim_in, dim_out, nnet)
Tanh(int32 dim_in, int32 dim_out)
: Component(dim_in, dim_out)
{ }
~Tanh()
{ }
ComponentType GetType() const {
return kTanh;
}
Component* Copy() const { return new Tanh(*this); }
ComponentType GetType() const { return kTanh; }
void PropagateFnc(const CuMatrix<BaseFloat> &in, CuMatrix<BaseFloat> *out) {
// y = (e^x - e^(-x)) / (e^x + e^(-x))
@ -112,15 +109,14 @@ class Tanh : public Component {
class Dropout : public Component {
public:
Dropout(int32 dim_in, int32 dim_out, Nnet *nnet):
Component(dim_in, dim_out, nnet)
Dropout(int32 dim_in, int32 dim_out):
Component(dim_in, dim_out)
{ }
~Dropout()
{ }
ComponentType GetType() const {
return kDropout;
}
Component* Copy() const { return new Dropout(*this); }
ComponentType GetType() const { return kDropout; }
void PropagateFnc(const CuMatrix<BaseFloat> &in, CuMatrix<BaseFloat> *out) {
out->CopyFromMat(in);


@ -1,6 +1,6 @@
// nnet/nnet-affine-transform.h
// Copyright 2011 Karel Vesely
// Copyright 2011 Brno University of Technology (author: Karel Vesely)
// See ../../COPYING for clarification regarding multiple authors
//
@ -31,17 +31,16 @@ namespace nnet1 {
class AffineTransform : public UpdatableComponent {
public:
AffineTransform(int32 dim_in, int32 dim_out, Nnet *nnet)
: UpdatableComponent(dim_in, dim_out, nnet),
AffineTransform(int32 dim_in, int32 dim_out)
: UpdatableComponent(dim_in, dim_out),
linearity_(dim_out, dim_in), bias_(dim_out),
linearity_corr_(dim_out, dim_in), bias_corr_(dim_out)
{ }
~AffineTransform()
{ }
ComponentType GetType() const {
return kAffineTransform;
}
Component* Copy() const { return new AffineTransform(*this); }
ComponentType GetType() const { return kAffineTransform; }
void ReadData(std::istream &is, bool binary) {
linearity_.Read(is, binary);

src/nnet/nnet-cache-conf.cc (new file, 247 lines)

@ -0,0 +1,247 @@
// nnet/nnet-cache-conf.cc
// Copyright 2013 Brno University of Technology (author: Karel Vesely)
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABILITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.
#include "nnet/nnet-cache-conf.h"
#include "cudamatrix/cu-math.h"
#include <algorithm>
namespace kaldi {
namespace nnet1 {
void CacheConf::Init(int32 cachesize, int32 bunchsize) {
KALDI_ASSERT(cachesize>0);
if(cachesize > 8388479) {
KALDI_ERR << "CacheConfsize " << cachesize << " too large, use cachesize smaller than 8388480.";
}
KALDI_ASSERT(bunchsize>0);
KALDI_ASSERT(cachesize>=bunchsize);
if ((cachesize % bunchsize) != 0) {
KALDI_ERR << "Non divisible cachesize by bunchsize";
}
cachesize_ = cachesize;
bunchsize_ = bunchsize;
state_ = EMPTY;
filling_pos_ = 0;
emptying_pos_ = 0;
randomized_ = false;
}
void CacheConf::AddData(const CuMatrix<BaseFloat> &features, const std::vector<int32> &targets, const Vector<BaseFloat> &confidence) {
if (state_ == FULL) {
KALDI_ERR << "Cannot add data, cache already full";
}
KALDI_ASSERT(features.NumRows() == static_cast<int32>(targets.size()));
KALDI_ASSERT(features.NumRows() == static_cast<int32>(confidence.Dim()));
int32 dim_fea = features.NumCols();
// lazy buffers allocation
if (features_.NumRows() != cachesize_) {
features_.Resize(cachesize_, dim_fea);
targets_.resize(cachesize_);
confidence_.Resize(cachesize_);
}
// warn if segment longer than half-cache
// (frame-level shuffling across sentences will be poor)
if (features.NumRows() > cachesize_/4) {
KALDI_WARN << "Too long segment or small cachesize!"
<< " (cache-size " << cachesize_ << ") < (4 x"
<< " segment-size " << features.NumRows() << ").";
}
// change state
if (state_ == EMPTY) {
state_ = FILLING; filling_pos_ = 0;
// check for leftover from previous segment
int leftover = features_leftover_.NumRows();
// check if leftover is not bigger than half-cachesize
if (leftover > cachesize_/2) {
KALDI_WARN << "Truncating "
<< leftover - cachesize_/2
<< " frames from leftover of previous segment "
<< "(max leftover " << cachesize_/2 << ").";
leftover = cachesize_/2;
}
// prefill cache with leftover
if (leftover > 0) {
features_.RowRange(0, leftover).CopyFromMat(
features_leftover_.RowRange(0, leftover)
);
std::copy(targets_leftover_.begin(),
targets_leftover_.begin() + leftover,
targets_.begin());
confidence_.Range(0, leftover).CopyFromVec(
confidence_leftover_.Range(0, leftover)
);
features_leftover_.Resize(0, 0);
targets_leftover_.resize(0);
confidence_leftover_.Resize(0);
filling_pos_ += leftover;
}
}
KALDI_ASSERT(state_ == FILLING);
KALDI_ASSERT(features.NumRows() == static_cast<MatrixIndexT>(targets.size()));
int cache_space = cachesize_ - filling_pos_;
int feature_length = features.NumRows();
int fill_rows = (cache_space<feature_length)? cache_space : feature_length;
int leftover = feature_length - fill_rows;
KALDI_ASSERT(cache_space > 0);
// copy the data to cache
features_.RowRange(filling_pos_, fill_rows).CopyFromMat(
features.RowRange(0, fill_rows)
);
std::copy(targets.begin(),
targets.begin()+fill_rows,
targets_.begin()+filling_pos_);
confidence_.Range(filling_pos_,fill_rows).
CopyFromVec(confidence.Range(0,fill_rows));
// copy leftovers
if (leftover > 0) {
features_leftover_.Resize(leftover, dim_fea);
features_leftover_.CopyFromMat(
features.RowRange(fill_rows, leftover)
);
KALDI_ASSERT(targets.end()-(targets.begin()+fill_rows)==leftover);
targets_leftover_.resize(leftover);
std::copy(targets.begin()+fill_rows,
targets.end(),
targets_leftover_.begin());
confidence_leftover_.Resize(leftover);
confidence_leftover_.CopyFromVec(confidence.Range(fill_rows,leftover));
}
// update cursor
filling_pos_ += fill_rows;
// change state
if (filling_pos_ == cachesize_) {
state_ = FULL;
}
}
void CacheConf::Randomize() {
KALDI_ASSERT(state_ == FULL || state_ == FILLING);
// lazy initialization of the output buffers
features_random_.Resize(cachesize_, features_.NumCols());
targets_random_.resize(cachesize_);
confidence_random_.Resize(cachesize_);
// generate random series of integers
randmask_.resize(filling_pos_);
GenerateRandom randomizer;
for(int32 i=0; i<filling_pos_; i++) { randmask_[i]=i; }
std::random_shuffle(randmask_.begin(), randmask_.end(), randomizer);
// get it to the gpu
randmask_device_.CopyFromVec(randmask_);
// randomize the features
cu::Randomize(features_, randmask_device_, &features_random_);
// randomize the targets
for(int32 i=0; i<filling_pos_; i++) {
targets_random_[i] = targets_[randmask_[i]];
}
// randomize the confidences
for(int32 i=0; i<filling_pos_; i++) {
confidence_random_(i) = confidence_(randmask_[i]);
}
randomized_ = true;
}
void CacheConf::GetBunch(CuMatrix<BaseFloat> *features, std::vector<int32> *targets, Vector<BaseFloat> *confidence) {
if (state_ == EMPTY) {
KALDI_ERR << "GetBunch on empty cache!!!";
}
// change state if full...
if (state_ == FULL) {
state_ = EMPTYING; emptying_pos_ = 0;
}
// final cache is not completely filled
if (state_ == FILLING) {
state_ = EMPTYING; emptying_pos_ = 0;
}
KALDI_ASSERT(state_ == EMPTYING);
const CuMatrixBase<BaseFloat> &features_ref = (randomized_ ?
features_random_ : features_);
const std::vector<int32> &targets_ref = (randomized_ ?
targets_random_ : targets_);
const Vector<BaseFloat> &confidence_ref = (randomized_ ?
confidence_random_ : confidence_);
// init the output
features->Resize(bunchsize_, features_.NumCols());
targets->resize(bunchsize_);
confidence->Resize(bunchsize_);
// copy the output
features->CopyFromMat(features_ref.RowRange(emptying_pos_, bunchsize_));
std::copy(targets_ref.begin() + emptying_pos_,
targets_ref.begin() + emptying_pos_ + bunchsize_,
targets->begin());
confidence->CopyFromVec(confidence_ref.Range(emptying_pos_, bunchsize_));
// update position
emptying_pos_ += bunchsize_;
// If we're done, change state to EMPTY
if (emptying_pos_ > filling_pos_ - bunchsize_) {
// we don't have more complete bunches...
state_ = EMPTY;
}
}
} // namespace nnet1
} // namespace kaldi

src/nnet/nnet-cache-conf.h (new file, 107 lines)

@ -0,0 +1,107 @@
// nnet/nnet-cache-conf.h
// Copyright 2012 Brno University of Technology (author: Karel Vesely)
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABILITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.
#ifndef KALDI_NNET_NNET_CACHE_CONF_H_
#define KALDI_NNET_NNET_CACHE_CONF_H_
#include "base/kaldi-math.h"
#include "cudamatrix/cu-matrix.h"
#include "cudamatrix/cu-math.h"
namespace kaldi {
namespace nnet1 {
/**
* The feature-target pair cache
*/
class CacheConf {
typedef enum { EMPTY, FILLING, FULL, EMPTYING } State;
public:
CacheConf() : state_(EMPTY), filling_pos_(0), emptying_pos_(0),
cachesize_(0), bunchsize_(0), randomized_(false)
{ }
~CacheConf() { }
/// Initialize the cache
void Init(int32 cachesize, int32 bunchsize);
/// Add data to cache
void AddData(const CuMatrix<BaseFloat> &features, const std::vector<int32> &targets, const Vector<BaseFloat> &confidence);
/// Randomizes the cache
void Randomize();
/// Get the bunch of training data from cache
void GetBunch(CuMatrix<BaseFloat> *features, std::vector<int32> *targets, Vector<BaseFloat> *confidence);
/// Returns true if the cache was completely filled
bool Full() {
return (state_ == FULL);
}
/// Returns true if the cache is empty
bool Empty() {
return (state_ == EMPTY || filling_pos_ < bunchsize_);
}
/// Returns true if the cache was randomized
bool Randomized() {
return randomized_;
}
private:
struct GenerateRandom {
int32 operator()(int32 max) const {
// return lrand48() % max;
return RandInt(0, max-1);
}
};
State state_; ///< Current state of the cache
int32 filling_pos_; ///< Number of frames filled to cache by AddData
int32 emptying_pos_; ///< Number of frames given by cache by GetBunch
int32 cachesize_; ///< Size of cache
int32 bunchsize_; ///< Size of bunch
bool randomized_;
CuMatrix<BaseFloat> features_; ///< Feature cache
CuMatrix<BaseFloat> features_random_; ///< Feature cache
CuMatrix<BaseFloat> features_leftover_; ///< Feature cache
std::vector<int32> targets_; ///< Desired vector cache
std::vector<int32> targets_random_; ///< Desired vector cache
std::vector<int32> targets_leftover_; ///< Desired vector cache
Vector<BaseFloat> confidence_;
Vector<BaseFloat> confidence_random_;
Vector<BaseFloat> confidence_leftover_;
std::vector<int32> randmask_;
CuArray<int32> randmask_device_;
};
} // namespace nnet1
} // namespace kaldi
#endif
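
For orientation, a minimal sketch of the intended CacheConf lifecycle (dummy data; the per-frame confidence vector is the new third stream alongside features and targets):

#include <vector>
#include "base/kaldi-common.h"
#include "matrix/kaldi-vector.h"
#include "nnet/nnet-cache-conf.h"

int main() {
  using namespace kaldi;
  using namespace kaldi::nnet1;
  CacheConf cache;
  cache.Init(/*cachesize=*/512, /*bunchsize=*/128);
  // Four dummy "utterances" of 128 frames each: 10-dim zero features,
  // zero targets, per-frame confidences of 1.0.
  for (int32 utt = 0; utt < 4; utt++) {
    CuMatrix<BaseFloat> feats(128, 10);
    std::vector<int32> targets(128, 0);
    Vector<BaseFloat> confidence(128);
    confidence.Set(1.0);
    cache.AddData(feats, targets, confidence);
  }
  KALDI_ASSERT(cache.Full());   // 4 x 128 frames exactly fill the cache
  cache.Randomize();            // frame-level shuffle of all three streams
  CuMatrix<BaseFloat> nnet_in;
  std::vector<int32> nnet_tgt;
  Vector<BaseFloat> frm_conf;
  while (!cache.Empty()) {
    cache.GetBunch(&nnet_in, &nnet_tgt, &frm_conf);
    // ... one SGD minibatch would run here, weighting gradients by frm_conf ...
  }
  return 0;
}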


@ -1,6 +1,6 @@
// nnet/nnet-cache-tgtmat.cc
// Copyright 2011 Karel Vesely
// Copyright 2011 Brno University of Technology (author: Karel Vesely)
// See ../../COPYING for clarification regarding multiple authors
//


@ -1,6 +1,6 @@
// nnet/nnet-cache-tgtmat.h
// Copyright 2012 Karel Vesely
// Copyright 2012 Brno University of Technology (author: Karel Vesely)
// See ../../COPYING for clarification regarding multiple authors
//


@ -1,6 +1,6 @@
// nnet/nnet-cache.cc
// Copyright 2011 Karel Vesely
// Copyright 2011 Brno University of Technology (author: Karel Vesely)
// See ../../COPYING for clarification regarding multiple authors
//


@ -1,6 +1,6 @@
// nnet/nnet-cache.h
// Copyright 2012 Karel Vesely
// Copyright 2012 Brno University of Technology (author: Karel Vesely)
// See ../../COPYING for clarification regarding multiple authors
//


@ -1,6 +1,6 @@
// nnet/nnet-component.cc
// Copyright 2011-2013 Brno University of Technology (Author: Karel Vesely)
// Copyright 2011-2013 Brno University of Technology (Author: Karel Vesely)
// See ../../COPYING for clarification regarding multiple authors
//
@ -64,14 +64,24 @@ Component::ComponentType Component::MarkerToType(const std::string &s) {
}
Component* Component::Read(std::istream &is, bool binary, Nnet *nnet) {
Component* Component::Read(std::istream &is, bool binary) {
int32 dim_out, dim_in;
std::string token;
int first_char = Peek(is, binary);
if (first_char == EOF) return NULL;
ReadToken(is, binary, &token);
ReadToken(is, binary, &token);
// Skip optional initial token
if(token == "<Nnet>") {
ReadToken(is, binary, &token); // Next token is a Component
}
// Finish reading when optional terminal token appears
if(token == "</Nnet>") {
return NULL;
}
Component::ComponentType comp_type = Component::MarkerToType(token);
ReadBasicType(is, binary, &dim_out);
@ -80,34 +90,34 @@ Component* Component::Read(std::istream &is, bool binary, Nnet *nnet) {
Component *p_comp=NULL;
switch (comp_type) {
case Component::kAffineTransform :
p_comp = new AffineTransform(dim_in, dim_out, nnet);
p_comp = new AffineTransform(dim_in, dim_out);
break;
case Component::kSoftmax :
p_comp = new Softmax(dim_in, dim_out, nnet);
p_comp = new Softmax(dim_in, dim_out);
break;
case Component::kSigmoid :
p_comp = new Sigmoid(dim_in, dim_out, nnet);
p_comp = new Sigmoid(dim_in, dim_out);
break;
case Component::kTanh :
p_comp = new Tanh(dim_in, dim_out, nnet);
p_comp = new Tanh(dim_in, dim_out);
break;
case Component::kDropout :
p_comp = new Dropout(dim_in, dim_out, nnet);
p_comp = new Dropout(dim_in, dim_out);
break;
case Component::kRbm :
p_comp = new Rbm(dim_in, dim_out, nnet);
p_comp = new Rbm(dim_in, dim_out);
break;
case Component::kSplice :
p_comp = new Splice(dim_in, dim_out, nnet);
p_comp = new Splice(dim_in, dim_out);
break;
case Component::kCopy :
p_comp = new Copy(dim_in, dim_out, nnet);
p_comp = new CopyComponent(dim_in, dim_out);
break;
case Component::kAddShift :
p_comp = new AddShift(dim_in, dim_out, nnet);
p_comp = new AddShift(dim_in, dim_out);
break;
case Component::kRescale :
p_comp = new Rescale(dim_in, dim_out, nnet);
p_comp = new Rescale(dim_in, dim_out);
break;
case Component::kUnknown :
default :


@ -1,6 +1,6 @@
// nnet/nnet-component.h
// Copyright 2011-2013 Brno University of Technology (Author: Karel Vesely)
// Copyright 2011-2013 Brno University of Technology (Author: Karel Vesely)
// See ../../COPYING for clarification regarding multiple authors
//
@ -34,27 +34,17 @@
namespace kaldi {
namespace nnet1 {
// declare the nnet class so we can declare pointer
struct NnetTrainOptions;
class Nnet;
/**
* Abstract class, basic element of the network,
* it is a box with defined inputs, outputs,
* and tranformation functions interface.
*
* It is able to propagate and backpropagate
* exact implementation is to be implemented in descendants.
*
* The data buffers are not included
* and will be managed from outside.
* Abstract class, building block of the network.
* It is able to propagate (PropagateFnc: compute the output based on its input)
* and backpropagate (BackpropagateFnc: i.e. transform loss derivative w.r.t. output to derivative w.r.t. the input)
* the formulas are implemented in descendant classes (AffineTransform,Sigmoid,Softmax,...).
*/
class Component {
// Polymorphic Component RTTI
/// Component type identification mechanism
public:
/// Types of the net components
/// Types of the Components
typedef enum {
kUnknown = 0x0,
@ -77,23 +67,27 @@ class Component {
kRescale,
kLog
} ComponentType;
/// Pair of type and marker
/// A pair of type and marker
struct key_value {
const Component::ComponentType key;
const char *value;
};
/// Mapping of types and markers
/// Mapping of types and markers (the table is defined in nnet-component.cc)
static const struct key_value kMarkerMap[];
/// Convert component type to marker
static const char* TypeToMarker(ComponentType t);
/// Convert marker to component type
static ComponentType MarkerToType(const std::string &s);
Component(int32 input_dim, int32 output_dim, Nnet *nnet)
: input_dim_(input_dim), output_dim_(output_dim), nnet_(nnet) { }
virtual ~Component() { }
/// General interface of a component
public:
Component(int32 input_dim, int32 output_dim)
: input_dim_(input_dim), output_dim_(output_dim) { }
virtual ~Component() { }
/// Copy component (deep copy).
virtual Component* Copy() const = 0;
/// Get Type Identification of the component
virtual ComponentType GetType() const = 0;
/// Check if contains trainable parameters
@ -110,28 +104,29 @@ class Component {
return output_dim_;
}
/// Perform forward pass propagateion Input->Output
/// Perform forward pass propagation Input->Output
void Propagate(const CuMatrix<BaseFloat> &in, CuMatrix<BaseFloat> *out);
/// Perform backward pass propagation, out_diff -> in_diff
/// '&in' and '&out' will often be unused...
/// '&in' and '&out' will sometimes be unused...
void Backpropagate(const CuMatrix<BaseFloat> &in,
const CuMatrix<BaseFloat> &out,
const CuMatrix<BaseFloat> &out_diff,
CuMatrix<BaseFloat> *in_diff);
/// Read component from stream
static Component* Read(std::istream &is, bool binary, Nnet *nnet);
static Component* Read(std::istream &is, bool binary);
/// Write component to stream
void Write(std::ostream &os, bool binary) const;
/// Optionally print some additional info
virtual std::string Info() const { return ""; }
// abstract interface for propagation/backpropagation
/// Abstract interface for propagation/backpropagation
protected:
/// Forward pass transformation (to be implemented by descendents...)
/// Forward pass transformation (to be implemented by descending class...)
virtual void PropagateFnc(const CuMatrix<BaseFloat> &in,
CuMatrix<BaseFloat> *out) = 0;
/// Backward pass transformation (to be implemented by descendents...)
/// Backward pass transformation (to be implemented by descending class...)
virtual void BackpropagateFnc(const CuMatrix<BaseFloat> &in,
const CuMatrix<BaseFloat> &out,
const CuMatrix<BaseFloat> &out_diff,
@ -144,26 +139,24 @@ class Component {
virtual void WriteData(std::ostream &os, bool binary) const { }
// data members
/// Data members
protected:
int32 input_dim_; ///< Size of input vectors
int32 output_dim_; ///< Size of output vectors
Nnet *nnet_; ///< Pointer to the whole network
private:
KALDI_DISALLOW_COPY_AND_ASSIGN(Component);
protected:
//KALDI_DISALLOW_COPY_AND_ASSIGN(Component);
};
/**
* Class UpdatableComponent is a Component which has
* trainable parameters and contains SGD training
* hyper-parameters (learnrate, momenutm, L2, L1)
* Class UpdatableComponent is a Component which has trainable parameters,
* contains SGD training hyper-parameters in NnetTrainOptions.
*/
class UpdatableComponent : public Component {
public:
UpdatableComponent(int32 input_dim, int32 output_dim, Nnet *nnet)
: Component(input_dim, output_dim, nnet) { }
UpdatableComponent(int32 input_dim, int32 output_dim)
: Component(input_dim, output_dim) { }
virtual ~UpdatableComponent() { }
/// Check if contains trainable parameters
@ -176,7 +169,7 @@ class UpdatableComponent : public Component {
const CuMatrix<BaseFloat> &diff) = 0;
/// Sets the training options to the component
void SetTrainOptions(const NnetTrainOptions &opts) {
virtual void SetTrainOptions(const NnetTrainOptions &opts) {
opts_ = opts;
}
/// Gets the training options from the component
@ -190,18 +183,17 @@ class UpdatableComponent : public Component {
};
inline void Component::Propagate(const CuMatrix<BaseFloat> &in,
CuMatrix<BaseFloat> *out) {
// Check the dims
if (input_dim_ != in.NumCols()) {
KALDI_ERR << "Nonmatching dims, component:" << input_dim_ << " data:" << in.NumCols();
KALDI_ERR << "Non-matching dims, component:" << input_dim_ << " data:" << in.NumCols();
}
// Allocate target buffer
if (output_dim_ != out->NumCols() || in.NumRows() != out->NumRows()) {
out->Resize(in.NumRows(), output_dim_);
}
// Call the propagation implementation of the component
PropagateFnc(in, out);
}
@ -210,27 +202,26 @@ inline void Component::Backpropagate(const CuMatrix<BaseFloat> &in,
const CuMatrix<BaseFloat> &out,
const CuMatrix<BaseFloat> &out_diff,
CuMatrix<BaseFloat> *in_diff) {
//check the dims
// Check the dims
if (output_dim_ != out_diff.NumCols()) {
KALDI_ERR << "Nonmatching output dims, component:" << output_dim_
KALDI_ERR << "Non-matching output dims, component:" << output_dim_
<< " data:" << out_diff.NumCols();
}
//allocate buffer
// Allocate target buffer
if (input_dim_ != in_diff->NumCols() || out_diff.NumRows() != in_diff->NumRows()) {
in_diff->Resize(out_diff.NumRows(), input_dim_);
}
//asserts on the dims
// Asserts on the dims
KALDI_ASSERT((in.NumRows() == out.NumRows()) &&
(in.NumRows() == out_diff.NumRows()) &&
(in.NumRows() == in_diff->NumRows()));
KALDI_ASSERT(in.NumCols() == in_diff->NumCols());
KALDI_ASSERT(out.NumCols() == out_diff.NumCols());
//call the backprop implementation of the component
// Call the backprop implementation of the component
BackpropagateFnc(in, out, out_diff, in_diff);
}
} // namespace nnet1
} // namespace kaldi
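
Under the revised interface a descendant implements Copy() alongside GetType(), PropagateFnc() and BackpropagateFnc(), and no longer receives an Nnet* pointer. A minimal sketch of a hypothetical pass-through component (not part of this commit; it reuses the existing kCopy type id purely for illustration):

#include "nnet/nnet-component.h"

namespace kaldi {
namespace nnet1 {

class Identity : public Component {
 public:
  Identity(int32 dim_in, int32 dim_out) : Component(dim_in, dim_out) { }
  Component* Copy() const { return new Identity(*this); }
  ComponentType GetType() const { return kCopy; }
 protected:
  void PropagateFnc(const CuMatrix<BaseFloat> &in, CuMatrix<BaseFloat> *out) {
    out->CopyFromMat(in);  // y = x
  }
  void BackpropagateFnc(const CuMatrix<BaseFloat> &in, const CuMatrix<BaseFloat> &out,
                        const CuMatrix<BaseFloat> &out_diff, CuMatrix<BaseFloat> *in_diff) {
    in_diff->CopyFromMat(out_diff);  // dE/dx = dE/dy
  }
};

}  // namespace nnet1
}  // namespace kaldi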


@ -1,6 +1,6 @@
// nnet/nnet-loss-prior.cc
// Copyright 2012 Karel Vesely
// Copyright 2012 Brno University of Technology (author: Karel Vesely)
// See ../../COPYING for clarification regarding multiple authors
//


@ -1,6 +1,6 @@
// nnet/nnet-loss-prior.h
// Copyright 2012 Karel Vesely
// Copyright 2012 Brno University of Technology (author: Karel Vesely)
// See ../../COPYING for clarification regarding multiple authors
//


@ -1,6 +1,6 @@
// nnet/nnet-loss.cc
// Copyright 2011 Karel Vesely
// Copyright 2011 Brno University of Technology (author: Karel Vesely)
// See ../../COPYING for clarification regarding multiple authors
//


@ -1,6 +1,6 @@
// nnet/nnet-loss.h
// Copyright 2011 Karel Vesely
// Copyright 2011 Brno University of Technology (author: Karel Vesely)
// See ../../COPYING for clarification regarding multiple authors
//


@ -1,6 +1,6 @@
// nnet/nnet-nnet.cc
// Copyright 2011-2013 Brno University of Technology (Author: Karel Vesely)
// Copyright 2011-2013 Brno University of Technology (Author: Karel Vesely)
// See ../../COPYING for clarification regarding multiple authors
//
@ -27,59 +27,66 @@
namespace kaldi {
namespace nnet1 {
Nnet::~Nnet() {
for(int32 i=0; i<NumComponents(); i++) {
delete components_[i];
}
}
void Nnet::Propagate(const CuMatrix<BaseFloat> &in, CuMatrix<BaseFloat> *out) {
KALDI_ASSERT(NULL != out);
if (LayerCount() == 0) {
if (NumComponents() == 0) {
out->Resize(in.NumRows(), in.NumCols());
out->CopyFromMat(in);
return;
}
// we need at least L+1 input buffers
KALDI_ASSERT((int32)propagate_buf_.size() >= LayerCount()+1);
KALDI_ASSERT((int32)propagate_buf_.size() >= NumComponents()+1);
propagate_buf_[0].Resize(in.NumRows(), in.NumCols());
propagate_buf_[0].CopyFromMat(in);
for(int32 i=0; i<(int32)nnet_.size(); i++) {
nnet_[i]->Propagate(propagate_buf_[i], &propagate_buf_[i+1]);
for(int32 i=0; i<(int32)components_.size(); i++) {
components_[i]->Propagate(propagate_buf_[i], &propagate_buf_[i+1]);
}
CuMatrix<BaseFloat> &mat = propagate_buf_[nnet_.size()];
CuMatrix<BaseFloat> &mat = propagate_buf_[components_.size()];
out->Resize(mat.NumRows(), mat.NumCols());
out->CopyFromMat(mat);
}
void Nnet::Backpropagate(const CuMatrix<BaseFloat> &out_diff, CuMatrix<BaseFloat> *in_diff) {
if(LayerCount() == 0) { KALDI_ERR << "Cannot backpropagate on empty network"; }
if(NumComponents() == 0) { KALDI_ERR << "Cannot backpropagate on empty network"; }
// we need at least L+1 input buffers
KALDI_ASSERT((int32)propagate_buf_.size() >= LayerCount()+1);
KALDI_ASSERT((int32)propagate_buf_.size() >= NumComponents()+1);
// we need at least L-1 error derivative buffers
KALDI_ASSERT((int32)backpropagate_buf_.size() >= LayerCount()-1);
KALDI_ASSERT((int32)backpropagate_buf_.size() >= NumComponents()-1);
//////////////////////////////////////
// Backpropagation
//
// don't copy the out_diff to buffers, use it as is...
int32 i = nnet_.size()-1;
nnet_.back()->Backpropagate(propagate_buf_[i], propagate_buf_[i+1],
// we don't copy the out_diff to buffers, we use it as it is...
int32 i = components_.size()-1;
components_.back()->Backpropagate(propagate_buf_[i], propagate_buf_[i+1],
out_diff, &backpropagate_buf_[i-1]);
if (nnet_[i]->IsUpdatable()) {
UpdatableComponent *uc = dynamic_cast<UpdatableComponent*>(nnet_[i]);
if (components_[i]->IsUpdatable()) {
UpdatableComponent *uc = dynamic_cast<UpdatableComponent*>(components_[i]);
uc->Update(propagate_buf_[i], out_diff);
}
// backpropagate by using buffers
for(i--; i >= 1; i--) {
nnet_[i]->Backpropagate(propagate_buf_[i], propagate_buf_[i+1],
components_[i]->Backpropagate(propagate_buf_[i], propagate_buf_[i+1],
backpropagate_buf_[i], &backpropagate_buf_[i-1]);
if (nnet_[i]->IsUpdatable()) {
UpdatableComponent *uc = dynamic_cast<UpdatableComponent*>(nnet_[i]);
if (components_[i]->IsUpdatable()) {
UpdatableComponent *uc = dynamic_cast<UpdatableComponent*>(components_[i]);
uc->Update(propagate_buf_[i], backpropagate_buf_[i]);
}
}
@ -87,13 +94,13 @@ void Nnet::Backpropagate(const CuMatrix<BaseFloat> &out_diff, CuMatrix<BaseFloat
// now backpropagate through first layer,
// but only if asked to (by in_diff pointer)
if (NULL != in_diff) {
nnet_[0]->Backpropagate(propagate_buf_[0], propagate_buf_[1],
components_[0]->Backpropagate(propagate_buf_[0], propagate_buf_[1],
backpropagate_buf_[0], in_diff);
}
// update the first layer
if (nnet_[0]->IsUpdatable()) {
UpdatableComponent *uc = dynamic_cast<UpdatableComponent*>(nnet_[0]);
if (components_[0]->IsUpdatable()) {
UpdatableComponent *uc = dynamic_cast<UpdatableComponent*>(components_[0]);
uc->Update(propagate_buf_[0], backpropagate_buf_[0]);
}
@ -106,14 +113,14 @@ void Nnet::Backpropagate(const CuMatrix<BaseFloat> &out_diff, CuMatrix<BaseFloat
void Nnet::Feedforward(const CuMatrix<BaseFloat> &in, CuMatrix<BaseFloat> *out) {
KALDI_ASSERT(NULL != out);
if (LayerCount() == 0) {
if (NumComponents() == 0) {
out->Resize(in.NumRows(), in.NumCols());
out->CopyFromMat(in);
return;
}
if (LayerCount() == 1) {
nnet_[0]->Propagate(in, out);
if (NumComponents() == 1) {
components_[0]->Propagate(in, out);
return;
}
@ -122,27 +129,81 @@ void Nnet::Feedforward(const CuMatrix<BaseFloat> &in, CuMatrix<BaseFloat> *out)
// propagate by using exactly 2 auxiliary buffers
int32 L = 0;
nnet_[L]->Propagate(in, &propagate_buf_[L%2]);
for(L++; L<=LayerCount()-2; L++) {
nnet_[L]->Propagate(propagate_buf_[(L-1)%2], &propagate_buf_[L%2]);
components_[L]->Propagate(in, &propagate_buf_[L%2]);
for(L++; L<=NumComponents()-2; L++) {
components_[L]->Propagate(propagate_buf_[(L-1)%2], &propagate_buf_[L%2]);
}
nnet_[L]->Propagate(propagate_buf_[(L-1)%2], out);
components_[L]->Propagate(propagate_buf_[(L-1)%2], out);
// release the buffers we don't need anymore
propagate_buf_[0].Resize(0,0);
propagate_buf_[1].Resize(0,0);
}
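// Worked example of the two-buffer scheme above, for a 4-component net:
//   component 0 : in     -> propagate_buf_[0]
//   component 1 : buf[0] -> buf[1]
//   component 2 : buf[1] -> buf[0]
//   component 3 : buf[0] -> out
// The (L-1)%2 / L%2 indexing ping-pongs between the two auxiliary buffers,
// so the memory footprint stays constant regardless of network depth.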
int32 Nnet::OutputDim() const {
KALDI_ASSERT(!components_.empty());
return components_.back()->OutputDim();
}
int32 Nnet::InputDim() const {
KALDI_ASSERT(!components_.empty());
return components_.front()->InputDim();
}
const Component& Nnet::GetComponent(int32 component) const {
KALDI_ASSERT(static_cast<size_t>(component) < components_.size());
return *(components_[component]);
}
Component& Nnet::GetComponent(int32 component) {
KALDI_ASSERT(static_cast<size_t>(component) < components_.size());
return *(components_[component]);
}
void Nnet::SetComponent(int32 c, Component *component) {
KALDI_ASSERT(static_cast<size_t>(c) < components_.size());
delete components_[c];
components_[c] = component;
Check(); // Check that all the dimensions still match up.
}
void Nnet::AppendComponent(Component* dynamically_allocated_comp) {
components_.push_back(dynamically_allocated_comp);
Check();
}
void Nnet::AppendNnet(const Nnet& nnet_to_append) {
for(int32 i=0; i<nnet_to_append.NumComponents(); i++) {
AppendComponent(nnet_to_append.GetComponent(i).Copy());
}
Check();
}
void Nnet::RemoveComponent(int32 component) {
KALDI_ASSERT(component < NumComponents());
Component* ptr = components_[component];
components_.erase(components_.begin()+component);
delete ptr;
Check();
}
void Nnet::GetWeights(Vector<BaseFloat>* wei_copy) {
wei_copy->Resize(NumParams());
int32 pos = 0;
//copy the params
for(int32 n=0; n<nnet_.size(); n++) {
if(nnet_[n]->IsUpdatable()) {
switch(nnet_[n]->GetType()) {
for(int32 n=0; n<components_.size(); n++) {
if(components_[n]->IsUpdatable()) {
switch(components_[n]->GetType()) {
case Component::kAffineTransform : {
//get the weights from CuMatrix to Matrix
const CuMatrix<BaseFloat>& cu_mat =
dynamic_cast<AffineTransform*>(nnet_[n])->GetLinearity();
dynamic_cast<AffineTransform*>(components_[n])->GetLinearity();
Matrix<BaseFloat> mat(cu_mat.NumRows(),cu_mat.NumCols());
cu_mat.CopyToMat(&mat);
//copy the matrix row-by-row to the vector
@ -151,7 +212,7 @@ void Nnet::GetWeights(Vector<BaseFloat>* wei_copy) {
pos += mat_size;
//get the biases from CuVector to Vector
const CuVector<BaseFloat>& cu_vec =
dynamic_cast<AffineTransform*>(nnet_[n])->GetBias();
dynamic_cast<AffineTransform*>(components_[n])->GetBias();
Vector<BaseFloat> vec(cu_vec.Dim());
cu_vec.CopyToVec(&vec);
//append biases to the supervector
@ -161,7 +222,7 @@ void Nnet::GetWeights(Vector<BaseFloat>* wei_copy) {
default :
KALDI_ERR << "Unimplemented access to parameters "
<< "of updatable component "
<< Component::TypeToMarker(nnet_[n]->GetType());
<< Component::TypeToMarker(components_[n]->GetType());
}
}
}
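// Sketch of the supervector layout produced by GetWeights() (and expected by
// SetWeights() below): for each updatable AffineTransform with input dim d_in
// and output dim d_out, the d_out x d_in linearity is flattened row-by-row,
// immediately followed by the d_out biases, giving the (1 + d_in) * d_out
// values per component that NumParams() counts; components appear in
// network order.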
@ -172,12 +233,12 @@ void Nnet::GetWeights(Vector<BaseFloat>* wei_copy) {
void Nnet::SetWeights(const Vector<BaseFloat>& wei_src) {
KALDI_ASSERT(wei_src.Dim() == NumParams());
int32 pos = 0;
for(int32 n=0; n<nnet_.size(); n++) {
if(nnet_[n]->IsUpdatable()) {
switch(nnet_[n]->GetType()) {
for(int32 n=0; n<components_.size(); n++) {
if(components_[n]->IsUpdatable()) {
switch(components_[n]->GetType()) {
case Component::kAffineTransform : {
//get the component
AffineTransform* aff_t = dynamic_cast<AffineTransform*>(nnet_[n]);
AffineTransform* aff_t = dynamic_cast<AffineTransform*>(components_[n]);
//we need weight matrix with original dimensions
const CuMatrix<BaseFloat>& cu_mat = aff_t->GetLinearity();
Matrix<BaseFloat> mat(cu_mat.NumRows(),cu_mat.NumCols());
@ -205,7 +266,7 @@ void Nnet::SetWeights(const Vector<BaseFloat>& wei_src) {
default :
KALDI_ERR << "Unimplemented access to parameters "
<< "of updatable component "
<< Component::TypeToMarker(nnet_[n]->GetType());
<< Component::TypeToMarker(components_[n]->GetType());
}
}
}
@ -217,13 +278,13 @@ void Nnet::GetGradient(Vector<BaseFloat>* grad_copy) {
grad_copy->Resize(NumParams());
int32 pos = 0;
//copy the gradients
for(int32 n=0; n<nnet_.size(); n++) {
if(nnet_[n]->IsUpdatable()) {
switch(nnet_[n]->GetType()) {
for(int32 n=0; n<components_.size(); n++) {
if(components_[n]->IsUpdatable()) {
switch(components_[n]->GetType()) {
case Component::kAffineTransform : {
//get the weights from CuMatrix to Matrix
const CuMatrix<BaseFloat>& cu_mat =
dynamic_cast<AffineTransform*>(nnet_[n])->GetLinearityCorr();
dynamic_cast<AffineTransform*>(components_[n])->GetLinearityCorr();
Matrix<BaseFloat> mat(cu_mat.NumRows(),cu_mat.NumCols());
cu_mat.CopyToMat(&mat);
//copy the matrix row-by-row to the vector
@ -232,7 +293,7 @@ void Nnet::GetGradient(Vector<BaseFloat>* grad_copy) {
pos += mat_size;
//get the biases from CuVector to Vector
const CuVector<BaseFloat>& cu_vec =
dynamic_cast<AffineTransform*>(nnet_[n])->GetBiasCorr();
dynamic_cast<AffineTransform*>(components_[n])->GetBiasCorr();
Vector<BaseFloat> vec(cu_vec.Dim());
cu_vec.CopyToVec(&vec);
//append biases to the supervector
@ -242,7 +303,7 @@ void Nnet::GetGradient(Vector<BaseFloat>* grad_copy) {
default :
KALDI_ERR << "Unimplemented access to parameters "
<< "of updatable component "
<< Component::TypeToMarker(nnet_[n]->GetType());
<< Component::TypeToMarker(components_[n]->GetType());
}
}
}
@ -252,14 +313,14 @@ void Nnet::GetGradient(Vector<BaseFloat>* grad_copy) {
int32 Nnet::NumParams() const {
int32 n_params = 0;
for(int32 n=0; n<nnet_.size(); n++) {
if(nnet_[n]->IsUpdatable()) {
switch(nnet_[n]->GetType()) {
for(int32 n=0; n<components_.size(); n++) {
if(components_[n]->IsUpdatable()) {
switch(components_[n]->GetType()) {
case Component::kAffineTransform :
n_params += (1 + nnet_[n]->InputDim()) * nnet_[n]->OutputDim();
n_params += (1 + components_[n]->InputDim()) * components_[n]->OutputDim();
break;
default :
KALDI_WARN << Component::TypeToMarker(nnet_[n]->GetType())
KALDI_WARN << Component::TypeToMarker(components_[n]->GetType())
<< " is updatable, but its parameter count is not implemented";
}
}
@ -268,40 +329,95 @@ int32 Nnet::NumParams() const {
}
void Nnet::Read(std::istream &in, bool binary) {
void Nnet::Read(const std::string &file) {
bool binary;
Input in(file, &binary);
Read(in.Stream(), binary);
in.Close();
// Warn if the NN is empty
if(NumComponents() == 0) {
KALDI_WARN << "The network '" << file << "' is empty.";
}
}
void Nnet::Read(std::istream &is, bool binary) {
// get the network layers from a factory
Component *comp;
while (NULL != (comp = Component::Read(in, binary, this))) {
if (LayerCount() > 0 && nnet_.back()->OutputDim() != comp->InputDim()) {
while (NULL != (comp = Component::Read(is, binary))) {
if (NumComponents() > 0 && components_.back()->OutputDim() != comp->InputDim()) {
KALDI_ERR << "Dimensionality mismatch!"
<< " Previous layer output:" << nnet_.back()->OutputDim()
<< " Previous layer output:" << components_.back()->OutputDim()
<< " Current layer input:" << comp->InputDim();
}
nnet_.push_back(comp);
components_.push_back(comp);
}
// create empty buffers
propagate_buf_.resize(LayerCount()+1);
backpropagate_buf_.resize(LayerCount()-1);
propagate_buf_.resize(NumComponents()+1);
backpropagate_buf_.resize(NumComponents()-1);
// reset learn rate
opts_.learn_rate = 0.0;
Check(); //check consistency (dims...)
}
void Nnet::Write(const std::string &file, bool binary) {
Output out(file, binary, true);
Write(out.Stream(), binary);
out.Close();
}
void Nnet::Write(std::ostream &os, bool binary) {
Check();
WriteToken(os, binary, "<Nnet>");
if(binary == false) os << std::endl;
for(int32 i=0; i<NumComponents(); i++) {
components_[i]->Write(os, binary);
}
WriteToken(os, binary, "</Nnet>");
if(binary == false) os << std::endl;
}
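// For illustration, the text-mode (binary == false) output has this shape;
// the per-component line is only schematic, since each component writes its
// own tokens and data through its Write() method:
//
//   <Nnet>
//   <componentmarker> ...dims and data written by the component...
//   ...
//   </Nnet>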
std::string Nnet::Info() const {
std::ostringstream ostr;
ostr << "num-components " << LayerCount() << std::endl;
ostr << "num-components " << NumComponents() << std::endl;
ostr << "input-dim " << InputDim() << std::endl;
ostr << "output-dim " << OutputDim() << std::endl;
ostr << "number-of-parameters " << static_cast<float>(NumParams())/1e6
<< " million" << std::endl;
for (int32 i = 0; i < LayerCount(); i++)
for (int32 i = 0; i < NumComponents(); i++)
ostr << "component " << i+1 << " : "
<< Component::TypeToMarker(nnet_[i]->GetType())
<< ", input-dim " << nnet_[i]->InputDim()
<< ", output-dim " << nnet_[i]->OutputDim()
<< ", " << nnet_[i]->Info() << std::endl;
<< Component::TypeToMarker(components_[i]->GetType())
<< ", input-dim " << components_[i]->InputDim()
<< ", output-dim " << components_[i]->OutputDim()
<< ", " << components_[i]->Info() << std::endl;
return ostr.str();
}
void Nnet::Check() const {
for (size_t i = 0; i + 1 < components_.size(); i++) {
KALDI_ASSERT(components_[i] != NULL);
int32 output_dim = components_[i]->OutputDim(),
next_input_dim = components_[i+1]->InputDim();
KALDI_ASSERT(output_dim == next_input_dim);
}
}
void Nnet::SetTrainOptions(const NnetTrainOptions& opts) {
opts_ = opts;
//set values to individual components
for (int32 l=0; l<NumComponents(); l++) {
if(GetComponent(l).IsUpdatable()) {
dynamic_cast<UpdatableComponent&>(GetComponent(l)).SetTrainOptions(opts_);
}
}
}
} // namespace nnet1
} // namespace kaldi

@ -1,6 +1,6 @@
// nnet/nnet-nnet.h
// Copyright 2011-2013 Brno University of Technology (Author: Karel Vesely)
// See ../../COPYING for clarification regarding multiple authors
//
@ -51,31 +51,28 @@ class Nnet {
int32 InputDim() const;
/// Dimensionality of network outputs (posteriors | bn-features | etc.)
int32 OutputDim() const;
/// Returns number of layers in the network
int32 LayerCount() const {
return nnet_.size();
}
/// Access to an individual layer (unprotected)
Component* Layer(int32 index) {
return nnet_[index];
}
/// Get the position of a layer in the network
int32 IndexOfLayer(const Component& comp) const;
/// Returns number of components-- think of this as similar to # of layers, but
/// e.g. the nonlinearity and the linear part count as separate components,
/// so the number of components will be more than the number of layers.
int32 NumComponents() const { return components_.size(); }
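// (Illustration: a net stored as AffineTransform, Sigmoid, AffineTransform,
// Softmax has NumComponents() == 4, though one would usually describe it
// as a 2-layer MLP.)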
const Component& GetComponent(int32 c) const;
Component& GetComponent(int32 c);
/// Sets the c'th component to "component", taking ownership of the pointer
/// and deleting the corresponding one that we own.
void SetComponent(int32 c, Component *component);
/// Add another layer
/// Warning : the Nnet over-takes responsibility for freeing the memory
/// so use dynamically allocated Component only!
void AppendLayer(Component* dynamically_allocated_comp);
/// Concatenate the network
/// Warning : this is destructive, the arg src_nnet_will_be_empty
/// will be empty network after calling this method
void Concatenate(Nnet* src_nnet_will_be_empty);
/// Remove layer (checks for meaningful dimensions after removal)
void RemoveLayer(int32 index);
void RemoveLastLayer() {
RemoveLayer(LayerCount()-1);
}
/// Appends this component to the components already in the neural net.
/// Takes ownership of the pointer
void AppendComponent(Component *dynamically_allocated_comp);
/// Append another network to the current one (copy components).
void AppendNnet(const Nnet& nnet_to_append);
/// Remove component
void RemoveComponent(int32 c);
void RemoveLastComponent() { RemoveComponent(NumComponents()-1); }
/// Access to forward pass buffers
const std::vector<CuMatrix<BaseFloat> >& PropagateBuffer() const {
@ -86,7 +83,7 @@ class Nnet {
return backpropagate_buf_;
}
/// get the number of parameters in the network
/// Get the number of parameters in the network
int32 NumParams() const;
/// Get the network weights in a supervector
void GetWeights(Vector<BaseFloat>* wei_copy);
@ -103,8 +100,11 @@ class Nnet {
void Write(const std::string &file, bool binary);
/// Write MLP to stream
void Write(std::ostream &out, bool binary);
/// Create string with human readable description of the nnet instance
std::string Info() const;
/// Consistency check.
void Check() const;
/// Set training hyper-parameters to the network and its UpdatableComponent(s)
void SetTrainOptions(const NnetTrainOptions& opts);
@ -114,11 +114,9 @@ class Nnet {
}
private:
/// NnetType is alias to vector of components
typedef std::vector<Component*> NnetType;
/// Vector which contains all the layers composing the network network,
/// also non-linearities (sigmoid|softmax|tanh|...) are considered as layers.
NnetType nnet_;
/// Vector which contains all the components composing the neural network,
/// the components are for example: AffineTransform, Sigmoid, Softmax
std::vector<Component*> components_;
std::vector<CuMatrix<BaseFloat> > propagate_buf_; ///< buffers for forward pass
std::vector<CuMatrix<BaseFloat> > backpropagate_buf_; ///< buffers for backward pass
@ -130,109 +128,6 @@ class Nnet {
};
inline Nnet::~Nnet() {
// delete all the components
NnetType::iterator it;
for(it=nnet_.begin(); it!=nnet_.end(); ++it) {
delete *it;
}
}
inline int32 Nnet::InputDim() const {
if (LayerCount() == 0) {
KALDI_ERR << "No layers in MLP";
}
return nnet_.front()->InputDim();
}
inline int32 Nnet::OutputDim() const {
if (LayerCount() <= 0) {
KALDI_ERR << "No layers in MLP";
}
return nnet_.back()->OutputDim();
}
inline int32 Nnet::IndexOfLayer(const Component &comp) const {
for(int32 i=0; i<LayerCount(); i++) {
if (&comp == nnet_[i]) return i;
}
KALDI_ERR << "Component:" << &comp
<< " type:" << comp.GetType()
<< " not found in the MLP";
return -1;
}
inline void Nnet::AppendLayer(Component* dynamically_allocated_comp) {
if(LayerCount() > 0) {
KALDI_ASSERT(OutputDim() == dynamically_allocated_comp->InputDim());
}
nnet_.push_back(dynamically_allocated_comp);
}
inline void Nnet::Concatenate(Nnet* src_nnet_will_be_empty) {
if(LayerCount() > 0) {
KALDI_ASSERT(OutputDim() == src_nnet_will_be_empty->InputDim());
}
nnet_.insert(nnet_.end(),
src_nnet_will_be_empty->nnet_.begin(),
src_nnet_will_be_empty->nnet_.end());
src_nnet_will_be_empty->nnet_.clear();
}
inline void Nnet::RemoveLayer(int32 index) {
//make sure we don't break the dimensionalities in the nnet
KALDI_ASSERT(index < LayerCount());
KALDI_ASSERT(index == LayerCount()-1 || Layer(index)->InputDim() == Layer(index)->OutputDim());
//remove element from the vector
Component* ptr = nnet_[index];
nnet_.erase(nnet_.begin()+index);
delete ptr;
}
inline void Nnet::Read(const std::string &file) {
bool binary;
Input in(file, &binary);
Read(in.Stream(), binary);
in.Close();
// Warn if the NN is empty
if(LayerCount() == 0) {
KALDI_WARN << "The network '" << file << "' is empty.";
}
}
inline void Nnet::Write(const std::string &file, bool binary) {
Output out(file, binary, true);
Write(out.Stream(), binary);
out.Close();
}
inline void Nnet::Write(std::ostream &out, bool binary) {
for(int32 i=0; i<LayerCount(); i++) {
nnet_[i]->Write(out, binary);
}
}
inline void Nnet::SetTrainOptions(const NnetTrainOptions& opts) {
opts_ = opts;
//set values to individual components
for (int32 l=0; l<LayerCount(); l++) {
if(Layer(l)->IsUpdatable()) {
dynamic_cast<UpdatableComponent*>(Layer(l))->SetTrainOptions(opts_);
}
}
}
} // namespace nnet1
} // namespace kaldi

@ -1,6 +1,6 @@
// nnet/nnet-rbm.h
// Copyright 2012-2013 Brno University of Technology (Author: Karel Vesely)
// See ../../COPYING for clarification regarding multiple authors
//
@ -35,8 +35,8 @@ class RbmBase : public UpdatableComponent {
GAUSSIAN
} RbmNodeType;
RbmBase(int32 dim_in, int32 dim_out, Nnet *nnet)
: UpdatableComponent(dim_in, dim_out, nnet)
RbmBase(int32 dim_in, int32 dim_out)
: UpdatableComponent(dim_in, dim_out)
{ }
/*Is included in Component:: itf
@ -85,8 +85,8 @@ class RbmBase : public UpdatableComponent {
// RBMs use RbmUpdate(.)
void Update(const CuMatrix<BaseFloat> &input, const CuMatrix<BaseFloat> &diff) { }
// RBMs use option class RbmTrainOptions
void SetTrainOptions(const NnetTrainOptions&);
const NnetTrainOptions& GetTrainOptions() const;
void SetTrainOptions(const NnetTrainOptions&) { }
const NnetTrainOptions& GetTrainOptions() const { return opts_; }
NnetTrainOptions opts_;
//
////
@ -97,15 +97,14 @@ class RbmBase : public UpdatableComponent {
class Rbm : public RbmBase {
public:
Rbm(int32 dim_in, int32 dim_out, Nnet *nnet)
: RbmBase(dim_in, dim_out, nnet)
Rbm(int32 dim_in, int32 dim_out)
: RbmBase(dim_in, dim_out)
{ }
~Rbm()
{ }
ComponentType GetType() const {
return kRbm;
}
Component* Copy() const { return new Rbm(*this); }
ComponentType GetType() const { return kRbm; }
void ReadData(std::istream &is, bool binary) {
std::string vis_node_type, hid_node_type;
@ -164,7 +163,7 @@ class Rbm : public RbmBase {
void BackpropagateFnc(const CuMatrix<BaseFloat> &in, const CuMatrix<BaseFloat> &out,
const CuMatrix<BaseFloat> &out_diff, CuMatrix<BaseFloat> *in_diff) {
KALDI_ERR << "Cannot backpropagate through RBM!"
KALDI_ERR << "Cannot back-propagate through RBM!"
<< " Better convert it to <affinetransform> and <sigmoid>";
}
virtual void Update(const CuMatrix<BaseFloat> &input,
@ -226,9 +225,9 @@ class Rbm : public RbmBase {
// should be about the same. The model is particularly sensitive at the very
// beginning of the CD-1 training.
//
// We compute varinace of a)input minibatch b)reconstruction.
// We compute variance of a)input mini-batch b)reconstruction.
// When the ratio b)/a) is larger than 2, we:
// 1. scale down the weights and biases by b)/a) (for next minibatch b)/a) gets 1.0)
// 1. scale down the weights and biases by b)/a) (for next mini-batch b)/a) gets 1.0)
// 2. shrink learning rate by 0.9x
// 3. reset the momentum buffer
//
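// Numeric sketch of this safety rule (values purely illustrative): with
// globally-normalized input, pos_vis_stddev sums to ~1.0 per dimension; if
// the reconstruction gives neg_vis_stddev ~2.5, the ratio 2.5 > 2 trips the
// guard, so weights and biases get scaled down by 1/2.5, learn_rate becomes
// 0.9 * learn_rate, and the momentum buffers are zeroed.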
@ -255,7 +254,7 @@ class Rbm : public RbmBase {
pos_vis_stddev.MulElements(pos_vis_mean_h);
pos_vis_stddev.Scale(-1.0);
pos_vis_stddev.AddVec(1.0/pos_vis.NumRows(),pos_vis_second_h);
/* set negtive values to zero before the square root */
/* set negative values to zero before the square root */
for (int32 i=0; i<pos_vis_stddev.Dim(); i++) {
if(pos_vis_stddev(i) < 0.0) {
KALDI_WARN << "Forcing the variance to be non-negative! (set to zero)"
@ -282,7 +281,7 @@ class Rbm : public RbmBase {
neg_vis_stddev.MulElements(neg_vis_mean_h);
neg_vis_stddev.Scale(-1.0);
neg_vis_stddev.AddVec(1.0/neg_vis.NumRows(),neg_vis_second_h);
/* set negtive values to zero before the square root */
/* set negative values to zero before the square root */
for (int32 i=0; i<neg_vis_stddev.Dim(); i++) {
if(neg_vis_stddev(i) < 0.0) {
KALDI_WARN << "Forcing the variance to be non-negative! (set to zero)"
@ -306,12 +305,12 @@ class Rbm : public RbmBase {
vis_bias_corr_.SetZero();
hid_bias_corr_.SetZero();
KALDI_WARN << "Discrepancy between pos_hid and neg_hid varainces, "
KALDI_WARN << "Discrepancy between pos_hid and neg_hid variances, "
<< "danger of weight explosion. a) Reducing weights with scale " << scale
<< " b) Lowering learning rate to " << rbm_opts_.learn_rate
<< " [pos_vis_stddev(~1.0):" << pos_vis_stddev.Sum()/pos_vis.NumCols()
<< ",neg_vis_stddev:" << neg_vis_stddev.Sum()/neg_vis.NumCols() << "]";
return; /* ie. don't update weights with current stats */
return; /* i.e. don't update weights with current stats */
}
}
//

@ -1,6 +1,6 @@
// nnet/nnet-test.cc
// Copyright 2010 Karel Vesely
// Copyright 2010 Brno University of Technology (author: Karel Vesely)
// See ../../COPYING for clarification regarding multiple authors
//

@ -1,6 +1,6 @@
// nnet/nnet-trnopts.h
// Copyright 2013 Brno University of Technology (Author: Karel Vesely)
// See ../../COPYING for clarification regarding multiple authors
//

@ -1,6 +1,6 @@
// nnet/nnet-various.h
// Copyright 2012 Karel Vesely
// Copyright 2012-2013 Brno University of Technology (author: Karel Vesely)
// See ../../COPYING for clarification regarding multiple authors
//
@ -49,9 +49,9 @@ std::string MomentStatistics(const Vector<Real> &vec) {
vec_aux.MulElements(vec); // (vec-mean)^3
Real skewness = vec_aux.Sum() / pow(variance, 3.0/2.0) / vec.Dim();
// kurtosis (peakedness)
// - makes sence for symmetric distributions (skewness is zero)
// - makes sense for symmetric distributions (skewness is zero)
// - positive : 'sharper peak' than Normal distribution
// - negtive : 'heavier tails' than Normal distribution
// - negative : 'lighter tails' (flatter peak) than Normal distribution
// - zero : same peakedness as the Normal distribution
vec_aux.MulElements(vec); // (vec-mean)^4
Real kurtosis = vec_aux.Sum() / (variance * variance) / vec.Dim() - 3.0;
@ -66,6 +66,9 @@ std::string MomentStatistics(const Vector<Real> &vec) {
return ostr.str();
}
/**
* Overload of MomentStatistics for Matrix<Real>
*/
template <typename Real>
std::string MomentStatistics(const Matrix<Real> &mat) {
Vector<Real> vec(mat.NumRows()*mat.NumCols());
@ -73,6 +76,9 @@ std::string MomentStatistics(const Matrix<Real> &mat) {
return MomentStatistics(vec);
}
/**
* Overload of MomentStatistics for CuVector<Real>
*/
template <typename Real>
std::string MomentStatistics(const CuVector<Real> &vec) {
Vector<Real> vec_host(vec.Dim());
@ -80,6 +86,9 @@ std::string MomentStatistics(const CuVector<Real> &vec) {
return MomentStatistics(vec_host);
}
/**
* Overload of MomentStatistics for CuMatrix<Real>
*/
template <typename Real>
std::string MomentStatistics(const CuMatrix<Real> &mat) {
Matrix<Real> mat_host(mat.NumRows(),mat.NumCols());
@ -96,26 +105,25 @@ std::string MomentStatistics(const CuMatrix<Real> &mat) {
*/
class Splice : public Component {
public:
Splice(int32 dim_in, int32 dim_out, Nnet *nnet)
: Component(dim_in, dim_out, nnet)
Splice(int32 dim_in, int32 dim_out)
: Component(dim_in, dim_out)
{ }
~Splice()
{ }
ComponentType GetType() const {
return kSplice;
}
Component* Copy() const { return new Splice(*this); }
ComponentType GetType() const { return kSplice; }
void ReadData(std::istream &is, bool binary) {
//read double vector
// read double vector
Vector<double> vec_d;
vec_d.Read(is, binary);
//convert to int vector
// convert to int vector
std::vector<int32> vec_i(vec_d.Dim());
for(int32 i=0; i<vec_d.Dim(); i++) {
vec_i[i] = round(vec_d(i));
}
//push to GPU
// push to GPU
frame_offsets_.CopyFromVec(vec_i);
}
@ -155,30 +163,29 @@ class Splice : public Component {
/**
* Rearrange the matrix columns according to the indices in copy_from_indices_
*/
class Copy : public Component {
class CopyComponent: public Component {
public:
Copy(int32 dim_in, int32 dim_out, Nnet *nnet)
: Component(dim_in, dim_out, nnet)
CopyComponent(int32 dim_in, int32 dim_out)
: Component(dim_in, dim_out)
{ }
~Copy()
~CopyComponent()
{ }
ComponentType GetType() const {
return kCopy;
}
Component* Copy() const { return new CopyComponent(*this); }
ComponentType GetType() const { return kCopy; }
void ReadData(std::istream &is, bool binary) {
//read double vector
// read double vector
Vector<double> vec_d;
vec_d.Read(is, binary);
//subtract 1
// subtract 1
vec_d.Add(-1.0);
//convert to int vector
// convert to int vector
std::vector<int32> vec_i(vec_d.Dim());
for(int32 i=0; i<vec_d.Dim(); i++) {
vec_i[i] = round(vec_d(i));
}
//push to GPU
// push to GPU
copy_from_indices_.CopyFromVec(vec_i);
}
@ -222,15 +229,14 @@ class Copy : public Component {
*/
class AddShift : public Component {
public:
AddShift(int32 dim_in, int32 dim_out, Nnet *nnet)
: Component(dim_in, dim_out, nnet), shift_data_(dim_in)
AddShift(int32 dim_in, int32 dim_out)
: Component(dim_in, dim_out), shift_data_(dim_in)
{ }
~AddShift()
{ }
ComponentType GetType() const {
return kAddShift;
}
Component* Copy() const { return new AddShift(*this); }
ComponentType GetType() const { return kAddShift; }
void ReadData(std::istream &is, bool binary) {
//read the shift data
@ -279,18 +285,17 @@ class AddShift : public Component {
*/
class Rescale : public Component {
public:
Rescale(int32 dim_in, int32 dim_out, Nnet *nnet)
: Component(dim_in, dim_out, nnet), scale_data_(dim_in)
Rescale(int32 dim_in, int32 dim_out)
: Component(dim_in, dim_out), scale_data_(dim_in)
{ }
~Rescale()
{ }
ComponentType GetType() const {
return kRescale;
}
Component* Copy() const { return new Rescale(*this); }
ComponentType GetType() const { return kRescale; }
void ReadData(std::istream &is, bool binary) {
//read the shift data
// read the scale data
scale_data_.Read(is, binary);
}
@ -304,18 +309,18 @@ class Rescale : public Component {
void PropagateFnc(const CuMatrix<BaseFloat> &in, CuMatrix<BaseFloat> *out) {
out->CopyFromMat(in);
//rescale the data
// rescale the data
out->MulColsVec(scale_data_);
}
void BackpropagateFnc(const CuMatrix<BaseFloat> &in, const CuMatrix<BaseFloat> &out,
const CuMatrix<BaseFloat> &out_diff, CuMatrix<BaseFloat> *in_diff) {
in_diff->CopyFromMat(out_diff);
//derivative gets also scaled by the scale_data_
// derivative gets also scaled by the scale_data_
in_diff->MulColsVec(scale_data_);
}
//Data accessors
// Data accessors
const CuVector<BaseFloat>& GetScaleVec() {
return scale_data_;
}
@ -331,9 +336,6 @@ class Rescale : public Component {
} // namespace nnet1
} // namespace kaldi

@ -24,20 +24,9 @@
namespace kaldi {
namespace nnet2 {
static BaseFloat GetFirstLearningRate(const Nnet &nnet) {
for (int32 c = 0; c < nnet.NumComponents(); c++) {
const UpdatableComponent *uc =
dynamic_cast<const UpdatableComponent*>(&(nnet.GetComponent(c)));
if (uc != NULL)
return uc->LearningRate();
}
KALDI_ERR << "Neural net has no updatable components";
return 0.0;
}
/** This function makes sure the neural net ends with a
MixtureProbComponent. If it doesn't, it adds one
SumGroupComponent. If it doesn't, it adds one
(with a single mixture/matrix corresponding to each
output element.) [Before doing so, it makes sure
that the last layer is a SoftmaxLayer, which is what
@ -48,24 +37,20 @@ static BaseFloat GetFirstLearningRate(const Nnet &nnet) {
static void GiveNnetCorrectTopology(Nnet *nnet,
AffineComponent **affine_component,
SoftmaxComponent **softmax_component,
MixtureProbComponent **mixture_prob_component) {
SumGroupComponent **sum_group_component) {
int32 nc = nnet->NumComponents();
KALDI_ASSERT(nc > 0);
Component* component = &(nnet->GetComponent(nc - 1));
if ((*mixture_prob_component =
dynamic_cast<MixtureProbComponent*>(component)) == NULL) {
KALDI_LOG << "Adding MixtureProbComponent to neural net.";
if ((*sum_group_component =
dynamic_cast<SumGroupComponent*>(component)) == NULL) {
KALDI_LOG << "Adding SumGroupComponent to neural net.";
int32 dim = component->OutputDim();
// Give it the same learning rate as the first updatable layer we have.
BaseFloat learning_rate = GetFirstLearningRate(*nnet),
diag_element = 0.999; // actually it's a don't care.
std::vector<int32> sizes(dim, 1); // a vector of all ones, of dimension "dim".
*mixture_prob_component = new MixtureProbComponent();
(*mixture_prob_component)->Init(learning_rate,
diag_element,
sizes);
nnet->Append(*mixture_prob_component);
*sum_group_component = new SumGroupComponent();
(*sum_group_component)->Init(sizes);
nnet->Append(*sum_group_component);
nc++;
}
component = &(nnet->GetComponent(nc - 2));
@ -84,14 +69,16 @@ static void GiveNnetCorrectTopology(Nnet *nnet,
/**
This function works as follows.
We first make sure the neural net has the correct topology, so its
last component is a MixtureProbComponent.
last component is a SumGroupComponent.
We then get the counts for each matrix in the MixtureProbComponent (these
We then get the counts for each matrix in the SumGroupComponent (these
will either correspond to leaves in the decision tree, or level-1 leaves, if
we have a 2-level-tree system). We work out the total count for each of these
matrices, by getting the count from the SoftmaxComponent.
Then, for each matrix in the MixtureProbComponent, we
We then increase, if necessary, the dimensions that the SumGroupComponent sums
over, increase the dimension of the SoftmaxComponent if necessary, and duplicate
and then perturb the relevant rows of the AffineComponent.
*/
@ -100,18 +87,18 @@ void MixupNnet(const NnetMixupConfig &mixup_config,
Nnet *nnet) {
AffineComponent *affine_component = NULL;
SoftmaxComponent *softmax_component = NULL;
MixtureProbComponent *mixture_prob_component = NULL;
SumGroupComponent *sum_group_component = NULL;
GiveNnetCorrectTopology(nnet,
&affine_component,
&softmax_component,
&mixture_prob_component); // Adds a MixtureProbComponent if needed.
&sum_group_component); // Adds a SumGroupComponent if needed.
softmax_component->MixUp(mixup_config.num_mixtures,
mixup_config.power,
mixup_config.min_count,
mixup_config.perturb_stddev,
affine_component,
mixture_prob_component);
sum_group_component);
nnet->Check(); // Checks that dimensions all match up.
}
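// Shape sketch of one mix-up pass (the numbers are hypothetical): starting
// from a softmax of dimension 4 behind a freshly-added SumGroupComponent of
// sizes {1,1,1,1}, a count-based split might produce new sizes {3,5,2,2};
// MixUp() then grows the softmax and affine output dims to 12, while the
// SumGroupComponent still sums back down to the original 4 outputs.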
@ -120,15 +107,16 @@ void MixupNnet(const NnetMixupConfig &mixup_config,
void SoftmaxComponent::MixUp(int32 num_mixtures,
BaseFloat power,
BaseFloat min_count,
BaseFloat perturb_stddev,
AffineComponent *ac,
MixtureProbComponent *mc) {
SumGroupComponent *sc) {
// "counts" is derived from this->value_sum_ by summing.
Vector<BaseFloat> counts(mc->params_.size());
std::vector<int32> old_sizes;
sc->GetSizes(&old_sizes);
Vector<BaseFloat> counts(old_sizes.size());
int32 old_dim = 0;
for (size_t i = 0; i < mc->params_.size(); i++) {
int32 this_input_dim = mc->params_[i].NumCols();
for (size_t i = 0; i < old_sizes.size(); i++) {
int32 this_input_dim = old_sizes[i];
BaseFloat this_tot_count = 0.0; /// Total the count out of
/// all the output dims of the softmax layer that correspond
/// to this mixture. We'll use this total to allocate new quasi-Gaussians.
@ -141,16 +129,18 @@ void SoftmaxComponent::MixUp(int32 num_mixtures,
std::vector<int32> targets; // #mixtures for each state.
// Get the target number of mixtures for each state.
GetSplitTargets(counts, num_mixtures, power, min_count, &targets);
KALDI_ASSERT(targets.size() == mc->params_.size());
// floor each target to the current #mixture components.
KALDI_ASSERT(targets.size() == old_sizes.size());
std::vector<int32> new_sizes(old_sizes.size());
for (size_t i = 0; i < targets.size(); i++)
targets[i] = std::max(targets[i], mc->params_[i].NumCols());
int32 new_dim = std::accumulate(targets.begin(), targets.end(),
new_sizes[i] = std::max(targets[i], old_sizes[i]);
int32 new_dim = std::accumulate(new_sizes.begin(), new_sizes.end(),
static_cast<int32>(0)),
affine_input_dim = ac->InputDim();
KALDI_ASSERT(new_dim >= old_dim);
sc->Init(new_sizes);
// bias and linear terms from affine component:
Vector<BaseFloat> old_bias_term(ac->bias_params_);
@ -165,11 +155,10 @@ void SoftmaxComponent::MixUp(int32 num_mixtures,
// respectively. They get incremented in the following loop.
int32 old_offset = 0, new_offset = 0;
Vector<BaseFloat> old_counts(this->value_sum_);
for (size_t i = 0; i < mc->params_.size(); i++) {
const CuMatrix<BaseFloat> &this_old_params(mc->params_[i]);
int32 this_old_dim = this_old_params.NumCols(),
this_new_dim = targets[i],
this_cur_dim = this_old_dim; // this_cur_dim is loop variable.
for (size_t i = 0; i < old_sizes.size(); i++) {
int32 this_old_dim = old_sizes[i],
this_new_dim = new_sizes[i],
this_cur_dim = this_old_dim; // this_cur_dim is loop variable.
SubMatrix<BaseFloat> this_old_linear_term(old_linear_term,
old_offset, this_old_dim,
@ -184,8 +173,6 @@ void SoftmaxComponent::MixUp(int32 num_mixtures,
old_offset, this_old_dim),
this_new_counts(new_counts,
new_offset, this_new_dim);
Matrix<BaseFloat> this_new_params(this_old_params.NumRows(),
this_new_dim);
// Copy the same-dimensional part of the parameters and counts.
this_new_linear_term.Range(0, this_old_dim, 0, affine_input_dim).
@ -195,8 +182,6 @@ void SoftmaxComponent::MixUp(int32 num_mixtures,
this_new_counts.Range(0, this_old_dim).
CopyFromVec(this_old_counts);
// this_new_params is the mixture weights.
this_new_params.Range(0, this_old_params.NumRows(), 0, this_old_dim).
CopyFromMat(this_old_params);
// Add the new components...
for (; this_cur_dim < this_new_dim; this_cur_dim++) {
BaseFloat *count_begin = this_new_counts.Data(),
@ -216,13 +201,9 @@ void SoftmaxComponent::MixUp(int32 num_mixtures,
new_vec.AddVec(-perturb_stddev, rand);
this_new_bias_term(max_index) += log(0.5);
this_new_bias_term(new_index) = this_new_bias_term(max_index);
// now copy the column of the MixtureProbComponent parameters.
for (int32 j = 0; j < this_new_params.NumRows(); j++)
this_new_params(j, new_index) = this_new_params(j, max_index);
}
old_offset += this_old_dim;
new_offset += this_new_dim;
mc->params_[i] = this_new_params;
}
KALDI_ASSERT(old_offset == old_dim && new_offset == new_dim);
ac->SetParams(new_bias_term, new_linear_term);
@ -230,8 +211,6 @@ void SoftmaxComponent::MixUp(int32 num_mixtures,
this->value_sum_.CopyFromVec(new_counts);
this->count_ = this->value_sum_.Sum();
this->dim_ = new_dim;
mc->input_dim_ = new_dim; // keep this up to date.
// We already updated mc->params_.
KALDI_LOG << "Mixed up from dimension of " << old_dim << " to " << new_dim
<< " in the softmax layer.";
}

@ -27,15 +27,12 @@
namespace kaldi {
namespace nnet2 {
/** Configuration class that controls neural net "mix-up", a process similar
to Gaussian mixture splitting that is applied to the network's output layer.
*/
struct NnetMixupConfig {
BaseFloat power;
BaseFloat min_count;
int32 num_mixtures;
BaseFloat perturb_stddev;
NnetMixupConfig(): power(0.25), min_count(1000.0),
num_mixtures(-1), perturb_stddev(0.01) { }
@ -58,9 +55,8 @@ struct NnetMixupConfig {
This function does something similar to Gaussian mixture splitting for
GMMs, except applied to the output layer of the neural network.
We create additional outputs, which will be summed over using a
MixtureProbComponent (if one does not already exist, it will be
added.)
*/
SumGroupComponent.
*/
void MixupNnet(const NnetMixupConfig &mixup_config,
Nnet *nnet);
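// Minimal usage sketch (assumes an already-trained Nnet "nnet"; the values
// below are made up for illustration, not recommended settings):
//
//   NnetMixupConfig mixup_config;
//   mixup_config.num_mixtures = 12000;  // target total number of mixtures
//   mixup_config.power = 0.25;          // counts^power governs allocation
//   MixupNnet(mixup_config, &nnet);     // adds SumGroupComponent if needed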

@ -532,6 +532,27 @@ void UnitTestMixtureProbComponent() {
}
}
void UnitTestSumGroupComponent() {
std::vector<int32> sizes;
int32 num_sizes = 1 + rand() % 5;
for (int32 i = 0; i < num_sizes; i++)
sizes.push_back(1 + rand() % 5);
{
SumGroupComponent component;
component.Init(sizes);
UnitTestGenericComponentInternal(component);
}
{
const char *str = "sizes=3:4:5";
SumGroupComponent component;
component.InitFromString(str);
UnitTestGenericComponentInternal(component);
}
}
void UnitTestDctComponent() {
int32 m = 1 + rand() % 4, n = 1 + rand() % 4,
dct_dim = m, dim = m * n;
@ -760,6 +781,7 @@ int main() {
UnitTestBlockAffineComponent();
UnitTestBlockAffineComponentPreconditioned();
UnitTestMixtureProbComponent();
UnitTestSumGroupComponent();
UnitTestDctComponent();
UnitTestFixedLinearComponent();
UnitTestFixedAffineComponent();

@ -75,6 +75,8 @@ Component* Component::NewComponentOfType(const std::string &component_type) {
ans = new AffinePreconInputComponent();
} else if (component_type == "MixtureProbComponent") {
ans = new MixtureProbComponent();
} else if (component_type == "SumGroupComponent") {
ans = new SumGroupComponent();
} else if (component_type == "BlockAffineComponent") {
ans = new BlockAffineComponent();
} else if (component_type == "BlockAffineComponentPreconditioned") {
@ -2922,6 +2924,92 @@ void MixtureProbComponent::UnVectorize(const VectorBase<BaseFloat> &params) {
KALDI_ASSERT(offset == params.Dim());
}
void SumGroupComponent::Init(const std::vector<int32> &sizes) {
KALDI_ASSERT(!sizes.empty());
std::vector<Int32Pair> cpu_vec(sizes.size());
std::vector<int32> reverse_cpu_vec;
int32 cur_index = 0;
for (size_t i = 0; i < sizes.size(); i++) {
KALDI_ASSERT(sizes[i] > 0);
cpu_vec[i].first = cur_index;
cpu_vec[i].second = cur_index + sizes[i];
cur_index += sizes[i];
for (int32 j = cpu_vec[i].first; j < cpu_vec[i].second; j++)
reverse_cpu_vec.push_back(i);
}
this->indexes_ = cpu_vec;
this->reverse_indexes_ = reverse_cpu_vec;
this->input_dim_ = cur_index;
this->output_dim_ = sizes.size();
}
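// Worked example for Init(): sizes = {3, 4, 5} yields
//   indexes_         = {(0,3), (3,7), (7,12)}       // (start, end) per output
//   reverse_indexes_ = {0,0,0, 1,1,1,1, 2,2,2,2,2}  // output index per input
//   input_dim_ = 12, output_dim_ = 3.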
void SumGroupComponent::InitFromString(std::string args) {
std::string orig_args(args);
std::vector<int32> sizes;
bool ok = ParseFromString("sizes", &args, &sizes);
if (!ok || !args.empty() || sizes.empty())
KALDI_ERR << "Invalid initializer for layer of type "
<< Type() << ": \"" << orig_args << "\"";
this->Init(sizes);
}
Component* SumGroupComponent::Copy() const {
SumGroupComponent *ans = new SumGroupComponent();
ans->indexes_ = indexes_;
ans->reverse_indexes_ = reverse_indexes_;
ans->input_dim_ = input_dim_;
ans->output_dim_ = output_dim_;
return ans;
}
void SumGroupComponent::Read(std::istream &is, bool binary) {
ExpectOneOrTwoTokens(is, binary, "<SumGroupComponent>", "<Sizes>");
std::vector<int32> sizes;
ReadIntegerVector(is, binary, &sizes);
ExpectToken(is, binary, "<SumGroupComponent>");
this->Init(sizes);
}
void SumGroupComponent::GetSizes(std::vector<int32> *sizes) const {
std::vector<Int32Pair> indexes;
indexes_.CopyToVec(&indexes);
sizes->resize(indexes.size());
for (size_t i = 0; i < indexes.size(); i++) {
(*sizes)[i] = indexes[i].second - indexes[i].first;
if (i == 0) { KALDI_ASSERT(indexes[i].first == 0); }
else { KALDI_ASSERT(indexes[i].first == indexes[i-1].second); }
KALDI_ASSERT(indexes[i].second > indexes[i].first);
}
}
void SumGroupComponent::Write(std::ostream &os, bool binary) const {
WriteToken(os, binary, "<SumGroupComponent>");
WriteToken(os, binary, "<Sizes>");
std::vector<int32> sizes;
this->GetSizes(&sizes);
WriteIntegerVector(os, binary, sizes);
WriteToken(os, binary, "<SumGroupComponent>");
}
void SumGroupComponent::Propagate(const CuMatrixBase<BaseFloat> &in,
int32 num_chunks,
CuMatrix<BaseFloat> *out) const {
out->Resize(in.NumRows(), this->OutputDim(), kUndefined);
out->SumColumnRanges(in, indexes_);
}
void SumGroupComponent::Backprop(const CuMatrixBase<BaseFloat> &, // in_value,
const CuMatrixBase<BaseFloat> &, // out_value,
const CuMatrixBase<BaseFloat> &out_deriv,
int32 num_chunks,
Component *to_update,
CuMatrix<BaseFloat> *in_deriv) const {
in_deriv->Resize(out_deriv.NumRows(), InputDim());
in_deriv->CopyCols(out_deriv, reverse_indexes_);
}
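// Numeric sketch: with sizes = {2, 2} and an input row (1, 2, 3, 4),
// Propagate() produces (1+2, 3+4) = (3, 7); Backprop() just copies each
// output derivative back to every column it summed, so an out_deriv row
// (a, b) becomes the in_deriv row (a, a, b, b).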
std::string SpliceComponent::Info() const {
std::stringstream stream;

@ -450,7 +450,7 @@ class ScaleComponent: public Component {
class MixtureProbComponent; // Forward declaration.
class SumGroupComponent; // Forward declaration.
class AffineComponent; // Forward declaration.
class SoftmaxComponent: public NonlinearComponent {
@ -472,12 +472,13 @@ class SoftmaxComponent: public NonlinearComponent {
Component *to_update, // may be identical to "this".
CuMatrix<BaseFloat> *in_deriv) const;
void MixUp(int32 num_mixtures, // implemented in mixup-nnet.cc
void MixUp(int32 num_mixtures,
BaseFloat power,
BaseFloat min_count,
BaseFloat perturb_stddev,
AffineComponent *ac,
MixtureProbComponent *mc);
SumGroupComponent *sc);
virtual Component* Copy() const { return new SoftmaxComponent(*this); }
private:
SoftmaxComponent &operator = (const SoftmaxComponent &other); // Disallow.
@ -1227,8 +1228,6 @@ class BlockAffineComponentPreconditioned: public BlockAffineComponent {
// one for each row).
class MixtureProbComponent: public UpdatableComponent {
friend class SoftmaxComponent; // Mixing-up done by a function
// in that class.
public:
virtual int32 InputDim() const { return input_dim_; }
virtual int32 OutputDim() const { return output_dim_; }
@ -1275,6 +1274,53 @@ class MixtureProbComponent: public UpdatableComponent {
int32 output_dim_;
};
// SumGroupComponent is used to sum up groups of posteriors.
// It's used to introduce a kind of Gaussian-mixture-model-like
// idea into neural nets. This is basically a degenerate case of
// MixtureProbComponent; we had to implement it separately to
// be efficient for CUDA (we can use this one regardless of whether
// we have CUDA or not; it's the normal case we want anyway).
class SumGroupComponent: public Component {
public:
virtual int32 InputDim() const { return input_dim_; }
virtual int32 OutputDim() const { return output_dim_; }
void Init(const std::vector<int32> &sizes); // each entry gives the input dim
// (>= 1) summed over for one output dim.
void GetSizes(std::vector<int32> *sizes) const; // Get a vector saying, for
// each output-dim, how many
// inputs were summed over.
virtual void InitFromString(std::string args);
SumGroupComponent() { }
virtual std::string Type() const { return "SumGroupComponent"; }
virtual bool BackpropNeedsInput() const { return false; }
virtual bool BackpropNeedsOutput() const { return false; }
virtual void Propagate(const CuMatrixBase<BaseFloat> &in,
int32 num_chunks,
CuMatrix<BaseFloat> *out) const;
// Note: in_value and out_value are both dummy variables.
virtual void Backprop(const CuMatrixBase<BaseFloat> &in_value,
const CuMatrixBase<BaseFloat> &out_value,
const CuMatrixBase<BaseFloat> &out_deriv,
int32 num_chunks,
Component *to_update, // may be identical to "this".
CuMatrix<BaseFloat> *in_deriv) const;
virtual Component* Copy() const;
virtual void Read(std::istream &is, bool binary);
virtual void Write(std::ostream &os, bool binary) const;
private:
KALDI_DISALLOW_COPY_AND_ASSIGN(SumGroupComponent);
// Note: Int32Pair is just struct{ int32 first; int32 second }; it's defined
// in cu-matrixdim.h as extern "C" which is needed for the CUDA interface.
CuArray<Int32Pair> indexes_; // for each output index, the (start, end) input
// index.
CuArray<int32> reverse_indexes_; // for each input index, the output index.
int32 input_dim_;
int32 output_dim_;
};
/// PermuteComponent does a random permutation of the dimensions. Useful in
/// conjunction with block-diagonal transforms.
class PermuteComponent: public Component {

@ -12,7 +12,7 @@ BINFILES = nnet-train-xent-hardlab-perutt \
nnet-train-mmi-sequential \
nnet-train-mpe-sequential \
rbm-train-cd1-frmshuff rbm-convert-to-nnet \
nnet-forward nnet-copy nnet1-info nnet-concat \
nnet-forward nnet-copy nnet-info nnet-concat \
transf-to-nnet cmvn-to-nnet
OBJFILES =

@ -93,7 +93,7 @@ int main(int argc, char *argv[]) {
//create the shift component
{
AddShift* shift_component = new AddShift(shift.Dim(), shift.Dim(), &nnet);
AddShift* shift_component = new AddShift(shift.Dim(), shift.Dim());
//the pointer will be given to the nnet, so we don't need to call delete
//convert Vector to CuVector
@ -103,12 +103,12 @@ int main(int argc, char *argv[]) {
shift_component->SetShiftVec(cu_shift);
//append layer to the nnet
nnet.AppendLayer(shift_component);
nnet.AppendComponent(shift_component);
}
//create the scale component
{
Rescale* scale_component = new Rescale(scale.Dim(), scale.Dim(), &nnet);
Rescale* scale_component = new Rescale(scale.Dim(), scale.Dim());
//the pointer will be given to the nnet, so we don't need to call delete
//convert Vector to CuVector
@ -118,9 +118,8 @@ int main(int argc, char *argv[]) {
scale_component->SetScaleVec(cu_scale);
//append layer to the nnet
nnet.AppendLayer(scale_component);
nnet.AppendComponent(scale_component);
}
//write the nnet
{

@ -1,6 +1,6 @@
// nnetbin/nnet-concat.cc
// Copyright 2012 Karel Vesely
// Copyright 2012-2013 Brno University of Technology (Author: Karel Vesely)
// See ../../COPYING for clarification regarding multiple authors
//
@ -70,7 +70,7 @@ int main(int argc, char *argv[]) {
nnet_next.Read(ki.Stream(), binary_read);
}
//append nnet_next to the network nnet
nnet.Concatenate(&nnet_next);
nnet.AppendNnet(nnet_next);
}
//finally write the nnet to disk

@ -1,6 +1,6 @@
// nnetbin/nnet-copy.cc
// Copyright 2012 Karel Vesely
// Copyright 2012 Brno University of Technology (author: Karel Vesely)
// See ../../COPYING for clarification regarding multiple authors
//
@ -64,14 +64,14 @@ int main(int argc, char *argv[]) {
// optionally remove N first layers
if(remove_first_layers > 0) {
for(int32 i=0; i<remove_first_layers; i++) {
nnet.RemoveLayer(0);
nnet.RemoveComponent(0);
}
}
// optionally remove N last layers
if(remove_last_layers > 0) {
for(int32 i=0; i<remove_last_layers; i++) {
nnet.RemoveLastLayer();
nnet.RemoveLastComponent();
}
}

@ -87,15 +87,15 @@ int main(int argc, char *argv[]) {
Nnet nnet;
nnet.Read(model_filename);
//optionally remove softmax
if(no_softmax && nnet.Layer(nnet.LayerCount()-1)->GetType() == Component::kSoftmax) {
if(no_softmax && nnet.GetComponent(nnet.NumComponents()-1).GetType() == Component::kSoftmax) {
KALDI_LOG << "Removing softmax from the nnet " << model_filename;
nnet.RemoveLayer(nnet.LayerCount()-1);
nnet.RemoveComponent(nnet.NumComponents()-1);
}
//check for some non-sense option combinations
if(apply_log && no_softmax) {
KALDI_ERR << "Nonsense option combination : --apply-log=true and --no-softmax=true";
}
if(apply_log && nnet.Layer(nnet.LayerCount()-1)->GetType() != Component::kSoftmax) {
if(apply_log && nnet.GetComponent(nnet.NumComponents()-1).GetType() != Component::kSoftmax) {
KALDI_ERR << "Used --apply-log=true, but nnet " << model_filename
<< " does not have <softmax> as last component!";
}

@ -1,6 +1,6 @@
// nnetbin/nnet-train-mmi-sequential.cc
// Copyright 2012-2013 Karel Vesely
// Copyright 2012-2013 Brno University of Technology (author: Karel Vesely)
// See ../../COPYING for clarification regarding multiple authors
//
@ -173,9 +173,9 @@ int main(int argc, char *argv[]) {
Nnet nnet;
nnet.Read(model_filename);
// using activations directly: remove softmax, if present
if (nnet.Layer(nnet.LayerCount()-1)->GetType() == Component::kSoftmax) {
if (nnet.GetComponent(nnet.NumComponents()-1).GetType() == Component::kSoftmax) {
KALDI_LOG << "Removing softmax from the nnet " << model_filename;
nnet.RemoveLayer(nnet.LayerCount()-1);
nnet.RemoveComponent(nnet.NumComponents()-1);
} else {
KALDI_LOG << "The nnet was without softmax " << model_filename;
}
@ -424,7 +424,7 @@ int main(int argc, char *argv[]) {
//add back the softmax
KALDI_LOG << "Appending the softmax " << target_model_filename;
nnet.AppendLayer(new Softmax(nnet.OutputDim(),nnet.OutputDim(),&nnet));
nnet.AppendComponent(new Softmax(nnet.OutputDim(),nnet.OutputDim()));
//store the nnet
nnet.Write(target_model_filename, binary);

@ -1,6 +1,6 @@
// nnetbin/nnet-train-mpe-sequential.cc
// Copyright 2011-2013 Karel Vesely; Arnab Ghoshal
// Copyright 2011-2013 Brno University of Technology (author: Karel Vesely); Arnab Ghoshal
// See ../../COPYING for clarification regarding multiple authors
//
@ -175,9 +175,9 @@ int main(int argc, char *argv[]) {
Nnet nnet;
nnet.Read(model_filename);
// using activations directly: remove softmax, if present
if (nnet.Layer(nnet.LayerCount()-1)->GetType() == Component::kSoftmax) {
if (nnet.GetComponent(nnet.NumComponents()-1).GetType() == Component::kSoftmax) {
KALDI_LOG << "Removing softmax from the nnet " << model_filename;
nnet.RemoveLayer(nnet.LayerCount()-1);
nnet.RemoveComponent(nnet.NumComponents()-1);
} else {
KALDI_LOG << "The nnet was without softmax " << model_filename;
}
@ -357,7 +357,7 @@ int main(int argc, char *argv[]) {
// add the softmax layer back before writing
KALDI_LOG << "Appending the softmax " << target_model_filename;
nnet.AppendLayer(new Softmax(nnet.OutputDim(),nnet.OutputDim(),&nnet));
nnet.AppendComponent(new Softmax(nnet.OutputDim(),nnet.OutputDim()));
//store the nnet
nnet.Write(target_model_filename, binary);

@ -57,9 +57,9 @@ int main(int argc, char *argv[]) {
nnet.Read(ki.Stream(), binary_read);
}
KALDI_ASSERT(nnet.LayerCount() == 1);
KALDI_ASSERT(nnet.Layer(0)->GetType() == Component::kRbm);
RbmBase& rbm = dynamic_cast<RbmBase&>(*nnet.Layer(0));
KALDI_ASSERT(nnet.NumComponents() == 1);
KALDI_ASSERT(nnet.GetComponent(0).GetType() == Component::kRbm);
RbmBase& rbm = dynamic_cast<RbmBase&>(nnet.GetComponent(0));
{
Output ko(model_out_filename, binary_write);

@ -103,9 +103,9 @@ int main(int argc, char *argv[]) {
Nnet nnet;
nnet.Read(model_filename);
KALDI_ASSERT(nnet.LayerCount()==1);
KALDI_ASSERT(nnet.Layer(0)->GetType() == Component::kRbm);
RbmBase &rbm = dynamic_cast<RbmBase&>(*nnet.Layer(0));
KALDI_ASSERT(nnet.NumComponents()==1);
KALDI_ASSERT(nnet.GetComponent(0).GetType() == Component::kRbm);
RbmBase &rbm = dynamic_cast<RbmBase&>(nnet.GetComponent(0));
// Configure the RBM
// first, make some options easy to access:

@ -61,7 +61,7 @@ int main(int argc, char *argv[]) {
//we will put the transform to the nnet
Nnet nnet;
//create affine transform layer
AffineTransform* layer = new AffineTransform(transform.NumCols(),transform.NumRows(),&nnet);
AffineTransform* layer = new AffineTransform(transform.NumCols(),transform.NumRows());
//the pointer will be given to the nnet, so we don't need to call delete
//convert Matrix to CuMatrix
@ -71,7 +71,7 @@ int main(int argc, char *argv[]) {
layer->SetLinearity(cu_transform);
//append layer to the nnet
nnet.AppendLayer(layer);
nnet.AppendComponent(layer);
//write the nnet
{