зеркало из https://github.com/mozilla/kaldi.git
sandbox/dan2: merge various changes from trunk, plus some changes to enable efficient GPU-based training after 'mix-up'.
git-svn-id: https://svn.code.sf.net/p/kaldi/code/sandbox/dan2@3108 5e6a8d80-dfce-4ca6-a32a-6e07a63d50c8
This commit is contained in:
Родитель
2545c1576e
Коммит
0b13df94de
|
@ -0,0 +1,24 @@
|
|||
#!/bin/bash

# Recipe for the 100 hour subset: trains a tanh network into exp/nnet5a2
# (skipped if a final model is already there), then launches eval2000
# decoding for each language model in the background.

. cmd.sh

( # TODO: change 5a to 5a2.
  nnet_dir=exp/nnet5a2
  gmm_dir=exp/tri4a

  # Only train when no finished model exists yet.
  if ! [ -f $nnet_dir/final.mdl ]; then
    steps/nnet2/train_tanh.sh --stage 215 \
      --mix-up 8000 \
      --initial-learning-rate 0.01 --final-learning-rate 0.001 \
      --num-jobs-nnet 16 --num-hidden-layers 4 \
      --hidden-layer-dim 1024 \
      --cmd "$decode_cmd" \
      data/train_100k_nodup data/lang $gmm_dir $nnet_dir || exit 1
  fi

  # One background decoding job per language model.
  for lm in tg fsh_tgpr; do
    decode_name=decode_eval2000_sw1_$lm
    steps/decode_nnet_cpu.sh --cmd "$decode_cmd" --nj 30 \
      --config conf/decode.config --transform-dir $gmm_dir/$decode_name \
      $gmm_dir/graph_sw1_$lm data/eval2000 $nnet_dir/$decode_name &
  done
)
|
||||
|
|
@ -1,17 +1,19 @@
|
|||
#!/bin/bash
|
||||
|
||||
# This runs on the 100 hour subset. This version of the recipe runs on GPUs.
|
||||
# We assume you have 8 GPU machines. You have to use --num-threads 1 so it will
|
||||
# use the version of the code that can use GPUs.
|
||||
# We assume the queue is set up as in JHU (or as in the "Kluster" project
|
||||
# on Sourceforge) where "gpu" is a consumable resource that you can set to
|
||||
# number of GPU cards a machine has.
|
||||
# We assume you have 8 GPU cards. You have to use --num-threads 1 so it will
|
||||
# use the version of the code that can use GPUs (the -parallel training code
|
||||
# cannot use GPUs unless we make further modifications as the CUDA model assumes
|
||||
# a single thread per GPU context, and we're not currently set up to create multiple
|
||||
# GPU contexts). We assume the queue is set up as in JHU (or
|
||||
# as in the "Kluster" project on Sourceforge) where "gpu" is a consumable
|
||||
# resource that you can set to number of GPU cards a machine has.
|
||||
|
||||
. cmd.sh
|
||||
|
||||
(
|
||||
if [ ! -f exp/nnet5b/final.mdl ]; then
|
||||
steps/nnet2/train_tanh.sh --cmd "$decode_cmd -l gpu=1" --parallel-opts "" --stage 0 \
|
||||
steps/nnet2/train_tanh.sh --cmd "$decode_cmd -l gpu=1" --parallel-opts "" --stage 253 \
|
||||
--num-threads 1 \
|
||||
--mix-up 8000 \
|
||||
--initial-learning-rate 0.01 --final-learning-rate 0.001 \
|
||||
|
@ -26,4 +28,3 @@
|
|||
exp/tri4a/graph_sw1_${lm_suffix} data/eval2000 exp/nnet5b/decode_eval2000_sw1_${lm_suffix} &
|
||||
done
|
||||
)
|
||||
|
||||
|
|
|
@ -0,0 +1,29 @@
|
|||
#!/bin/bash

# This runs on the 100 hour subset.  This version of the recipe runs on GPUs.
# We assume you have 8 GPU cards.  You have to use --num-threads 1 so it will
# use the version of the code that can use GPUs (the -parallel training code
# cannot use GPUs unless we make further modifications, as the CUDA model assumes
# a single thread per GPU context, and we're not currently set up to create multiple
# GPU contexts).  We assume the queue is set up as in JHU (or
# as in the "Kluster" project on Sourceforge) where "gpu" is a consumable
# resource that you can set to the number of GPU cards a machine has.

. cmd.sh

(
  # The network is trained into (and decoded from) exp/nnet5b2, so that is the
  # directory whose final.mdl decides whether training can be skipped.
  if [ ! -f exp/nnet5b2/final.mdl ]; then
    steps/nnet2/train_tanh.sh --cmd "$decode_cmd -l gpu=1" --parallel-opts "" --io-opts "-tc 5 -l gpu=0" --stage -3 \
      --num-threads 1 --minibatch-size 512 --max-change 40.0 --mix-up 8000 \
      --initial-learning-rate 0.01 --final-learning-rate 0.001 \
      --num-jobs-nnet 8 --num-hidden-layers 4 \
      --hidden-layer-dim 1024 \
      data/train_100k_nodup data/lang exp/tri4a exp/nnet5b2 || exit 1;
  fi

  # Decode eval2000 with each language model; the jobs run in the background.
  for lm_suffix in tg fsh_tgpr; do
    steps/decode_nnet_cpu.sh --cmd "$decode_cmd" --nj 30 \
      --config conf/decode.config --transform-dir exp/tri4a/decode_eval2000_sw1_${lm_suffix} \
      exp/tri4a/graph_sw1_${lm_suffix} data/eval2000 exp/nnet5b2/decode_eval2000_sw1_${lm_suffix} &
  done
)
|
|
@ -6,7 +6,7 @@
|
|||
|
||||
(
|
||||
if [ ! -f exp/nnet5a/final.mdl ]; then
|
||||
steps/train_nnet_cpu.sh \
|
||||
steps/train_nnet_cpu.sh --stage 103 \
|
||||
--mix-up 8000 \
|
||||
--initial-learning-rate 0.01 --final-learning-rate 0.001 \
|
||||
--num-jobs-nnet 16 --num-hidden-layers 4 \
|
||||
|
|
|
@ -62,7 +62,7 @@ max_change=10.0
|
|||
mix_up=0 # Number of components to mix up to (should be > #tree leaves, if
|
||||
# specified.)
|
||||
num_threads=16
|
||||
parallel_opts="-pe smp $num_threads" # using a smallish #threads by default, out of stability concerns.
|
||||
parallel_opts="-pe smp 16 -l ram_free=1G,mem_free=1G" # by default we use 16 threads; this lets the queue know.
|
||||
# note: parallel_opts doesn't automatically get adjusted if you adjust num-threads.
|
||||
cleanup=true
|
||||
egs_dir=
|
||||
|
@ -105,8 +105,9 @@ if [ $# != 4 ]; then
|
|||
echo " --num-threads <num-threads|16> # Number of parallel threads per job (will affect results"
|
||||
echo " # as well as speed; may interact with batch size; if you increase"
|
||||
echo " # this, you may want to decrease the batch size."
|
||||
echo " --parallel-opts <opts|\"-pe smp 16\"> # extra options to pass to e.g. queue.pl for processes that"
|
||||
echo " # use multiple threads."
|
||||
echo " --parallel-opts <opts|\"-pe smp 16 -l ram_free=1G,mem_free=1G\"> # extra options to pass to e.g. queue.pl for processes that"
|
||||
echo " # use multiple threads... note, you might have to reduce mem_free,ram_free"
|
||||
echo " # versus your defaults, because it gets multiplied by the -pe smp argument."
|
||||
echo " --io-opts <opts|\"-tc 10\"> # Options given to e.g. queue.pl for jobs that do a lot of I/O."
|
||||
echo " --minibatch-size <minibatch-size|128> # Size of minibatch to process (note: product with --num-threads"
|
||||
echo " # should not get too large, e.g. >2k)."
|
||||
|
|
|
@ -50,7 +50,7 @@ num_hidden_layers=3
|
|||
|
||||
stage=-5
|
||||
|
||||
io_opts="-tc 5" # for jobs with a lot of I/O, limits the number running at one time.
|
||||
io_opts="-tc 5" # for jobs with a lot of I/O, limits the number running at one time. These don't
|
||||
splice_width=4 # meaning +- 4 frames on each side for second LDA
|
||||
randprune=4.0 # speeds up LDA.
|
||||
alpha=4.0
|
||||
|
@ -58,7 +58,7 @@ max_change=10.0
|
|||
mix_up=0 # Number of components to mix up to (should be > #tree leaves, if
|
||||
# specified.)
|
||||
num_threads=16
|
||||
parallel_opts="-pe smp $num_threads" # using a smallish #threads by default, out of stability concerns.
|
||||
parallel_opts="-pe smp 16 -l ram_free=1G,mem_free=1G" # by default we use 16 threads; this lets the queue know.
|
||||
# note: parallel_opts doesn't automatically get adjusted if you adjust num-threads.
|
||||
cleanup=true
|
||||
egs_dir=
|
||||
|
@ -101,8 +101,9 @@ if [ $# != 4 ]; then
|
|||
echo " --num-threads <num-threads|16> # Number of parallel threads per job (will affect results"
|
||||
echo " # as well as speed; may interact with batch size; if you increase"
|
||||
echo " # this, you may want to decrease the batch size."
|
||||
echo " --parallel-opts <opts|\"-pe smp 16\"> # extra options to pass to e.g. queue.pl for processes that"
|
||||
echo " # use multiple threads."
|
||||
echo " --parallel-opts <opts|\"-pe smp 16 -l ram_free=1G,mem_free=1G\"> # extra options to pass to e.g. queue.pl for processes that"
|
||||
echo " # use multiple threads... note, you might have to reduce mem_free,ram_free"
|
||||
echo " # versus your defaults, because it gets multiplied by the -pe smp argument."
|
||||
echo " --io-opts <opts|\"-tc 10\"> # Options given to e.g. queue.pl for jobs that do a lot of I/O."
|
||||
echo " --minibatch-size <minibatch-size|128> # Size of minibatch to process (note: product with --num-threads"
|
||||
echo " # should not get too large, e.g. >2k)."
|
||||
|
@ -170,7 +171,7 @@ if [ $stage -le -3 ] && [ -z "$egs_dir" ]; then
|
|||
echo "$0: calling get_egs.sh"
|
||||
[ ! -z $spk_vecs_dir ] && spk_vecs_opt="--spk-vecs-dir $spk_vecs_dir";
|
||||
steps/nnet2/get_egs.sh $spk_vecs_opt --samples-per-iter $samples_per_iter --num-jobs-nnet $num_jobs_nnet \
|
||||
--splice-width $splice_width --stage $get_egs_stage --cmd "$cmd" $egs_opts \
|
||||
--splice-width $splice_width --stage $get_egs_stage --cmd "$cmd" $egs_opts --io-opts "$io_opts" \
|
||||
$data $lang $alidir $dir || exit 1;
|
||||
fi
|
||||
|
||||
|
|
13
src/Makefile
13
src/Makefile
|
@ -9,6 +9,14 @@ SUBDIRS = base matrix util feat tree thread gmm tied transform sgmm \
|
|||
fstext hmm lm decoder lat cudamatrix nnet \
|
||||
bin fstbin gmmbin fgmmbin tiedbin sgmmbin featbin \
|
||||
nnetbin latbin sgmm2 sgmm2bin nnet2 nnet2bin kwsbin
|
||||
|
||||
# Directories visited by the "valgrind" target.  This is SUBDIRS without
# cudamatrix, which is listed separately in CUDAMEMTESTDIR.  The neural-net
# directories are named nnet2/nnet2bin here, matching SUBDIRS.
MEMTESTDIRS = base matrix util feat tree thread gmm tied transform sgmm \
              fstext hmm lm decoder lat nnet \
              bin fstbin gmmbin fgmmbin tiedbin sgmmbin featbin \
              nnetbin latbin sgmm2 sgmm2bin nnet2 nnet2bin kwsbin
|
||||
|
||||
CUDAMEMTESTDIR = cudamatrix
|
||||
|
||||
SUBDIRS_LIB = $(filter-out %bin, $(SUBDIRS))
|
||||
|
||||
|
||||
|
@ -97,8 +105,11 @@ ext_test: $(addsuffix /test, $(EXT_SUBDIRS))
|
|||
%/test: % mklibdir
|
||||
$(MAKE) -C $< test
|
||||
|
||||
cudavalgrind:
|
||||
-for x in $(CUDAMEMTESTDIR); do $(MAKE) -C $$x valgrind || { echo "valgrind on $$x failed"; exit 1; }; done
|
||||
|
||||
valgrind:
|
||||
-for x in $(SUBDIRS); do $(MAKE) -C $$x valgrind || { echo "valgrind on $$x failed"; exit 1; }; done
|
||||
-for x in $(MEMTESTDIRS); do $(MAKE) -C $$x valgrind || { echo "valgrind on $$x failed"; exit 1; }; done
|
||||
|
||||
|
||||
depend: $(addsuffix /depend, $(SUBDIRS))
|
||||
|
|
|
@ -12,7 +12,7 @@ LDFLAGS += $(CUDA_LDFLAGS)
|
|||
LDLIBS += $(CUDA_LDLIBS)
|
||||
|
||||
TESTFILES = cu-vector-test cu-matrix-test cu-math-test cu-test cu-sp-matrix-test cu-packed-matrix-test cu-tp-matrix-test \
|
||||
cu-block-matrix-test cu-matrix-speed-test cu-vector-speed-test cu-sp-matrix-speed-test
|
||||
cu-block-matrix-test cu-matrix-speed-test cu-vector-speed-test cu-sp-matrix-speed-test cu-array-test
|
||||
|
||||
|
||||
OBJFILES = cu-device.o cu-math.o cu-matrix.o cu-packed-matrix.o cu-sp-matrix.o \
|
||||
|
|
|
@ -1,6 +1,7 @@
|
|||
// cudamatrix/cu-array-inl.h
|
||||
|
||||
// Copyright 2009-2012 Karel Vesely
|
||||
// 2013 Johns Hopkins University (author: Daniel Povey)
|
||||
|
||||
// See ../../COPYING for clarification regarding multiple authors
|
||||
//
|
||||
|
@ -113,7 +114,7 @@ void CuArray<T>::CopyToVec(std::vector<T> *dst) const {
|
|||
if (CuDevice::Instantiate().Enabled()) {
|
||||
Timer tim;
|
||||
CU_SAFE_CALL(cudaMemcpy(&dst->front(), Data(), dim_*sizeof(T), cudaMemcpyDeviceToHost));
|
||||
CuDevice::Instantiate().AccuProfile("CuArray::CopyToVecD2H",tim.Elapsed());
|
||||
CuDevice::Instantiate().AccuProfile("CuArray::CopyToVecD2H", tim.Elapsed());
|
||||
} else
|
||||
#endif
|
||||
{
|
||||
|
@ -129,7 +130,7 @@ void CuArray<T>::SetZero() {
|
|||
if (CuDevice::Instantiate().Enabled()) {
|
||||
Timer tim;
|
||||
CU_SAFE_CALL(cudaMemset(data_, 0, dim_ * sizeof(T)));
|
||||
CuDevice::Instantiate().AccuProfile("CuArray::SetZero",tim.Elapsed());
|
||||
CuDevice::Instantiate().AccuProfile("CuArray::SetZero", tim.Elapsed());
|
||||
} else
|
||||
#endif
|
||||
{
|
||||
|
@ -184,6 +185,24 @@ inline void CuArray<int32>::Set(const int32 &value) {
|
|||
}
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
void CuArray<T>::CopyFromArray(const CuArray<T> &src) {
|
||||
this->Resize(src.Dim(), kUndefined);
|
||||
if (dim_ == 0) return;
|
||||
#if HAVE_CUDA == 1
|
||||
if (CuDevice::Instantiate().Enabled()) {
|
||||
Timer tim;
|
||||
CU_SAFE_CALL(cudaMemcpy(this->data_, src.data_, dim_ * sizeof(T),
|
||||
cudaMemcpyDeviceToDevice));
|
||||
CuDevice::Instantiate().AccuProfile(__func__, tim.Elapsed());
|
||||
} else
|
||||
#endif
|
||||
{
|
||||
memcpy(this->data_, src.data_, dim_ * sizeof(T));
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
} // namespace kaldi
|
||||
|
||||
#endif
|
||||
|
|
|
@ -0,0 +1,124 @@
|
|||
// cudamatrix/cu-array-test.cc
|
||||
|
||||
// Copyright 2013 Johns Hopkins University (author: Daniel Povey)
|
||||
|
||||
// See ../../COPYING for clarification regarding multiple authors
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
|
||||
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
|
||||
// MERCHANTABLITY OR NON-INFRINGEMENT.
|
||||
// See the Apache 2 License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
|
||||
#include <iostream>
#include <vector>
#include <cstdlib>
#include <cstring>
|
||||
|
||||
#include "base/kaldi-common.h"
|
||||
#include "util/common-utils.h"
|
||||
#include "cudamatrix/cu-array.h"
|
||||
|
||||
using namespace kaldi;
|
||||
|
||||
|
||||
namespace kaldi {
|
||||
|
||||
|
||||
|
||||
|
||||
template<class T>
|
||||
static void UnitTestCuArray() {
|
||||
for (int32 i = 0; i < 30; i++) {
|
||||
int32 size = rand() % 5;
|
||||
size = size * size * size; // Have a good distribution of sizes, including >256.
|
||||
int32 size2 = rand() % 4;
|
||||
std::vector<T> vec(size);
|
||||
std::vector<T> garbage_vec(size2); // We just use garbage_vec to make sure
|
||||
// we sometimes resize from empty,
|
||||
// sometimes not.
|
||||
|
||||
int32 byte_size = size * sizeof(T);
|
||||
std::vector<char> rand_c(byte_size);
|
||||
for (size_t i = 0; i < byte_size; i++)
|
||||
rand_c[i] = rand() % 256;
|
||||
if (!vec.empty()) {
|
||||
std::memcpy((void*)&(vec[0]), (void*)&(rand_c[0]),
|
||||
byte_size);
|
||||
}
|
||||
|
||||
{ // test constructor from vector and CopyToVec.
|
||||
CuArray<T> cu_vec(vec);
|
||||
std::vector<T> vec2;
|
||||
cu_vec.CopyToVec(&vec2);
|
||||
KALDI_ASSERT(vec2 == vec);
|
||||
}
|
||||
|
||||
{ // test assignment operator from CuArray.
|
||||
CuArray<T> cu_vec(vec);
|
||||
CuArray<T> cu_vec2(garbage_vec);
|
||||
cu_vec2 = cu_vec;
|
||||
std::vector<T> vec2;
|
||||
cu_vec2.CopyToVec(&vec2);
|
||||
KALDI_ASSERT(vec2 == vec);
|
||||
KALDI_ASSERT(cu_vec2.Dim() == int32(vec2.size())); // test Dim()
|
||||
}
|
||||
|
||||
{ // test resize with resize_type = kSetZero.
|
||||
CuArray<T> cu_vec(vec);
|
||||
cu_vec.Resize(size, kSetZero);
|
||||
std::vector<T> vec2(vec);
|
||||
|
||||
if (!vec2.empty())
|
||||
std::memset(&(vec2[0]), 0, vec2.size() * sizeof(T));
|
||||
std::vector<T> vec3;
|
||||
cu_vec.CopyToVec(&vec3);
|
||||
KALDI_ASSERT(vec2 == vec3); // testing equality of zero arrays.
|
||||
}
|
||||
|
||||
if (sizeof(T) == sizeof(int32) && size > 0) { // test Set for type int32, or same size.
|
||||
CuArray<T> cu_vec(vec);
|
||||
cu_vec.Set(vec[0]);
|
||||
for (size_t i = 1; i < vec.size(); i++) vec[i] = vec[0];
|
||||
std::vector<T> vec2;
|
||||
cu_vec.CopyToVec(&vec2);
|
||||
KALDI_ASSERT(vec2 == vec);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
} // namespace kaldi
|
||||
|
||||
|
||||
int main() {
|
||||
for (int32 loop = 0; loop < 2; loop++) {
|
||||
#if HAVE_CUDA == 1
|
||||
if (loop == 0)
|
||||
CuDevice::Instantiate().SelectGpuId(-1); // -1 means no GPU
|
||||
else
|
||||
CuDevice::Instantiate().SelectGpuId(-2); // -2 .. automatic selection
|
||||
#endif
|
||||
|
||||
//kaldi::UnitTestCuArray<float>();
|
||||
kaldi::UnitTestCuArray<double>();
|
||||
kaldi::UnitTestCuArray<int32>();
|
||||
kaldi::UnitTestCuArray<std::pair<int32, int32> >();
|
||||
|
||||
if (loop == 0)
|
||||
KALDI_LOG << "Tests without GPU use succeeded.\n";
|
||||
else
|
||||
KALDI_LOG << "Tests with GPU use (if available) succeeded.\n";
|
||||
}
|
||||
#if HAVE_CUDA == 1
|
||||
CuDevice::Instantiate().PrintProfile();
|
||||
#endif
|
||||
return 0;
|
||||
}
|
|
@ -49,7 +49,10 @@ class CuArray {
|
|||
/// Constructor from CPU-based int vector
|
||||
explicit CuArray<T>(const std::vector<T> &src):
|
||||
dim_(0), data_(NULL) { CopyFromVec(src); }
|
||||
|
||||
|
||||
/// Copy constructor: starts out empty and then copies "src" via
/// CopyFromArray().  Note that it is declared explicit.
explicit CuArray<T>(const CuArray<T> &src): dim_(0), data_(NULL) {
  CopyFromArray(src);
}
|
||||
|
||||
/// Destructor
|
||||
~CuArray() { Destroy(); }
|
||||
|
||||
|
@ -73,6 +76,9 @@ class CuArray {
|
|||
/// and any constructors or assignment operators are not called.
|
||||
void CopyFromVec(const std::vector<T> &src);
|
||||
|
||||
/// This function resizes if needed.
|
||||
void CopyFromArray(const CuArray<T> &src);
|
||||
|
||||
/// This function resizes *dst if needed. On resize of "dst", the STL vector
|
||||
/// may call copy-constructors, initializers, and assignment operators for
|
||||
/// existing objects (which will be overwritten), but the copy from GPU to CPU
|
||||
|
@ -88,6 +94,14 @@ class CuArray {
|
|||
/// assignment operators or destructors are not called. This is NOT IMPLEMENTED
|
||||
/// YET except for T == int32 (the current implementation will just crash).
|
||||
void Set(const T &value);
|
||||
|
||||
/// Assignment from another CuArray; delegates to CopyFromArray().
CuArray<T> &operator= (const CuArray<T> &in) {
  CopyFromArray(in);
  return *this;
}
|
||||
|
||||
/// Assignment from a CPU-based std::vector; delegates to CopyFromVec().
CuArray<T> &operator= (const std::vector<T> &in) {
  CopyFromVec(in);
  return *this;
}
|
||||
|
||||
private:
|
||||
MatrixIndexT dim_; ///< dimension of the vector
|
||||
|
|
|
@ -165,6 +165,7 @@ void CuBlockMatrix<Real>::Write(std::ostream &os, bool binary) const {
|
|||
WriteToken(os, binary, "</CuBlockMatrix>");
|
||||
}
|
||||
|
||||
|
||||
template<class Real>
|
||||
void CuBlockMatrix<Real>::Read(std::istream &is, bool binary) {
|
||||
Destroy();
|
||||
|
|
|
@ -44,6 +44,7 @@ namespace kaldi {
|
|||
'primary' home remains on the CPU.. what we mean by this is that
|
||||
while the data remains on the GPU, the "primary" version of the
|
||||
Matrix object that holds the pointers will remain on the CPU.
|
||||
We just copy it over to the GPU whenever it is changed.
|
||||
*/
|
||||
|
||||
template<typename Real>
|
||||
|
@ -94,10 +95,15 @@ class CuBlockMatrix {
|
|||
|
||||
|
||||
/// Copies elements within the block structure from matrix M, discarding others.
|
||||
/// Note: this has not been impelemented in a very efficient way, it's used only
|
||||
/// Note: this has not been implemented in a very efficient way, it's used only
|
||||
/// for testing.
|
||||
void CopyFromMat(const CuMatrix<Real> &M);
|
||||
|
||||
/// Normalizes the columns of *this so that each one sums to one.
|
||||
/// On error (e.g. inf's), will set the column to a constant value that
|
||||
/// sums to one.
|
||||
void NormalizeColumns();
|
||||
|
||||
void Swap(CuBlockMatrix *other);
|
||||
|
||||
protected:
|
||||
|
|
|
@ -79,7 +79,7 @@ void CuDevice::SelectGpuId(int32 gpu_id, bool abort_on_error) {
|
|||
// Check that we have a gpu available
|
||||
int32 n_gpu = 0;
|
||||
cudaGetDeviceCount(&n_gpu);
|
||||
if(n_gpu == 0 && gpu_id == -2) {
|
||||
if(n_gpu == 0) {
|
||||
// If we do automatic selection and no GPU is found, we run on a CPU
|
||||
if (abort_on_error) {
|
||||
KALDI_ERR << "No CUDA capable GPU was detected";
|
||||
|
@ -89,16 +89,6 @@ void CuDevice::SelectGpuId(int32 gpu_id, bool abort_on_error) {
|
|||
return;
|
||||
}
|
||||
}
|
||||
if(n_gpu == 0) {
|
||||
if (abort_on_error) {
|
||||
KALDI_ERR << "No CUDA capable GPU was detected.";
|
||||
} else {
|
||||
KALDI_WARN << "No CUDA capable GPU detected, while explicitly asked for gpu-id '"
|
||||
<< gpu_id << "'.CUDA will NOT be used!!!";
|
||||
active_gpu_id_ = -2;
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
// Now we know that there is a GPU in the system,
|
||||
// and we don't want to have it disabled.
|
||||
|
@ -390,7 +380,7 @@ void CuDevice::PrintProfile() {
|
|||
for(it = profile_map_.begin(); it != profile_map_.end(); ++it)
|
||||
pairs.push_back(std::make_pair(it->second, it->first));
|
||||
std::sort(pairs.begin(), pairs.end());
|
||||
size_t max_print = 15, start_pos = (pairs.size() > max_print ?
|
||||
size_t max_print = 15, start_pos = (pairs.size() <= max_print ?
|
||||
0 : pairs.size() - max_print);
|
||||
for (size_t i = start_pos; i < pairs.size(); i++)
|
||||
os << pairs[i].second << "\t" << pairs[i].first << "s\n";
|
||||
|
|
|
@ -148,6 +148,9 @@ void cudaF_comp_obj_deriv(dim3 Gr,dim3 Bl, MatrixElement<float>* x, int s, const
|
|||
void cudaF_transpose_matrix(dim3 Gr, dim3 Bl, float* mat, MatrixDim d);
|
||||
void cudaF_sy_add_tr2(dim3 Gr, dim3 Bl, float alpha, float beta, const float* T, MatrixDim tdim,
|
||||
float *S, MatrixDim sdim);
|
||||
void cudaF_sum_column_ranges(dim3 Gr, dim3 Bl, float *data, MatrixDim dim,
|
||||
const float *src_data, MatrixDim src_dim,
|
||||
const Int32Pair *indices);
|
||||
|
||||
|
||||
/*********************************************************
|
||||
|
@ -277,6 +280,10 @@ void cudaD_comp_obj_deriv(dim3 Gr,dim3 Bl, MatrixElement<double>* x, int s, cons
|
|||
void cudaD_transpose_matrix(dim3 Gr, dim3 Bl, double* mat, MatrixDim d);
|
||||
void cudaD_sy_add_tr2(dim3 Gr, dim3 Bl, double alpha, double beta, const double* T, MatrixDim tdim,
|
||||
double *S, MatrixDim sdim);
|
||||
void cudaD_sum_column_ranges(dim3 Gr, dim3 Bl, double *data, MatrixDim dim,
|
||||
const double *src_data, MatrixDim src_dim,
|
||||
const Int32Pair *indices);
|
||||
|
||||
|
||||
|
||||
} // extern "C"
|
||||
|
|
|
@ -1297,6 +1297,64 @@ static void _block_add_mat_mat(CuBlockMatrixData *B_cu_data, int num_blocks,
|
|||
}
|
||||
|
||||
|
||||
// Each thread handles one row of "data" (x index) and one block of the block
// matrix B (y index).  For that row and block it overwrites the columns of
// "data" covered by the transposed block with
//   alpha * (matching segment of the row of A) * (block of B)^T + beta * (old value).
// Threads whose row or block index is out of range do nothing.
template<typename Real>
__global__
static void _blockadd_mat_blockmat_trans(Real *data, MatrixDim dim, const Real *A_data, int A_num_rows, int A_num_cols,
                                         int A_row_stride, int A_col_stride, const CuBlockMatrixData *B_cu_data,
                                         int B_num_blocks, Real alpha, Real beta) {
  const int row = blockIdx.x * blockDim.x + threadIdx.x;    // row of "data" and of A.
  const int block = blockIdx.y * blockDim.y + threadIdx.y;  // which block of B.
  if (row >= A_num_rows || block >= B_num_blocks) return;

  const CuBlockMatrixData &blk = B_cu_data[block];
  // The block's data pointer is stored untyped, hence the cast.
  const Real *blk_data = static_cast<Real*>(blk.matrix_data);

  // Geometry of the transposed block: row/column offsets and sizes swap roles.
  const int trans_row_offset = blk.col_offset;
  const int trans_col_offset = blk.row_offset;
  const int trans_num_rows = blk.matrix_dim.cols;
  const int trans_num_cols = blk.matrix_dim.rows;
  const int blk_stride = blk.matrix_dim.stride;

  // Start of the part of this row of A that multiplies the block, and of the
  // part of this row of "data" that receives the result.
  const Real *a_seg = A_data + row * A_row_stride + trans_row_offset * A_col_stride;
  Real *out = data + row * dim.stride + trans_col_offset;

  for (int k = 0; k < trans_num_cols; k++) {
    const Real *blk_row = blk_data + k * blk_stride;  // row k of the (untransposed) block.
    Real acc = 0.0;
    for (int l = 0; l < trans_num_rows; l++)
      acc += blk_row[l] * a_seg[l * A_col_stride];
    out[k] = alpha * acc + beta * out[k];
  }
}
|
||||
|
||||
|
||||
// In this kernel the x index selects the row and the y index selects the
// column of the destination.  Each in-range thread writes to its element of
// "data" the sum of the elements of the same row of "src_data" whose column
// lies in [indices[col].first, indices[col].second).
template<typename Real>
__global__
static void _sum_column_ranges(Real *data, MatrixDim dim,
                               const Real *src_data,
                               MatrixDim src_dim,
                               const Int32Pair *indices) {
  const int r = blockIdx.x * blockDim.x + threadIdx.x;
  const int c = blockIdx.y * blockDim.y + threadIdx.y;
  if (r < dim.rows && c < dim.cols) {
    const Real *src_row = src_data + r * src_dim.stride;
    const int range_begin = indices[c].first;
    const int range_end = indices[c].second;
    Real total = 0.0;
    for (int s = range_begin; s < range_end; s++)
      total += src_row[s];
    data[r * dim.stride + c] = total;
  }
}
|
||||
|
||||
|
||||
|
||||
template<typename Real>
|
||||
__global__
|
||||
static void _soft_hinge(Real*y, const Real*x, MatrixDim d, int src_stride) {
|
||||
|
@ -2047,6 +2105,11 @@ void cudaF_copy_col_from_mat_fd(int Gr, int Bl, float* v, int col, const float*
|
|||
_copy_col_from_mat_fd<<<Gr,Bl>>>(v,col,mat,dmat,dim);
|
||||
}
|
||||
|
||||
// Launches the float instantiation of the _sum_column_ranges kernel with grid
// dimensions Gr and block dimensions Bl.
void cudaF_sum_column_ranges(dim3 Gr, dim3 Bl,
                             float *data, MatrixDim dim,
                             const float *src_data, MatrixDim src_dim,
                             const Int32Pair *indices) {
  _sum_column_ranges<<<Gr, Bl>>>(data, dim, src_data, src_dim, indices);
}
|
||||
|
||||
|
||||
|
||||
|
@ -2407,6 +2470,11 @@ void cudaD_copy_rows_from_vec(dim3 Gr, dim3 Bl, double *mat_out, MatrixDim d_out
|
|||
_copy_rows_from_vec<<<Gr,Bl>>>(mat_out, d_out, v_in);
|
||||
}
|
||||
|
||||
// Launches the double instantiation of the _sum_column_ranges kernel with grid
// dimensions Gr and block dimensions Bl.
void cudaD_sum_column_ranges(dim3 Gr, dim3 Bl,
                             double *data, MatrixDim dim,
                             const double *src_data, MatrixDim src_dim,
                             const Int32Pair *indices) {
  _sum_column_ranges<<<Gr, Bl>>>(data, dim, src_data, src_dim, indices);
}
|
||||
|
||||
|
||||
/* Some conversion kernels for which it's more convenient to not name them F or D. */
|
||||
|
|
|
@ -207,7 +207,12 @@ inline void cuda_take_lower(dim3 Gr, dim3 Bl, const float* x, float* y, MatrixDi
|
|||
inline void cuda_take_upper(dim3 Gr, dim3 Bl, const float* x, float* y, MatrixDim d_in) { cudaF_take_upper(Gr,Bl,x,y,d_in); }
|
||||
inline void cuda_take_mean(dim3 Gr, dim3 Bl, const float* x, float* y, MatrixDim d_in) { cudaF_take_mean(Gr,Bl,x,y,d_in); }
|
||||
inline void cuda_comp_obj_deriv(dim3 Gr, dim3 Bl, MatrixElement<float>* x, int32 size, const float* z, MatrixDim d, float* z2, MatrixDim d2, float* t) {cudaF_comp_obj_deriv(Gr,Bl,x,size,z,d,z2,d2,t); }
|
||||
inline void cuda_comp_obj_deriv(dim3 Gr, dim3 Bl, MatrixElement<double>* x, int32 size, const double* z, MatrixDim d, double* z2, MatrixDim d2, double* t) {cudaD_comp_obj_deriv(Gr,Bl,x,size,z,d,z2,d2,t); }
|
||||
// Overload for float: forwards to cudaF_sum_column_ranges.
inline void cuda_sum_column_ranges(dim3 Gr, dim3 Bl,
                                   float *data, MatrixDim dim,
                                   const float *src_data, MatrixDim src_dim,
                                   const Int32Pair *indices) {
  cudaF_sum_column_ranges(Gr, Bl, data, dim, src_data, src_dim, indices);
}
|
||||
|
||||
|
||||
// double versions
|
||||
|
||||
|
@ -347,6 +352,11 @@ inline void cuda_copy_from_sp(int Gr, int Bl, const double* x, double* y, int d_
|
|||
inline void cuda_take_lower(dim3 Gr, dim3 Bl, const double* x, double* y, MatrixDim d_in) { cudaD_take_lower(Gr,Bl,x,y,d_in); }
|
||||
inline void cuda_take_upper(dim3 Gr, dim3 Bl, const double* x, double* y, MatrixDim d_in) { cudaD_take_upper(Gr,Bl,x,y,d_in); }
|
||||
inline void cuda_take_mean(dim3 Gr, dim3 Bl, const double* x, double* y, MatrixDim d_in) { cudaD_take_mean(Gr,Bl,x,y,d_in); }
|
||||
inline void cuda_comp_obj_deriv(dim3 Gr, dim3 Bl, MatrixElement<double>* x, int32 size, const double* z, MatrixDim d, double* z2, MatrixDim d2, double* t) {cudaD_comp_obj_deriv(Gr,Bl,x,size,z,d,z2,d2,t); }
|
||||
// Overload for double: forwards to cudaD_sum_column_ranges.
inline void cuda_sum_column_ranges(dim3 Gr, dim3 Bl,
                                   double *data, MatrixDim dim,
                                   const double *src_data, MatrixDim src_dim,
                                   const Int32Pair *indices) {
  cudaD_sum_column_ranges(Gr, Bl, data, dim, src_data, src_dim, indices);
}
|
||||
|
||||
|
||||
// Also include some template-friendly wrappers of cublas functions:
|
||||
|
|
|
@ -67,6 +67,7 @@ void Splice(const CuMatrix<Real> &src,
|
|||
/// The matrices src and tgt must have the same dimensions and
|
||||
/// the dimension of copy_from_indices must equal the number of columns
|
||||
/// in the src matrix. As a result, tgt(i, j) == src(i, copy_from_indices[j]).
|
||||
/// Also see CuMatrix::CopyCols(), which is more general.
|
||||
template<typename Real>
|
||||
void Copy(const CuMatrix<Real> &src,
|
||||
const CuArray<int32> &copy_from_indices,
|
||||
|
|
|
@ -340,6 +340,47 @@ template<typename Real> void UnitTestCuMatrixCopyCross2() {
|
|||
}
|
||||
}
|
||||
|
||||
template<typename Real>
|
||||
static void UnitTestCuMatrixSumColumnRanges() {
|
||||
for (MatrixIndexT p = 0; p < 10; p++) {
|
||||
MatrixIndexT num_cols1 = 10 + rand() % 10,
|
||||
num_cols2 = 10 + rand() % 10,
|
||||
num_rows = 10 + rand() % 10;
|
||||
Matrix<Real> src(num_rows, num_cols1);
|
||||
Matrix<Real> dst(num_rows, num_cols2);
|
||||
std::vector<Int32Pair> indices(num_cols2);
|
||||
for (MatrixIndexT i = 0; i < num_cols2; i++) {
|
||||
indices[i].first = rand() % num_cols1;
|
||||
int32 headroom = num_cols1 - indices[i].first,
|
||||
size = (rand() % headroom) + 1;
|
||||
indices[i].second = indices[i].first + size;
|
||||
KALDI_ASSERT(indices[i].second >= indices[i].first &&
|
||||
indices[i].second <= num_cols1 &&
|
||||
indices[i].first >= 0);
|
||||
// In the test we allow second == first.
|
||||
}
|
||||
src.SetRandn();
|
||||
// Simple computation:
|
||||
for (MatrixIndexT i = 0; i < num_rows; i++) {
|
||||
for (MatrixIndexT j = 0; j < num_cols2; j++) {
|
||||
int32 start = indices[j].first, end = indices[j].second;
|
||||
Real sum = 0.0;
|
||||
for (MatrixIndexT j2 = start; j2 < end; j2++)
|
||||
sum += src(i, j2);
|
||||
dst(i, j) = sum;
|
||||
}
|
||||
}
|
||||
CuMatrix<Real> cu_src(src);
|
||||
CuMatrix<Real> cu_dst(num_rows, num_cols2, kUndefined);
|
||||
CuArray<Int32Pair> indices_tmp(indices);
|
||||
cu_dst.SumColumnRanges(cu_src, indices_tmp);
|
||||
Matrix<Real> dst2(cu_dst);
|
||||
AssertEqual(dst, dst2);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
template<typename Real>
|
||||
static void UnitTestCuMatrixCopyCols() {
|
||||
for (MatrixIndexT p = 0; p < 10; p++) {
|
||||
|
@ -353,8 +394,13 @@ static void UnitTestCuMatrixCopyCols() {
|
|||
std::vector<int32> reorder(num_cols2);
|
||||
for (int32 i = 0; i < num_cols2; i++)
|
||||
reorder[i] = -1 + (rand() % (num_cols1 + 1));
|
||||
|
||||
N.CopyCols(M, reorder);
|
||||
|
||||
if (rand() % 2 == 0) {
|
||||
N.CopyCols(M, reorder);
|
||||
} else {
|
||||
CuArray<int32> cuda_reorder(reorder);
|
||||
N.CopyCols(M, cuda_reorder);
|
||||
}
|
||||
|
||||
for (int32 i = 0; i < num_rows; i++)
|
||||
for (int32 j = 0; j < num_cols2; j++)
|
||||
|
@ -1640,6 +1686,7 @@ template<typename Real> void CudaMatrixUnitTest() {
|
|||
UnitTestCuMatrixCopyFromTp<Real>();
|
||||
UnitTestCuMatrixAddMatTp<Real>();
|
||||
UnitTestCuMatrixCopyCols<Real>();
|
||||
UnitTestCuMatrixSumColumnRanges<Real>();
|
||||
UnitTestCuMatrixCopyRows<Real>();
|
||||
UnitTestCuMatrixCopyRowsFromVec<Real>();
|
||||
UnitTestCuMatrixAddTpMat<Real>();
|
||||
|
|
|
@ -1622,6 +1622,7 @@ void VectorBase<float>::CopyRowsFromMat(const CuMatrixBase<float> &mat);
|
|||
template
|
||||
void VectorBase<double>::CopyRowsFromMat(const CuMatrixBase<double> &mat);
|
||||
|
||||
|
||||
template<typename Real>
|
||||
void CuMatrixBase<Real>::CopyCols(const CuMatrixBase<Real> &src,
|
||||
const std::vector<MatrixIndexT> &reorder) {
|
||||
|
@ -1650,6 +1651,30 @@ void CuMatrixBase<Real>::CopyCols(const CuMatrixBase<Real> &src,
|
|||
}
|
||||
}
|
||||
|
||||
template<typename Real>
|
||||
void CuMatrixBase<Real>::CopyCols(const CuMatrixBase<Real> &src,
|
||||
const CuArray<MatrixIndexT> &reorder) {
|
||||
#if HAVE_CUDA == 1
|
||||
if (CuDevice::Instantiate().Enabled()) {
|
||||
KALDI_ASSERT(reorder.Dim() == NumCols());
|
||||
KALDI_ASSERT(NumRows() == src.NumRows());
|
||||
Timer tim;
|
||||
dim3 dimBlock(CU2DBLOCK, CU2DBLOCK);
|
||||
// This kernel, as it is newer has the (x,y) dims as (rows,cols).
|
||||
dim3 dimGrid(n_blocks(NumRows(), CU2DBLOCK), n_blocks(NumCols(), CU2DBLOCK));
|
||||
cuda_copy_cols(dimGrid, dimBlock, data_, src.Data(), reorder.Data(), Dim(), src.Stride());
|
||||
CU_SAFE_CALL(cudaGetLastError());
|
||||
CuDevice::Instantiate().AccuProfile(__func__, tim.Elapsed());
|
||||
} else
|
||||
#endif
|
||||
{
|
||||
std::vector<MatrixIndexT> reorder_cpu;
|
||||
reorder.CopyToVec(&reorder_cpu);
|
||||
Mat().CopyCols(src.Mat(), reorder_cpu);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
template<typename Real>
|
||||
void CuMatrixBase<Real>::CopyRows(const CuMatrixBase<Real> &src,
|
||||
const std::vector<MatrixIndexT> &reorder) {
|
||||
|
@ -1678,6 +1703,46 @@ void CuMatrixBase<Real>::CopyRows(const CuMatrixBase<Real> &src,
|
|||
}
|
||||
}
|
||||
|
||||
|
||||
template<typename Real>
|
||||
void CuMatrixBase<Real>::SumColumnRanges(const CuMatrixBase<Real> &src,
|
||||
const CuArray<Int32Pair> &indices) {
|
||||
KALDI_ASSERT(static_cast<MatrixIndexT>(indices.Dim()) == NumCols());
|
||||
KALDI_ASSERT(NumRows() == src.NumRows());
|
||||
if (NumRows() == 0) return;
|
||||
#if HAVE_CUDA == 1
|
||||
if (CuDevice::Instantiate().Enabled()) {
|
||||
|
||||
Timer tim;
|
||||
dim3 dimBlock(CU2DBLOCK, CU2DBLOCK);
|
||||
// This kernel, as it is newer has the (x,y) dims as (rows,cols).
|
||||
dim3 dimGrid(n_blocks(NumRows(), CU2DBLOCK), n_blocks(NumCols(), CU2DBLOCK));
|
||||
cuda_sum_column_ranges(dimGrid, dimBlock, data_, Dim(), src.Data(), src.Dim(), indices.Data());
|
||||
CU_SAFE_CALL(cudaGetLastError());
|
||||
CuDevice::Instantiate().AccuProfile(__func__, tim.Elapsed());
|
||||
} else
|
||||
#endif
|
||||
{ // Implement here for the CPU..
|
||||
int32 num_rows = this->num_rows_, num_cols = this->num_cols_,
|
||||
this_stride = this->stride_, src_stride = src.stride_;
|
||||
Real *data = this->data_;
|
||||
const Real *src_data = src.data_;
|
||||
const Int32Pair *indices_data = indices.Data();
|
||||
for (int32 row = 0; row < num_rows; row++) {
|
||||
for (int32 col = 0; col < num_cols; col++) {
|
||||
int32 start_col = indices_data[col].first,
|
||||
end_col = indices_data[col].second;
|
||||
Real sum = 0.0;
|
||||
for (int32 src_col = start_col; src_col < end_col; src_col++)
|
||||
sum += src_data[row * src_stride + src_col];
|
||||
data[row * this_stride + col] = sum;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
template<typename Real>
|
||||
void CuMatrixBase<Real>::CopyLowerToUpper() {
|
||||
KALDI_ASSERT(num_cols_ == num_rows_);
|
||||
|
|
|
@ -91,6 +91,11 @@ class CuMatrixBase {
|
|||
void CopyCols(const CuMatrixBase<Real> &src,
|
||||
const std::vector<MatrixIndexT> &indices);
|
||||
|
||||
/// Version of CopyCols that takes CuArray argument.
|
||||
void CopyCols(const CuMatrixBase<Real> &src,
|
||||
const CuArray<MatrixIndexT> &indices);
|
||||
|
||||
|
||||
/// Copies row r from row indices[r] of src.
|
||||
/// As a special case, if indexes[i] <== -1, sets row i to zero
|
||||
/// "reorder".size() must equal this->NumRows(),
|
||||
|
@ -100,6 +105,13 @@ class CuMatrixBase {
|
|||
const std::vector<MatrixIndexT> &indices);
|
||||
|
||||
|
||||
/// For each row r of this and for each column c, sets (*this)(r, c) to the
|
||||
/// sum \sum_j src(r, j), where j ranges from indices[c].first through
|
||||
/// indices[c].second - 1.
|
||||
void SumColumnRanges(const CuMatrixBase<Real> &src,
|
||||
const CuArray<Int32Pair> &indices);
|
||||
|
||||
|
||||
friend Real TraceMatMat<Real>(const CuMatrixBase<Real> &A,
|
||||
const CuMatrixBase<Real> &B,
|
||||
MatrixTransposeType trans);
|
||||
|
|
|
@ -81,6 +81,10 @@ extern "C" {
|
|||
// decided to make this a void* pointer.
|
||||
} CuBlockMatrixData;
|
||||
|
||||
// A pair of int32_cuda values.
// CuMatrixBase::SumColumnRanges reads an array of these: for each output
// column, "first" is the first source column to sum and "second" is one past
// the last source column to sum.
typedef struct Int32Pair {
|
||||
int32_cuda first;
|
||||
||||
int32_cuda second;
|
||||
} Int32Pair;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
|
|
@ -221,6 +221,7 @@ class CuVector: public CuVectorBase<Real> {
|
|||
CuVector(MatrixIndexT dim, MatrixResizeType t = kSetZero) { Resize(dim, t); }
|
||||
|
||||
CuVector(const CuVectorBase<Real> &v);
|
||||
|
||||
CuVector(const VectorBase<Real> &v);
|
||||
explicit CuVector(const CuVector<Real> &v) : CuVectorBase<Real>() {
|
||||
Resize(v.Dim(), kUndefined);
|
||||
|
|
|
@ -226,12 +226,14 @@ template<class IntType> class LatticeStringRepository {
|
|||
typedef unordered_set<const Entry*, EntryKey, EntryEqual> SetType;
|
||||
|
||||
void RebuildHelper(const Entry *to_add, SetType *tmp_set) {
|
||||
if (to_add == NULL) return;
|
||||
else {
|
||||
while(true) {
|
||||
if (to_add == NULL) return;
|
||||
typename SetType::iterator iter = tmp_set->find(to_add);
|
||||
if (iter == tmp_set->end()) { // not in tmp_set.
|
||||
tmp_set->insert(to_add);
|
||||
RebuildHelper(to_add->parent, tmp_set); // make sure parent there.
|
||||
to_add = to_add->parent; // and loop.
|
||||
} else {
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -88,7 +88,7 @@ template<class Label, class StringId> class StringRepository {
|
|||
else if (id>=single_symbol_start) {
|
||||
v->resize(1); (*v)[0] = id - single_symbol_start;
|
||||
} else {
|
||||
assert(id>=string_start && id < static_cast<StringId>(vec_.size()));
|
||||
assert(id >= string_start && id < static_cast<StringId>(vec_.size()));
|
||||
*v = *(vec_[id]);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -10,7 +10,7 @@ LDLIBS += $(CUDA_LDLIBS)
|
|||
TESTFILES = nnet-test
|
||||
|
||||
OBJFILES = nnet-nnet.o nnet-component.o nnet-loss.o nnet-cache.o \
|
||||
nnet-cache-tgtmat.o nnet-loss-prior.o nnet-pdf-prior.o
|
||||
nnet-cache-tgtmat.o nnet-cache-conf.o nnet-loss-prior.o nnet-pdf-prior.o
|
||||
|
||||
LIBNAME = kaldi-nnet
|
||||
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
// nnet/nnet-activation.h
|
||||
|
||||
// Copyright 2011 Karel Vesely
|
||||
// Copyright 2011-2013 Brno University of Technology (author: Karel Vesely)
|
||||
|
||||
// See ../../COPYING for clarification regarding multiple authors
|
||||
//
|
||||
|
@ -30,15 +30,14 @@ namespace nnet1 {
|
|||
|
||||
class Softmax : public Component {
|
||||
public:
|
||||
Softmax(int32 dim_in, int32 dim_out, Nnet *nnet)
|
||||
: Component(dim_in, dim_out, nnet)
|
||||
Softmax(int32 dim_in, int32 dim_out)
|
||||
: Component(dim_in, dim_out)
|
||||
{ }
|
||||
~Softmax()
|
||||
{ }
|
||||
|
||||
ComponentType GetType() const {
|
||||
return kSoftmax;
|
||||
}
|
||||
Component* Copy() const { return new Softmax(*this); }
|
||||
ComponentType GetType() const { return kSoftmax; }
|
||||
|
||||
void PropagateFnc(const CuMatrix<BaseFloat> &in, CuMatrix<BaseFloat> *out) {
|
||||
// y = e^x_j/sum_j(e^x_j)
|
||||
|
@ -60,15 +59,14 @@ class Softmax : public Component {
|
|||
|
||||
class Sigmoid : public Component {
|
||||
public:
|
||||
Sigmoid(int32 dim_in, int32 dim_out, Nnet *nnet)
|
||||
: Component(dim_in, dim_out, nnet)
|
||||
Sigmoid(int32 dim_in, int32 dim_out)
|
||||
: Component(dim_in, dim_out)
|
||||
{ }
|
||||
~Sigmoid()
|
||||
{ }
|
||||
|
||||
ComponentType GetType() const {
|
||||
return kSigmoid;
|
||||
}
|
||||
Component* Copy() const { return new Sigmoid(*this); }
|
||||
ComponentType GetType() const { return kSigmoid; }
|
||||
|
||||
void PropagateFnc(const CuMatrix<BaseFloat> &in, CuMatrix<BaseFloat> *out) {
|
||||
// y = 1/(1+e^-x)
|
||||
|
@ -86,15 +84,14 @@ class Sigmoid : public Component {
|
|||
|
||||
class Tanh : public Component {
|
||||
public:
|
||||
Tanh(int32 dim_in, int32 dim_out, Nnet *nnet)
|
||||
: Component(dim_in, dim_out, nnet)
|
||||
Tanh(int32 dim_in, int32 dim_out)
|
||||
: Component(dim_in, dim_out)
|
||||
{ }
|
||||
~Tanh()
|
||||
{ }
|
||||
|
||||
ComponentType GetType() const {
|
||||
return kTanh;
|
||||
}
|
||||
Component* Copy() const { return new Tanh(*this); }
|
||||
ComponentType GetType() const { return kTanh; }
|
||||
|
||||
void PropagateFnc(const CuMatrix<BaseFloat> &in, CuMatrix<BaseFloat> *out) {
|
||||
// y = (e^x - e^(-x)) / (e^x + e^(-x))
|
||||
|
@ -112,15 +109,14 @@ class Tanh : public Component {
|
|||
|
||||
class Dropout : public Component {
|
||||
public:
|
||||
Dropout(int32 dim_in, int32 dim_out, Nnet *nnet):
|
||||
Component(dim_in, dim_out, nnet)
|
||||
Dropout(int32 dim_in, int32 dim_out):
|
||||
Component(dim_in, dim_out)
|
||||
{ }
|
||||
~Dropout()
|
||||
{ }
|
||||
|
||||
ComponentType GetType() const {
|
||||
return kDropout;
|
||||
}
|
||||
Component* Copy() const { return new Dropout(*this); }
|
||||
ComponentType GetType() const { return kDropout; }
|
||||
|
||||
void PropagateFnc(const CuMatrix<BaseFloat> &in, CuMatrix<BaseFloat> *out) {
|
||||
out->CopyFromMat(in);
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
// nnet/nnet-affine-transform.h
|
||||
|
||||
// Copyright 2011 Karel Vesely
|
||||
// Copyright 2011 Brno University of Technology (author: Karel Vesely)
|
||||
|
||||
// See ../../COPYING for clarification regarding multiple authors
|
||||
//
|
||||
|
@ -31,17 +31,16 @@ namespace nnet1 {
|
|||
|
||||
class AffineTransform : public UpdatableComponent {
|
||||
public:
|
||||
AffineTransform(int32 dim_in, int32 dim_out, Nnet *nnet)
|
||||
: UpdatableComponent(dim_in, dim_out, nnet),
|
||||
AffineTransform(int32 dim_in, int32 dim_out)
|
||||
: UpdatableComponent(dim_in, dim_out),
|
||||
linearity_(dim_out, dim_in), bias_(dim_out),
|
||||
linearity_corr_(dim_out, dim_in), bias_corr_(dim_out)
|
||||
{ }
|
||||
~AffineTransform()
|
||||
{ }
|
||||
|
||||
ComponentType GetType() const {
|
||||
return kAffineTransform;
|
||||
}
|
||||
Component* Copy() const { return new AffineTransform(*this); }
|
||||
ComponentType GetType() const { return kAffineTransform; }
|
||||
|
||||
void ReadData(std::istream &is, bool binary) {
|
||||
linearity_.Read(is, binary);
|
||||
|
|
|
@ -0,0 +1,247 @@
|
|||
// nnet/nnet-cache-conf.cc
|
||||
|
||||
// Copyright 2013 Brno University of Technology (author: Karel Vesely)
|
||||
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
|
||||
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
|
||||
// MERCHANTABLITY OR NON-INFRINGEMENT.
|
||||
// See the Apache 2 License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include "nnet/nnet-cache-conf.h"
|
||||
|
||||
#include "cudamatrix/cu-math.h"
|
||||
|
||||
#include <algorithm>
|
||||
|
||||
namespace kaldi {
|
||||
namespace nnet1 {
|
||||
|
||||
|
||||
|
||||
/// Validates the requested sizes and resets the cache to its initial state.
/// cachesize must be positive, no larger than 8388479, at least bunchsize and
/// an exact multiple of bunchsize; bunchsize must be positive.  Afterwards the
/// cache is EMPTY, not randomized, and both cursors are at zero.
void CacheConf::Init(int32 cachesize, int32 bunchsize) {
  // largest cache size this class accepts
  const int32 max_cachesize = 8388479;

  KALDI_ASSERT(cachesize>0);
  if (cachesize > max_cachesize) {
    KALDI_ERR << "CacheConfsize " << cachesize << " too large, use cachesize smaller than 8388480.";
  }
  KALDI_ASSERT(bunchsize>0);
  KALDI_ASSERT(cachesize>=bunchsize);

  // the cache must hold a whole number of bunches
  if (cachesize % bunchsize != 0) {
    KALDI_ERR << "Non divisible cachesize by bunchsize";
  }

  // start out empty and unshuffled, with both cursors at the beginning
  state_ = EMPTY;
  randomized_ = false;
  filling_pos_ = 0;
  emptying_pos_ = 0;

  // remember the sizes
  bunchsize_ = bunchsize;
  cachesize_ = cachesize;
}
|
||||
|
||||
|
||||
|
||||
void CacheConf::AddData(const CuMatrix<BaseFloat> &features, const std::vector<int32> &targets, const Vector<BaseFloat> &confidence) {
|
||||
if (state_ == FULL) {
|
||||
KALDI_ERR << "Cannot add data, cache already full";
|
||||
}
|
||||
|
||||
KALDI_ASSERT(features.NumRows() == static_cast<int32>(targets.size()));
|
||||
KALDI_ASSERT(features.NumRows() == static_cast<int32>(confidence.Dim()));
|
||||
|
||||
int32 dim_fea = features.NumCols();
|
||||
|
||||
// lazy buffers allocation
|
||||
if (features_.NumRows() != cachesize_) {
|
||||
features_.Resize(cachesize_, dim_fea);
|
||||
targets_.resize(cachesize_);
|
||||
confidence_.Resize(cachesize_);
|
||||
}
|
||||
|
||||
// warn if segment longer than half-cache
|
||||
// (frame level shuffling accross sentences will be poor)
|
||||
if (features.NumRows() > cachesize_/4) {
|
||||
KALDI_WARN << "Too long segment or small cachesize!"
|
||||
<< " (cache-size " << cachesize_ << ") < (4 x"
|
||||
<< " segment-size " << features.NumRows() << ").";
|
||||
}
|
||||
|
||||
// change state
|
||||
if (state_ == EMPTY) {
|
||||
state_ = FILLING; filling_pos_ = 0;
|
||||
|
||||
// check for leftover from previous segment
|
||||
int leftover = features_leftover_.NumRows();
|
||||
// check if leftover is not bigger than half-cachesize
|
||||
if (leftover > cachesize_/2) {
|
||||
KALDI_WARN << "Truncating "
|
||||
<< leftover - cachesize_/2
|
||||
<< " frames from leftover of previous segment "
|
||||
<< "(max leftover " << cachesize_/2 << ").";
|
||||
leftover = cachesize_/2;
|
||||
}
|
||||
// prefill cache with leftover
|
||||
if (leftover > 0) {
|
||||
features_.RowRange(0, leftover).CopyFromMat(
|
||||
features_leftover_.RowRange(0, leftover)
|
||||
);
|
||||
|
||||
std::copy(targets_leftover_.begin(),
|
||||
targets_leftover_.begin() + leftover,
|
||||
targets_.begin());
|
||||
|
||||
confidence_.Range(0, leftover).CopyFromVec(
|
||||
confidence_leftover_.Range(0, leftover)
|
||||
);
|
||||
|
||||
features_leftover_.Resize(0, 0);
|
||||
targets_leftover_.resize(0);
|
||||
confidence_leftover_.Resize(0);
|
||||
filling_pos_ += leftover;
|
||||
}
|
||||
}
|
||||
|
||||
KALDI_ASSERT(state_ == FILLING);
|
||||
KALDI_ASSERT(features.NumRows() == static_cast<MatrixIndexT>(targets.size()));
|
||||
|
||||
int cache_space = cachesize_ - filling_pos_;
|
||||
int feature_length = features.NumRows();
|
||||
int fill_rows = (cache_space<feature_length)? cache_space : feature_length;
|
||||
int leftover = feature_length - fill_rows;
|
||||
|
||||
KALDI_ASSERT(cache_space > 0);
|
||||
|
||||
// copy the data to cache
|
||||
features_.RowRange(filling_pos_, fill_rows).CopyFromMat(
|
||||
features.RowRange(0, fill_rows)
|
||||
);
|
||||
|
||||
std::copy(targets.begin(),
|
||||
targets.begin()+fill_rows,
|
||||
targets_.begin()+filling_pos_);
|
||||
|
||||
confidence_.Range(filling_pos_,fill_rows).
|
||||
CopyFromVec(confidence.Range(0,fill_rows));
|
||||
|
||||
// copy leftovers
|
||||
if (leftover > 0) {
|
||||
features_leftover_.Resize(leftover, dim_fea);
|
||||
features_leftover_.CopyFromMat(
|
||||
features.RowRange(fill_rows, leftover)
|
||||
);
|
||||
|
||||
KALDI_ASSERT(targets.end()-(targets.begin()+fill_rows)==leftover);
|
||||
targets_leftover_.resize(leftover);
|
||||
std::copy(targets.begin()+fill_rows,
|
||||
targets.end(),
|
||||
targets_leftover_.begin());
|
||||
|
||||
confidence_leftover_.Resize(leftover);
|
||||
confidence_leftover_.CopyFromVec(confidence.Range(fill_rows,leftover));
|
||||
}
|
||||
|
||||
// update cursor
|
||||
filling_pos_ += fill_rows;
|
||||
|
||||
// change state
|
||||
if (filling_pos_ == cachesize_) {
|
||||
state_ = FULL;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
void CacheConf::Randomize() {
|
||||
KALDI_ASSERT(state_ == FULL || state_ == FILLING);
|
||||
|
||||
// lazy initialization of the output buffers
|
||||
features_random_.Resize(cachesize_, features_.NumCols());
|
||||
targets_random_.resize(cachesize_);
|
||||
confidence_random_.Resize(cachesize_);
|
||||
|
||||
// generate random series of integers
|
||||
randmask_.resize(filling_pos_);
|
||||
GenerateRandom randomizer;
|
||||
for(int32 i=0; i<filling_pos_; i++) { randmask_[i]=i; }
|
||||
std::random_shuffle(randmask_.begin(), randmask_.end(), randomizer);
|
||||
// get it to the gpu
|
||||
randmask_device_.CopyFromVec(randmask_);
|
||||
|
||||
// randomize the features
|
||||
cu::Randomize(features_, randmask_device_, &features_random_);
|
||||
// randomize the targets
|
||||
for(int32 i=0; i<filling_pos_; i++) {
|
||||
targets_random_[i] = targets_[randmask_[i]];
|
||||
}
|
||||
// randomize the confidences
|
||||
for(int32 i=0; i<filling_pos_; i++) {
|
||||
confidence_random_(i) = confidence_(randmask_[i]);
|
||||
}
|
||||
|
||||
randomized_ = true;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/// Copies the next bunchsize_ frames (features, targets, confidences) out of
/// the cache into the caller's buffers, which are resized to bunchsize_.
/// Data comes from the shuffled buffers if the cache has been randomized and
/// from the plain buffers otherwise.  Calling this on an EMPTY cache is an
/// error.  Once fewer than bunchsize_ unread frames remain, the cache goes
/// back to EMPTY.
void CacheConf::GetBunch(CuMatrix<BaseFloat> *features, std::vector<int32> *targets, Vector<BaseFloat> *confidence) {
  if (state_ == EMPTY) {
    KALDI_ERR << "GetBunch on empty cache!!!";
  }

  // First read after filling, whether the cache is FULL or was only partly
  // filled (final cache, still FILLING): start emptying from the beginning.
  if (state_ == FULL || state_ == FILLING) {
    emptying_pos_ = 0;
    state_ = EMPTYING;
  }

  KALDI_ASSERT(state_ == EMPTYING);

  // choose between the shuffled and the original buffers
  const CuMatrixBase<BaseFloat> *src_features = &features_;
  const std::vector<int32> *src_targets = &targets_;
  const Vector<BaseFloat> *src_confidence = &confidence_;
  if (randomized_) {
    src_features = &features_random_;
    src_targets = &targets_random_;
    src_confidence = &confidence_random_;
  }

  // features
  features->Resize(bunchsize_, features_.NumCols());
  features->CopyFromMat(src_features->RowRange(emptying_pos_, bunchsize_));

  // targets
  targets->resize(bunchsize_);
  std::copy(src_targets->begin() + emptying_pos_,
            src_targets->begin() + emptying_pos_ + bunchsize_,
            targets->begin());

  // confidences
  confidence->Resize(bunchsize_);
  confidence->CopyFromVec(src_confidence->Range(emptying_pos_, bunchsize_));

  // advance the read cursor
  emptying_pos_ += bunchsize_;

  // no complete bunch left => the cache counts as EMPTY again
  if (emptying_pos_ > filling_pos_ - bunchsize_) {
    state_ = EMPTY;
  }
}
|
||||
|
||||
|
||||
} // namespace nnet1
|
||||
} // namespace kaldi
|
|
@ -0,0 +1,107 @@
|
|||
// nnet/nnet-cache-conf.h
|
||||
|
||||
// Copyright 2012 Brno University of Technology (author: Karel Vesely)
|
||||
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
|
||||
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
|
||||
// MERCHANTABLITY OR NON-INFRINGEMENT.
|
||||
// See the Apache 2 License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
|
||||
#ifndef KALDI_NNET_NNET_CACHE_CONF_H_
|
||||
#define KALDI_NNET_NNET_CACHE_CONF_H_
|
||||
|
||||
#include "base/kaldi-math.h"
|
||||
#include "cudamatrix/cu-matrix.h"
|
||||
#include "cudamatrix/cu-math.h"
|
||||
|
||||
namespace kaldi {
|
||||
namespace nnet1 {
|
||||
|
||||
/**
|
||||
* The feature-target pair cache
|
||||
*/
|
||||
class CacheConf {
|
||||
typedef enum { EMPTY, FILLING, FULL, EMPTYING } State;
|
||||
|
||||
public:
|
||||
CacheConf() : state_(EMPTY), filling_pos_(0), emptying_pos_(0),
|
||||
cachesize_(0), bunchsize_(0), randomized_(false)
|
||||
{ }
|
||||
~CacheConf() { }
|
||||
|
||||
/// Initialize the cache
|
||||
void Init(int32 cachesize, int32 bunchsize);
|
||||
|
||||
/// Add data to cache
|
||||
void AddData(const CuMatrix<BaseFloat> &features, const std::vector<int32> &targets, const Vector<BaseFloat> &confidence);
|
||||
/// Randomizes the cache
|
||||
void Randomize();
|
||||
/// Get the bunch of training data from cache
|
||||
void GetBunch(CuMatrix<BaseFloat> *features, std::vector<int32> *targets, Vector<BaseFloat> *confidence);
|
||||
|
||||
|
||||
/// Returns true if the cache was completely filled
|
||||
bool Full() {
|
||||
return (state_ == FULL);
|
||||
}
|
||||
|
||||
/// Returns true if the cache is empty
|
||||
bool Empty() {
|
||||
return (state_ == EMPTY || filling_pos_ < bunchsize_);
|
||||
}
|
||||
|
||||
/// Returns true if the cache is empty
|
||||
bool Randomized() {
|
||||
return randomized_;
|
||||
}
|
||||
|
||||
|
||||
private:
|
||||
struct GenerateRandom {
|
||||
int32 operator()(int32 max) const {
|
||||
// return lrand48() % max;
|
||||
return RandInt(0, max-1);
|
||||
}
|
||||
};
|
||||
|
||||
State state_; ///< Current state of the cache
|
||||
|
||||
int32 filling_pos_; ///< Number of frames filled to cache by AddData
|
||||
int32 emptying_pos_; ///< Number of frames given by cache by GetBunch
|
||||
|
||||
int32 cachesize_; ///< Size of cache
|
||||
int32 bunchsize_; ///< Size of bunch
|
||||
|
||||
bool randomized_;
|
||||
|
||||
CuMatrix<BaseFloat> features_; ///< Feature cache
|
||||
CuMatrix<BaseFloat> features_random_; ///< Feature cache
|
||||
CuMatrix<BaseFloat> features_leftover_; ///< Feature cache
|
||||
|
||||
std::vector<int32> targets_; ///< Desired vector cache
|
||||
std::vector<int32> targets_random_; ///< Desired vector cache
|
||||
std::vector<int32> targets_leftover_; ///< Desired vector cache
|
||||
|
||||
Vector<BaseFloat> confidence_;
|
||||
Vector<BaseFloat> confidence_random_;
|
||||
Vector<BaseFloat> confidence_leftover_;
|
||||
|
||||
std::vector<int32> randmask_;
|
||||
CuArray<int32> randmask_device_;
|
||||
|
||||
};
|
||||
|
||||
|
||||
} // namespace nnet1
|
||||
} // namespace kaldi
|
||||
|
||||
#endif
|
|
@ -1,6 +1,6 @@
|
|||
// nnet/nnet-cache-tgtmat.cc
|
||||
|
||||
// Copyright 2011 Karel Vesely
|
||||
// Copyright 2011 Brno University of Technology (author: Karel Vesely)
|
||||
|
||||
// See ../../COPYING for clarification regarding multiple authors
|
||||
//
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
// nnet/nnet-cache-tgtmat.h
|
||||
|
||||
// Copyright 2012 Karel Vesely
|
||||
// Copyright 2012 Brno University of Technology (author: Karel Vesely)
|
||||
|
||||
// See ../../COPYING for clarification regarding multiple authors
|
||||
//
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
// nnet/nnet-cache.cc
|
||||
|
||||
// Copyright 2011 Karel Vesely
|
||||
// Copyright 2011 Brno University of Technology (author: Karel Vesely)
|
||||
|
||||
// See ../../COPYING for clarification regarding multiple authors
|
||||
//
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
// nnet/nnet-cache.h
|
||||
|
||||
// Copyright 2012 Karel Vesely
|
||||
// Copyright 2012 Brno University of Technology (author: Karel Vesely)
|
||||
|
||||
// See ../../COPYING for clarification regarding multiple authors
|
||||
//
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
// nnet/nnet-component.cc
|
||||
|
||||
// Copyright 2011-2013 Brno University of Technology (Author: Karel Vesely)
|
||||
// Copyright 2011-2013 Brno University of Technology (Author: Karel Vesely)
|
||||
|
||||
// See ../../COPYING for clarification regarding multiple authors
|
||||
//
|
||||
|
@ -64,14 +64,24 @@ Component::ComponentType Component::MarkerToType(const std::string &s) {
|
|||
}
|
||||
|
||||
|
||||
Component* Component::Read(std::istream &is, bool binary, Nnet *nnet) {
|
||||
Component* Component::Read(std::istream &is, bool binary) {
|
||||
int32 dim_out, dim_in;
|
||||
std::string token;
|
||||
|
||||
int first_char = Peek(is, binary);
|
||||
if (first_char == EOF) return NULL;
|
||||
|
||||
ReadToken(is, binary, &token);
|
||||
ReadToken(is, binary, &token);
|
||||
|
||||
// Skip optional initial token
|
||||
if(token == "<Nnet>") {
|
||||
ReadToken(is, binary, &token); // Next token is a Component
|
||||
}
|
||||
// Finish reading when optional terminal token appears
|
||||
if(token == "</Nnet>") {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
Component::ComponentType comp_type = Component::MarkerToType(token);
|
||||
|
||||
ReadBasicType(is, binary, &dim_out);
|
||||
|
@ -80,34 +90,34 @@ Component* Component::Read(std::istream &is, bool binary, Nnet *nnet) {
|
|||
Component *p_comp=NULL;
|
||||
switch (comp_type) {
|
||||
case Component::kAffineTransform :
|
||||
p_comp = new AffineTransform(dim_in, dim_out, nnet);
|
||||
p_comp = new AffineTransform(dim_in, dim_out);
|
||||
break;
|
||||
case Component::kSoftmax :
|
||||
p_comp = new Softmax(dim_in, dim_out, nnet);
|
||||
p_comp = new Softmax(dim_in, dim_out);
|
||||
break;
|
||||
case Component::kSigmoid :
|
||||
p_comp = new Sigmoid(dim_in, dim_out, nnet);
|
||||
p_comp = new Sigmoid(dim_in, dim_out);
|
||||
break;
|
||||
case Component::kTanh :
|
||||
p_comp = new Tanh(dim_in, dim_out, nnet);
|
||||
p_comp = new Tanh(dim_in, dim_out);
|
||||
break;
|
||||
case Component::kDropout :
|
||||
p_comp = new Dropout(dim_in, dim_out, nnet);
|
||||
p_comp = new Dropout(dim_in, dim_out);
|
||||
break;
|
||||
case Component::kRbm :
|
||||
p_comp = new Rbm(dim_in, dim_out, nnet);
|
||||
p_comp = new Rbm(dim_in, dim_out);
|
||||
break;
|
||||
case Component::kSplice :
|
||||
p_comp = new Splice(dim_in, dim_out, nnet);
|
||||
p_comp = new Splice(dim_in, dim_out);
|
||||
break;
|
||||
case Component::kCopy :
|
||||
p_comp = new Copy(dim_in, dim_out, nnet);
|
||||
p_comp = new CopyComponent(dim_in, dim_out);
|
||||
break;
|
||||
case Component::kAddShift :
|
||||
p_comp = new AddShift(dim_in, dim_out, nnet);
|
||||
p_comp = new AddShift(dim_in, dim_out);
|
||||
break;
|
||||
case Component::kRescale :
|
||||
p_comp = new Rescale(dim_in, dim_out, nnet);
|
||||
p_comp = new Rescale(dim_in, dim_out);
|
||||
break;
|
||||
case Component::kUnknown :
|
||||
default :
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
// nnet/nnet-component.h
|
||||
|
||||
// Copyright 2011-2013 Brno University of Technology (Author: Karel Vesely)
|
||||
// Copyright 2011-2013 Brno University of Technology (Author: Karel Vesely)
|
||||
|
||||
// See ../../COPYING for clarification regarding multiple authors
|
||||
//
|
||||
|
@ -34,27 +34,17 @@
|
|||
namespace kaldi {
|
||||
namespace nnet1 {
|
||||
|
||||
// declare the nnet class so we can declare pointer
|
||||
struct NnetTrainOptions;
|
||||
class Nnet;
|
||||
|
||||
|
||||
/**
|
||||
* Abstract class, basic element of the network,
|
||||
* it is a box with defined inputs, outputs,
|
||||
* and tranformation functions interface.
|
||||
*
|
||||
* It is able to propagate and backpropagate
|
||||
* exact implementation is to be implemented in descendants.
|
||||
*
|
||||
* The data buffers are not included
|
||||
* and will be managed from outside.
|
||||
* Abstract class, building block of the network.
|
||||
* It is able to propagate (PropagateFnc: compute the output based on its input)
|
||||
* and backpropagate (BackpropagateFnc: i.e. transform loss derivative w.r.t. output to derivative w.r.t. the input)
|
||||
* the formulas are implemented in descendant classes (AffineTransform,Sigmoid,Softmax,...).
|
||||
*/
|
||||
class Component {
|
||||
|
||||
// Polymorphic Component RTTI
|
||||
/// Component type identification mechanism
|
||||
public:
|
||||
/// Types of the net components
|
||||
/// Types of the Components
|
||||
typedef enum {
|
||||
kUnknown = 0x0,
|
||||
|
||||
|
@ -77,23 +67,27 @@ class Component {
|
|||
kRescale,
|
||||
kLog
|
||||
} ComponentType;
|
||||
/// Pair of type and marker
|
||||
/// A pair of type and marker
|
||||
struct key_value {
|
||||
const Component::ComponentType key;
|
||||
const char *value;
|
||||
};
|
||||
/// Mapping of types and markers
|
||||
/// Mapping of types and markers (the table is defined in nnet-component.cc)
|
||||
static const struct key_value kMarkerMap[];
|
||||
/// Convert component type to marker
|
||||
static const char* TypeToMarker(ComponentType t);
|
||||
/// Convert marker to component type
|
||||
static ComponentType MarkerToType(const std::string &s);
|
||||
|
||||
Component(int32 input_dim, int32 output_dim, Nnet *nnet)
|
||||
: input_dim_(input_dim), output_dim_(output_dim), nnet_(nnet) { }
|
||||
virtual ~Component() { }
|
||||
|
||||
|
||||
/// General interface of a component
|
||||
public:
|
||||
Component(int32 input_dim, int32 output_dim)
|
||||
: input_dim_(input_dim), output_dim_(output_dim) { }
|
||||
virtual ~Component() { }
|
||||
|
||||
/// Copy component (deep copy).
|
||||
virtual Component* Copy() const = 0;
|
||||
|
||||
/// Get Type Identification of the component
|
||||
virtual ComponentType GetType() const = 0;
|
||||
/// Check if contains trainable parameters
|
||||
|
@ -110,28 +104,29 @@ class Component {
|
|||
return output_dim_;
|
||||
}
|
||||
|
||||
/// Perform forward pass propagateion Input->Output
|
||||
/// Perform forward pass propagation Input->Output
|
||||
void Propagate(const CuMatrix<BaseFloat> &in, CuMatrix<BaseFloat> *out);
|
||||
/// Perform backward pass propagation, out_diff -> in_diff
|
||||
/// '&in' and '&out' will often be unused...
|
||||
/// '&in' and '&out' will sometimes be unused...
|
||||
void Backpropagate(const CuMatrix<BaseFloat> &in,
|
||||
const CuMatrix<BaseFloat> &out,
|
||||
const CuMatrix<BaseFloat> &out_diff,
|
||||
CuMatrix<BaseFloat> *in_diff);
|
||||
|
||||
/// Read component from stream
|
||||
static Component* Read(std::istream &is, bool binary, Nnet *nnet);
|
||||
static Component* Read(std::istream &is, bool binary);
|
||||
/// Write component to stream
|
||||
void Write(std::ostream &os, bool binary) const;
|
||||
|
||||
/// Optionally print some additional info
|
||||
virtual std::string Info() const { return ""; }
|
||||
// abstract interface for propagation/backpropagation
|
||||
|
||||
/// Abstract interface for propagation/backpropagation
|
||||
protected:
|
||||
/// Forward pass transformation (to be implemented by descendents...)
|
||||
/// Forward pass transformation (to be implemented by descending class...)
|
||||
virtual void PropagateFnc(const CuMatrix<BaseFloat> &in,
|
||||
CuMatrix<BaseFloat> *out) = 0;
|
||||
/// Backward pass transformation (to be implemented by descendents...)
|
||||
/// Backward pass transformation (to be implemented by descending class...)
|
||||
virtual void BackpropagateFnc(const CuMatrix<BaseFloat> &in,
|
||||
const CuMatrix<BaseFloat> &out,
|
||||
const CuMatrix<BaseFloat> &out_diff,
|
||||
|
@ -144,26 +139,24 @@ class Component {
|
|||
virtual void WriteData(std::ostream &os, bool binary) const { }
|
||||
|
||||
|
||||
// data members
|
||||
/// Data members
|
||||
protected:
|
||||
int32 input_dim_; ///< Size of input vectors
|
||||
int32 output_dim_; ///< Size of output vectors
|
||||
|
||||
Nnet *nnet_; ///< Pointer to the whole network
|
||||
private:
|
||||
KALDI_DISALLOW_COPY_AND_ASSIGN(Component);
|
||||
protected:
|
||||
//KALDI_DISALLOW_COPY_AND_ASSIGN(Component);
|
||||
};
|
||||
|
||||
|
||||
/**
|
||||
* Class UpdatableComponent is a Component which has
|
||||
* trainable parameters and contains SGD training
|
||||
* hyper-parameters (learnrate, momenutm, L2, L1)
|
||||
* Class UpdatableComponent is a Component which has trainable parameters,
|
||||
* contains SGD training hyper-parameters in NnetTrainOptions.
|
||||
*/
|
||||
class UpdatableComponent : public Component {
|
||||
public:
|
||||
UpdatableComponent(int32 input_dim, int32 output_dim, Nnet *nnet)
|
||||
: Component(input_dim, output_dim, nnet) { }
|
||||
UpdatableComponent(int32 input_dim, int32 output_dim)
|
||||
: Component(input_dim, output_dim) { }
|
||||
virtual ~UpdatableComponent() { }
|
||||
|
||||
/// Check if contains trainable parameters
|
||||
|
@ -176,7 +169,7 @@ class UpdatableComponent : public Component {
|
|||
const CuMatrix<BaseFloat> &diff) = 0;
|
||||
|
||||
/// Sets the training options to the component
|
||||
void SetTrainOptions(const NnetTrainOptions &opts) {
|
||||
virtual void SetTrainOptions(const NnetTrainOptions &opts) {
|
||||
opts_ = opts;
|
||||
}
|
||||
/// Gets the training options from the component
|
||||
|
@ -190,18 +183,17 @@ class UpdatableComponent : public Component {
|
|||
};
|
||||
|
||||
|
||||
|
||||
|
||||
inline void Component::Propagate(const CuMatrix<BaseFloat> &in,
|
||||
CuMatrix<BaseFloat> *out) {
|
||||
// Check the dims
|
||||
if (input_dim_ != in.NumCols()) {
|
||||
KALDI_ERR << "Nonmatching dims, component:" << input_dim_ << " data:" << in.NumCols();
|
||||
KALDI_ERR << "Non-matching dims, component:" << input_dim_ << " data:" << in.NumCols();
|
||||
}
|
||||
|
||||
// Allocate target buffer
|
||||
if (output_dim_ != out->NumCols() || in.NumRows() != out->NumRows()) {
|
||||
out->Resize(in.NumRows(), output_dim_);
|
||||
}
|
||||
|
||||
// Call the propagation implementation of the component
|
||||
PropagateFnc(in, out);
|
||||
}
|
||||
|
||||
|
@ -210,27 +202,26 @@ inline void Component::Backpropagate(const CuMatrix<BaseFloat> &in,
|
|||
const CuMatrix<BaseFloat> &out,
|
||||
const CuMatrix<BaseFloat> &out_diff,
|
||||
CuMatrix<BaseFloat> *in_diff) {
|
||||
//check the dims
|
||||
// Check the dims
|
||||
if (output_dim_ != out_diff.NumCols()) {
|
||||
KALDI_ERR << "Nonmatching output dims, component:" << output_dim_
|
||||
KALDI_ERR << "Non-matching output dims, component:" << output_dim_
|
||||
<< " data:" << out_diff.NumCols();
|
||||
}
|
||||
//allocate buffer
|
||||
// Allocate target buffer
|
||||
if (input_dim_ != in_diff->NumCols() || out_diff.NumRows() != in_diff->NumRows()) {
|
||||
in_diff->Resize(out_diff.NumRows(), input_dim_);
|
||||
}
|
||||
//asserts on the dims
|
||||
// Asserts on the dims
|
||||
KALDI_ASSERT((in.NumRows() == out.NumRows()) &&
|
||||
(in.NumRows() == out_diff.NumRows()) &&
|
||||
(in.NumRows() == in_diff->NumRows()));
|
||||
KALDI_ASSERT(in.NumCols() == in_diff->NumCols());
|
||||
KALDI_ASSERT(out.NumCols() == out_diff.NumCols());
|
||||
//call the backprop implementation of the component
|
||||
// Call the backprop implementation of the component
|
||||
BackpropagateFnc(in, out, out_diff, in_diff);
|
||||
}
|
||||
|
||||
|
||||
|
||||
} // namespace nnet1
|
||||
} // namespace kaldi
|
||||
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
// nnet/nnet-loss-prior.cc
|
||||
|
||||
// Copyright 2012 Karel Vesely
|
||||
// Copyright 2012 Brno University of Technology (author: Karel Vesely)
|
||||
|
||||
// See ../../COPYING for clarification regarding multiple authors
|
||||
//
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
// nnet/nnet-loss-prior.h
|
||||
|
||||
// Copyright 2012 Karel Vesely
|
||||
// Copyright 2012 Brno University of Technology (author: Karel Vesely)
|
||||
|
||||
// See ../../COPYING for clarification regarding multiple authors
|
||||
//
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
// nnet/nnet-loss.cc
|
||||
|
||||
// Copyright 2011 Karel Vesely
|
||||
// Copyright 2011 Brno University of Technology (author: Karel Vesely)
|
||||
|
||||
// See ../../COPYING for clarification regarding multiple authors
|
||||
//
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
// nnet/nnet-loss.h
|
||||
|
||||
// Copyright 2011 Karel Vesely
|
||||
// Copyright 2011 Brno University of Technology (author: Karel Vesely)
|
||||
|
||||
// See ../../COPYING for clarification regarding multiple authors
|
||||
//
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
// nnet/nnet-nnet.cc
|
||||
|
||||
// Copyright 2011-2013 Brno University of Technology (Author: Karel Vesely)
|
||||
// Copyright 2011-2013 Brno University of Technology (Author: Karel Vesely)
|
||||
|
||||
// See ../../COPYING for clarification regarding multiple authors
|
||||
//
|
||||
|
@ -27,59 +27,66 @@
|
|||
namespace kaldi {
|
||||
namespace nnet1 {
|
||||
|
||||
/// Destructor: deletes every component pointer stored in components_.
Nnet::~Nnet() {
  typedef std::vector<Component*>::iterator ComponentIter;
  for (ComponentIter it = components_.begin(); it != components_.end(); ++it) {
    delete *it;
  }
}
|
||||
|
||||
|
||||
void Nnet::Propagate(const CuMatrix<BaseFloat> &in, CuMatrix<BaseFloat> *out) {
|
||||
KALDI_ASSERT(NULL != out);
|
||||
|
||||
if (LayerCount() == 0) {
|
||||
if (NumComponents() == 0) {
|
||||
out->Resize(in.NumRows(), in.NumCols());
|
||||
out->CopyFromMat(in);
|
||||
return;
|
||||
}
|
||||
|
||||
// we need at least L+1 input buffers
|
||||
KALDI_ASSERT((int32)propagate_buf_.size() >= LayerCount()+1);
|
||||
KALDI_ASSERT((int32)propagate_buf_.size() >= NumComponents()+1);
|
||||
|
||||
|
||||
propagate_buf_[0].Resize(in.NumRows(), in.NumCols());
|
||||
propagate_buf_[0].CopyFromMat(in);
|
||||
|
||||
for(int32 i=0; i<(int32)nnet_.size(); i++) {
|
||||
nnet_[i]->Propagate(propagate_buf_[i], &propagate_buf_[i+1]);
|
||||
for(int32 i=0; i<(int32)components_.size(); i++) {
|
||||
components_[i]->Propagate(propagate_buf_[i], &propagate_buf_[i+1]);
|
||||
}
|
||||
|
||||
CuMatrix<BaseFloat> &mat = propagate_buf_[nnet_.size()];
|
||||
CuMatrix<BaseFloat> &mat = propagate_buf_[components_.size()];
|
||||
out->Resize(mat.NumRows(), mat.NumCols());
|
||||
out->CopyFromMat(mat);
|
||||
}
|
||||
|
||||
|
||||
void Nnet::Backpropagate(const CuMatrix<BaseFloat> &out_diff, CuMatrix<BaseFloat> *in_diff) {
|
||||
if(LayerCount() == 0) { KALDI_ERR << "Cannot backpropagate on empty network"; }
|
||||
if(NumComponents() == 0) { KALDI_ERR << "Cannot backpropagate on empty network"; }
|
||||
|
||||
// we need at least L+1 input buffers
|
||||
KALDI_ASSERT((int32)propagate_buf_.size() >= LayerCount()+1);
|
||||
KALDI_ASSERT((int32)propagate_buf_.size() >= NumComponents()+1);
|
||||
// we need at least L-1 error derivative buffers
|
||||
KALDI_ASSERT((int32)backpropagate_buf_.size() >= LayerCount()-1);
|
||||
KALDI_ASSERT((int32)backpropagate_buf_.size() >= NumComponents()-1);
|
||||
|
||||
//////////////////////////////////////
|
||||
// Backpropagation
|
||||
//
|
||||
|
||||
// don't copy the out_diff to buffers, use it as is...
|
||||
int32 i = nnet_.size()-1;
|
||||
nnet_.back()->Backpropagate(propagate_buf_[i], propagate_buf_[i+1],
|
||||
// we don't copy the out_diff to buffers, we use it as it is...
|
||||
int32 i = components_.size()-1;
|
||||
components_.back()->Backpropagate(propagate_buf_[i], propagate_buf_[i+1],
|
||||
out_diff, &backpropagate_buf_[i-1]);
|
||||
if (nnet_[i]->IsUpdatable()) {
|
||||
UpdatableComponent *uc = dynamic_cast<UpdatableComponent*>(nnet_[i]);
|
||||
if (components_[i]->IsUpdatable()) {
|
||||
UpdatableComponent *uc = dynamic_cast<UpdatableComponent*>(components_[i]);
|
||||
uc->Update(propagate_buf_[i], out_diff);
|
||||
}
|
||||
|
||||
// backpropagate by using buffers
|
||||
for(i--; i >= 1; i--) {
|
||||
nnet_[i]->Backpropagate(propagate_buf_[i], propagate_buf_[i+1],
|
||||
components_[i]->Backpropagate(propagate_buf_[i], propagate_buf_[i+1],
|
||||
backpropagate_buf_[i], &backpropagate_buf_[i-1]);
|
||||
if (nnet_[i]->IsUpdatable()) {
|
||||
UpdatableComponent *uc = dynamic_cast<UpdatableComponent*>(nnet_[i]);
|
||||
if (components_[i]->IsUpdatable()) {
|
||||
UpdatableComponent *uc = dynamic_cast<UpdatableComponent*>(components_[i]);
|
||||
uc->Update(propagate_buf_[i], backpropagate_buf_[i]);
|
||||
}
|
||||
}
|
||||
|
@ -87,13 +94,13 @@ void Nnet::Backpropagate(const CuMatrix<BaseFloat> &out_diff, CuMatrix<BaseFloat
|
|||
// now backpropagate through first layer,
|
||||
// but only if asked to (by in_diff pointer)
|
||||
if (NULL != in_diff) {
|
||||
nnet_[0]->Backpropagate(propagate_buf_[0], propagate_buf_[1],
|
||||
components_[0]->Backpropagate(propagate_buf_[0], propagate_buf_[1],
|
||||
backpropagate_buf_[0], in_diff);
|
||||
}
|
||||
|
||||
// update the first layer
|
||||
if (nnet_[0]->IsUpdatable()) {
|
||||
UpdatableComponent *uc = dynamic_cast<UpdatableComponent*>(nnet_[0]);
|
||||
if (components_[0]->IsUpdatable()) {
|
||||
UpdatableComponent *uc = dynamic_cast<UpdatableComponent*>(components_[0]);
|
||||
uc->Update(propagate_buf_[0], backpropagate_buf_[0]);
|
||||
}
|
||||
|
||||
|
@ -106,14 +113,14 @@ void Nnet::Backpropagate(const CuMatrix<BaseFloat> &out_diff, CuMatrix<BaseFloat
|
|||
void Nnet::Feedforward(const CuMatrix<BaseFloat> &in, CuMatrix<BaseFloat> *out) {
|
||||
KALDI_ASSERT(NULL != out);
|
||||
|
||||
if (LayerCount() == 0) {
|
||||
if (NumComponents() == 0) {
|
||||
out->Resize(in.NumRows(), in.NumCols());
|
||||
out->CopyFromMat(in);
|
||||
return;
|
||||
}
|
||||
|
||||
if (LayerCount() == 1) {
|
||||
nnet_[0]->Propagate(in, out);
|
||||
if (NumComponents() == 1) {
|
||||
components_[0]->Propagate(in, out);
|
||||
return;
|
||||
}
|
||||
|
||||
|
@ -122,27 +129,81 @@ void Nnet::Feedforward(const CuMatrix<BaseFloat> &in, CuMatrix<BaseFloat> *out)
|
|||
|
||||
// propagate by using exactly 2 auxiliary buffers
|
||||
int32 L = 0;
|
||||
nnet_[L]->Propagate(in, &propagate_buf_[L%2]);
|
||||
for(L++; L<=LayerCount()-2; L++) {
|
||||
nnet_[L]->Propagate(propagate_buf_[(L-1)%2], &propagate_buf_[L%2]);
|
||||
components_[L]->Propagate(in, &propagate_buf_[L%2]);
|
||||
for(L++; L<=NumComponents()-2; L++) {
|
||||
components_[L]->Propagate(propagate_buf_[(L-1)%2], &propagate_buf_[L%2]);
|
||||
}
|
||||
nnet_[L]->Propagate(propagate_buf_[(L-1)%2], out);
|
||||
components_[L]->Propagate(propagate_buf_[(L-1)%2], out);
|
||||
// release the buffers we don't need anymore
|
||||
propagate_buf_[0].Resize(0,0);
|
||||
propagate_buf_[1].Resize(0,0);
|
||||
}
|
||||
|
||||
|
||||
/// Output dimension of the network, i.e. the output dimension of its last
/// component. Asserts that the network is not empty.
int32 Nnet::OutputDim() const {
  KALDI_ASSERT(!components_.empty());
  const Component *last = components_[components_.size() - 1];
  return last->OutputDim();
}
|
||||
|
||||
/// Input dimension of the network, i.e. the input dimension of its first
/// component. Asserts that the network is not empty.
int32 Nnet::InputDim() const {
  KALDI_ASSERT(!components_.empty());
  const Component *first = components_[0];
  return first->InputDim();
}
|
||||
|
||||
/// Read-only access to the component at position "component".
/// The unsigned comparison in the assert rejects negative indices as well.
const Component& Nnet::GetComponent(int32 component) const {
  const size_t index = static_cast<size_t>(component);
  KALDI_ASSERT(index < components_.size());
  return *components_[index];
}
|
||||
|
||||
/// Mutable access to the component at position "component".
/// The unsigned comparison in the assert rejects negative indices as well.
Component& Nnet::GetComponent(int32 component) {
  const size_t index = static_cast<size_t>(component);
  KALDI_ASSERT(index < components_.size());
  return *components_[index];
}
|
||||
|
||||
/// Replaces the c'th component by "component", deleting the component that
/// was stored there before, and then re-checks the network dimensions.
/// "component" must not be NULL. Passing the pointer that is already stored
/// at position c is a no-op (it is not deleted).
void Nnet::SetComponent(int32 c, Component *component) {
  KALDI_ASSERT(static_cast<size_t>(c) < components_.size());
  KALDI_ASSERT(component != NULL);
  // Deleting the old component when it is the very pointer being installed
  // would leave a dangling pointer in components_.
  if (components_[c] != component) {
    delete components_[c];
    components_[c] = component;
  }
  Check(); // Check that all the dimensions still match up.
}
|
||||
|
||||
/// Appends a component to the end of the network and re-checks the network
/// dimensions. The pointer must not be NULL; it is stored in components_
/// and deleted by the network's destructor.
void Nnet::AppendComponent(Component* dynamically_allocated_comp) {
  KALDI_ASSERT(dynamically_allocated_comp != NULL);
  components_.push_back(dynamically_allocated_comp);

  // Propagate() asserts on NumComponents()+1 forward buffers and
  // Backpropagate() on NumComponents()-1 backward buffers, so grow the
  // buffers along with the network. Existing buffers are never shrunk here.
  const size_t num_components = components_.size();  // >= 1 after push_back
  if (propagate_buf_.size() < num_components + 1) {
    propagate_buf_.resize(num_components + 1);
  }
  if (backpropagate_buf_.size() < num_components - 1) {
    backpropagate_buf_.resize(num_components - 1);
  }

  Check();
}
|
||||
|
||||
void Nnet::AppendNnet(const Nnet& nnet_to_append) {
|
||||
for(int32 i=0; i<nnet_to_append.NumComponents(); i++) {
|
||||
AppendComponent(nnet_to_append.GetComponent(i).Copy());
|
||||
}
|
||||
Check();
|
||||
}
|
||||
|
||||
/// Removes the component at position "component" from the network, deletes
/// it, and re-checks the dimensions of the remaining components.
void Nnet::RemoveComponent(int32 component) {
  // The unsigned comparison also rejects negative indices, e.g. the -1
  // that RemoveLastComponent() passes when the network is empty.
  KALDI_ASSERT(static_cast<size_t>(component) < components_.size());
  Component* ptr = components_[component];
  components_.erase(components_.begin() + component);
  delete ptr;
  Check();
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
void Nnet::GetWeights(Vector<BaseFloat>* wei_copy) {
|
||||
wei_copy->Resize(NumParams());
|
||||
int32 pos = 0;
|
||||
//copy the params
|
||||
for(int32 n=0; n<nnet_.size(); n++) {
|
||||
if(nnet_[n]->IsUpdatable()) {
|
||||
switch(nnet_[n]->GetType()) {
|
||||
for(int32 n=0; n<components_.size(); n++) {
|
||||
if(components_[n]->IsUpdatable()) {
|
||||
switch(components_[n]->GetType()) {
|
||||
case Component::kAffineTransform : {
|
||||
//get the weights from CuMatrix to Matrix
|
||||
const CuMatrix<BaseFloat>& cu_mat =
|
||||
dynamic_cast<AffineTransform*>(nnet_[n])->GetLinearity();
|
||||
dynamic_cast<AffineTransform*>(components_[n])->GetLinearity();
|
||||
Matrix<BaseFloat> mat(cu_mat.NumRows(),cu_mat.NumCols());
|
||||
cu_mat.CopyToMat(&mat);
|
||||
//copy the matrix row-by-row to the vector
|
||||
|
@ -151,7 +212,7 @@ void Nnet::GetWeights(Vector<BaseFloat>* wei_copy) {
|
|||
pos += mat_size;
|
||||
//get the biases from CuVector to Vector
|
||||
const CuVector<BaseFloat>& cu_vec =
|
||||
dynamic_cast<AffineTransform*>(nnet_[n])->GetBias();
|
||||
dynamic_cast<AffineTransform*>(components_[n])->GetBias();
|
||||
Vector<BaseFloat> vec(cu_vec.Dim());
|
||||
cu_vec.CopyToVec(&vec);
|
||||
//append biases to the supervector
|
||||
|
@ -161,7 +222,7 @@ void Nnet::GetWeights(Vector<BaseFloat>* wei_copy) {
|
|||
default :
|
||||
KALDI_ERR << "Unimplemented access to parameters "
|
||||
<< "of updatable component "
|
||||
<< Component::TypeToMarker(nnet_[n]->GetType());
|
||||
<< Component::TypeToMarker(components_[n]->GetType());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -172,12 +233,12 @@ void Nnet::GetWeights(Vector<BaseFloat>* wei_copy) {
|
|||
void Nnet::SetWeights(const Vector<BaseFloat>& wei_src) {
|
||||
KALDI_ASSERT(wei_src.Dim() == NumParams());
|
||||
int32 pos = 0;
|
||||
for(int32 n=0; n<nnet_.size(); n++) {
|
||||
if(nnet_[n]->IsUpdatable()) {
|
||||
switch(nnet_[n]->GetType()) {
|
||||
for(int32 n=0; n<components_.size(); n++) {
|
||||
if(components_[n]->IsUpdatable()) {
|
||||
switch(components_[n]->GetType()) {
|
||||
case Component::kAffineTransform : {
|
||||
//get the component
|
||||
AffineTransform* aff_t = dynamic_cast<AffineTransform*>(nnet_[n]);
|
||||
AffineTransform* aff_t = dynamic_cast<AffineTransform*>(components_[n]);
|
||||
//we need weight matrix with original dimensions
|
||||
const CuMatrix<BaseFloat>& cu_mat = aff_t->GetLinearity();
|
||||
Matrix<BaseFloat> mat(cu_mat.NumRows(),cu_mat.NumCols());
|
||||
|
@ -205,7 +266,7 @@ void Nnet::SetWeights(const Vector<BaseFloat>& wei_src) {
|
|||
default :
|
||||
KALDI_ERR << "Unimplemented access to parameters "
|
||||
<< "of updatable component "
|
||||
<< Component::TypeToMarker(nnet_[n]->GetType());
|
||||
<< Component::TypeToMarker(components_[n]->GetType());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -217,13 +278,13 @@ void Nnet::GetGradient(Vector<BaseFloat>* grad_copy) {
|
|||
grad_copy->Resize(NumParams());
|
||||
int32 pos = 0;
|
||||
//copy the params
|
||||
for(int32 n=0; n<nnet_.size(); n++) {
|
||||
if(nnet_[n]->IsUpdatable()) {
|
||||
switch(nnet_[n]->GetType()) {
|
||||
for(int32 n=0; n<components_.size(); n++) {
|
||||
if(components_[n]->IsUpdatable()) {
|
||||
switch(components_[n]->GetType()) {
|
||||
case Component::kAffineTransform : {
|
||||
//get the weights from CuMatrix to Matrix
|
||||
const CuMatrix<BaseFloat>& cu_mat =
|
||||
dynamic_cast<AffineTransform*>(nnet_[n])->GetLinearityCorr();
|
||||
dynamic_cast<AffineTransform*>(components_[n])->GetLinearityCorr();
|
||||
Matrix<BaseFloat> mat(cu_mat.NumRows(),cu_mat.NumCols());
|
||||
cu_mat.CopyToMat(&mat);
|
||||
//copy the matrix row-by-row to the vector
|
||||
|
@ -232,7 +293,7 @@ void Nnet::GetGradient(Vector<BaseFloat>* grad_copy) {
|
|||
pos += mat_size;
|
||||
//get the biases from CuVector to Vector
|
||||
const CuVector<BaseFloat>& cu_vec =
|
||||
dynamic_cast<AffineTransform*>(nnet_[n])->GetBiasCorr();
|
||||
dynamic_cast<AffineTransform*>(components_[n])->GetBiasCorr();
|
||||
Vector<BaseFloat> vec(cu_vec.Dim());
|
||||
cu_vec.CopyToVec(&vec);
|
||||
//append biases to the supervector
|
||||
|
@ -242,7 +303,7 @@ void Nnet::GetGradient(Vector<BaseFloat>* grad_copy) {
|
|||
default :
|
||||
KALDI_ERR << "Unimplemented access to parameters "
|
||||
<< "of updatable component "
|
||||
<< Component::TypeToMarker(nnet_[n]->GetType());
|
||||
<< Component::TypeToMarker(components_[n]->GetType());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -252,14 +313,14 @@ void Nnet::GetGradient(Vector<BaseFloat>* grad_copy) {
|
|||
|
||||
int32 Nnet::NumParams() const {
|
||||
int32 n_params = 0;
|
||||
for(int32 n=0; n<nnet_.size(); n++) {
|
||||
if(nnet_[n]->IsUpdatable()) {
|
||||
switch(nnet_[n]->GetType()) {
|
||||
for(int32 n=0; n<components_.size(); n++) {
|
||||
if(components_[n]->IsUpdatable()) {
|
||||
switch(components_[n]->GetType()) {
|
||||
case Component::kAffineTransform :
|
||||
n_params += (1 + nnet_[n]->InputDim()) * nnet_[n]->OutputDim();
|
||||
n_params += (1 + components_[n]->InputDim()) * components_[n]->OutputDim();
|
||||
break;
|
||||
default :
|
||||
KALDI_WARN << Component::TypeToMarker(nnet_[n]->GetType())
|
||||
KALDI_WARN << Component::TypeToMarker(components_[n]->GetType())
|
||||
<< "is updatable, but its parameter count not implemented";
|
||||
}
|
||||
}
|
||||
|
@ -268,40 +329,95 @@ int32 Nnet::NumParams() const {
|
|||
}
|
||||
|
||||
|
||||
void Nnet::Read(std::istream &in, bool binary) {
|
||||
void Nnet::Read(const std::string &file) {
|
||||
bool binary;
|
||||
Input in(file, &binary);
|
||||
Read(in.Stream(), binary);
|
||||
in.Close();
|
||||
// Warn if the NN is empty
|
||||
if(NumComponents() == 0) {
|
||||
KALDI_WARN << "The network '" << file << "' is empty.";
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void Nnet::Read(std::istream &is, bool binary) {
|
||||
// get the network layers from a factory
|
||||
Component *comp;
|
||||
while (NULL != (comp = Component::Read(in, binary, this))) {
|
||||
if (LayerCount() > 0 && nnet_.back()->OutputDim() != comp->InputDim()) {
|
||||
while (NULL != (comp = Component::Read(is, binary))) {
|
||||
if (NumComponents() > 0 && components_.back()->OutputDim() != comp->InputDim()) {
|
||||
KALDI_ERR << "Dimensionality mismatch!"
|
||||
<< " Previous layer output:" << nnet_.back()->OutputDim()
|
||||
<< " Previous layer output:" << components_.back()->OutputDim()
|
||||
<< " Current layer input:" << comp->InputDim();
|
||||
}
|
||||
nnet_.push_back(comp);
|
||||
components_.push_back(comp);
|
||||
}
|
||||
// create empty buffers
|
||||
propagate_buf_.resize(LayerCount()+1);
|
||||
backpropagate_buf_.resize(LayerCount()-1);
|
||||
propagate_buf_.resize(NumComponents()+1);
|
||||
backpropagate_buf_.resize(NumComponents()-1);
|
||||
// reset learn rate
|
||||
opts_.learn_rate = 0.0;
|
||||
|
||||
Check(); //check consistency (dims...)
|
||||
}
|
||||
|
||||
|
||||
void Nnet::Write(const std::string &file, bool binary) {
|
||||
Output out(file, binary, true);
|
||||
Write(out.Stream(), binary);
|
||||
out.Close();
|
||||
}
|
||||
|
||||
|
||||
void Nnet::Write(std::ostream &os, bool binary) {
|
||||
Check();
|
||||
WriteToken(os, binary, "<Nnet>");
|
||||
if(binary == false) os << std::endl;
|
||||
for(int32 i=0; i<NumComponents(); i++) {
|
||||
components_[i]->Write(os, binary);
|
||||
}
|
||||
WriteToken(os, binary, "</Nnet>");
|
||||
if(binary == false) os << std::endl;
|
||||
}
|
||||
|
||||
|
||||
std::string Nnet::Info() const {
|
||||
std::ostringstream ostr;
|
||||
ostr << "num-components " << LayerCount() << std::endl;
|
||||
ostr << "num-components " << NumComponents() << std::endl;
|
||||
ostr << "input-dim " << InputDim() << std::endl;
|
||||
ostr << "output-dim " << OutputDim() << std::endl;
|
||||
ostr << "number-of-parameters " << static_cast<float>(NumParams())/1e6
|
||||
<< " millions" << std::endl;
|
||||
for (int32 i = 0; i < LayerCount(); i++)
|
||||
for (int32 i = 0; i < NumComponents(); i++)
|
||||
ostr << "component " << i+1 << " : "
|
||||
<< Component::TypeToMarker(nnet_[i]->GetType())
|
||||
<< ", input-dim " << nnet_[i]->InputDim()
|
||||
<< ", output-dim " << nnet_[i]->OutputDim()
|
||||
<< ", " << nnet_[i]->Info() << std::endl;
|
||||
<< Component::TypeToMarker(components_[i]->GetType())
|
||||
<< ", input-dim " << components_[i]->InputDim()
|
||||
<< ", output-dim " << components_[i]->OutputDim()
|
||||
<< ", " << components_[i]->Info() << std::endl;
|
||||
return ostr.str();
|
||||
}
|
||||
|
||||
|
||||
|
||||
void Nnet::Check() const {
|
||||
for (size_t i = 0; i + 1 < components_.size(); i++) {
|
||||
KALDI_ASSERT(components_[i] != NULL);
|
||||
int32 output_dim = components_[i]->OutputDim(),
|
||||
next_input_dim = components_[i+1]->InputDim();
|
||||
KALDI_ASSERT(output_dim == next_input_dim);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void Nnet::SetTrainOptions(const NnetTrainOptions& opts) {
|
||||
opts_ = opts;
|
||||
//set values to individual components
|
||||
for (int32 l=0; l<NumComponents(); l++) {
|
||||
if(GetComponent(l).IsUpdatable()) {
|
||||
dynamic_cast<UpdatableComponent&>(GetComponent(l)).SetTrainOptions(opts_);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
} // namespace nnet1
|
||||
} // namespace kaldi
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
// nnet/nnet-nnet.h
|
||||
|
||||
// Copyright 2011-2013 Brno University of Technology (Author: Karel Vesely)
|
||||
// Copyright 2011-2013 Brno University of Technology (Author: Karel Vesely)
|
||||
|
||||
// See ../../COPYING for clarification regarding multiple authors
|
||||
//
|
||||
|
@ -51,31 +51,28 @@ class Nnet {
|
|||
int32 InputDim() const;
|
||||
/// Dimensionality of network outputs (posteriors | bn-features | etc.)
|
||||
int32 OutputDim() const;
|
||||
|
||||
/// Returns number of layers in the network
|
||||
int32 LayerCount() const {
|
||||
return nnet_.size();
|
||||
}
|
||||
/// Access to an individual layer (unprotected)
|
||||
Component* Layer(int32 index) {
|
||||
return nnet_[index];
|
||||
}
|
||||
/// Get the position of a layer in the network
|
||||
int32 IndexOfLayer(const Component& comp) const;
|
||||
|
||||
/// Returns number of components-- think of this as similar to # of layers, but
|
||||
/// e.g. the nonlinearity and the linear part count as separate components,
|
||||
/// so the number of components will be more than the number of layers.
|
||||
int32 NumComponents() const { return components_.size(); }
|
||||
|
||||
const Component& GetComponent(int32 c) const;
|
||||
Component& GetComponent(int32 c);
|
||||
|
||||
/// Sets the c'th component to "component", taking ownership of the pointer
|
||||
/// and deleting the corresponding one that we own.
|
||||
void SetComponent(int32 c, Component *component);
|
||||
|
||||
/// Add another layer
|
||||
/// Warning : the Nnet over-takes responsibility for freeing the memory
|
||||
/// so use dynamically allocated Component only!
|
||||
void AppendLayer(Component* dynamically_allocated_comp);
|
||||
/// Concatenate the network
|
||||
/// Warning : this is destructive, the arg src_nnet_will_be_empty
|
||||
/// will be empty network after calling this method
|
||||
void Concatenate(Nnet* src_nnet_will_be_empty);
|
||||
/// Remove layer (checks for meaningful dimensions after removal)
|
||||
void RemoveLayer(int32 index);
|
||||
void RemoveLastLayer() {
|
||||
RemoveLayer(LayerCount()-1);
|
||||
}
|
||||
/// Appends this component to the components already in the neural net.
|
||||
/// Takes ownership of the pointer
|
||||
void AppendComponent(Component *dynamically_allocated_comp);
|
||||
/// Append another network to the current one (copy components).
|
||||
void AppendNnet(const Nnet& nnet_to_append);
|
||||
|
||||
/// Remove component
|
||||
void RemoveComponent(int32 c);
|
||||
void RemoveLastComponent() { RemoveComponent(NumComponents()-1); }
|
||||
|
||||
/// Access to forward pass buffers
|
||||
const std::vector<CuMatrix<BaseFloat> >& PropagateBuffer() const {
|
||||
|
@ -86,7 +83,7 @@ class Nnet {
|
|||
return backpropagate_buf_;
|
||||
}
|
||||
|
||||
/// get the number of parameters in the network
|
||||
/// Get the number of parameters in the network
|
||||
int32 NumParams() const;
|
||||
/// Get the network weights in a supervector
|
||||
void GetWeights(Vector<BaseFloat>* wei_copy);
|
||||
|
@ -103,8 +100,11 @@ class Nnet {
|
|||
void Write(const std::string &file, bool binary);
|
||||
/// Write MLP to stream
|
||||
void Write(std::ostream &out, bool binary);
|
||||
|
||||
/// Create string with human readable description of the nnet instance
|
||||
std::string Info() const;
|
||||
/// Consistency check.
|
||||
void Check() const;
|
||||
|
||||
/// Set training hyper-parameters to the network and its UpdatableComponent(s)
|
||||
void SetTrainOptions(const NnetTrainOptions& opts);
|
||||
|
@ -114,11 +114,9 @@ class Nnet {
|
|||
}
|
||||
|
||||
private:
|
||||
/// NnetType is alias to vector of components
|
||||
typedef std::vector<Component*> NnetType;
|
||||
/// Vector which contains all the layers composing the network network,
|
||||
/// also non-linearities (sigmoid|softmax|tanh|...) are considered as layers.
|
||||
NnetType nnet_;
|
||||
/// Vector which contains all the components composing the neural network,
|
||||
/// the components are for example: AffineTransform, Sigmoid, Softmax
|
||||
std::vector<Component*> components_;
|
||||
|
||||
std::vector<CuMatrix<BaseFloat> > propagate_buf_; ///< buffers for forward pass
|
||||
std::vector<CuMatrix<BaseFloat> > backpropagate_buf_; ///< buffers for backward pass
|
||||
|
@ -130,109 +128,6 @@ class Nnet {
|
|||
};
|
||||
|
||||
|
||||
inline Nnet::~Nnet() {
|
||||
// delete all the components
|
||||
NnetType::iterator it;
|
||||
for(it=nnet_.begin(); it!=nnet_.end(); ++it) {
|
||||
delete *it;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
inline int32 Nnet::InputDim() const {
|
||||
if (LayerCount() == 0) {
|
||||
KALDI_ERR << "No layers in MLP";
|
||||
}
|
||||
return nnet_.front()->InputDim();
|
||||
}
|
||||
|
||||
|
||||
inline int32 Nnet::OutputDim() const {
|
||||
if (LayerCount() <= 0) {
|
||||
KALDI_ERR << "No layers in MLP";
|
||||
}
|
||||
return nnet_.back()->OutputDim();
|
||||
}
|
||||
|
||||
|
||||
inline int32 Nnet::IndexOfLayer(const Component &comp) const {
|
||||
for(int32 i=0; i<LayerCount(); i++) {
|
||||
if (&comp == nnet_[i]) return i;
|
||||
}
|
||||
KALDI_ERR << "Component:" << &comp
|
||||
<< " type:" << comp.GetType()
|
||||
<< " not found in the MLP";
|
||||
return -1;
|
||||
}
|
||||
|
||||
|
||||
inline void Nnet::AppendLayer(Component* dynamically_allocated_comp) {
|
||||
if(LayerCount() > 0) {
|
||||
KALDI_ASSERT(OutputDim() == dynamically_allocated_comp->InputDim());
|
||||
}
|
||||
nnet_.push_back(dynamically_allocated_comp);
|
||||
}
|
||||
|
||||
|
||||
inline void Nnet::Concatenate(Nnet* src_nnet_will_be_empty) {
|
||||
if(LayerCount() > 0) {
|
||||
KALDI_ASSERT(OutputDim() == src_nnet_will_be_empty->InputDim());
|
||||
}
|
||||
nnet_.insert(nnet_.end(),
|
||||
src_nnet_will_be_empty->nnet_.begin(),
|
||||
src_nnet_will_be_empty->nnet_.end());
|
||||
src_nnet_will_be_empty->nnet_.clear();
|
||||
}
|
||||
|
||||
|
||||
inline void Nnet::RemoveLayer(int32 index) {
|
||||
//make sure we don't break the dimensionalities in the nnet
|
||||
KALDI_ASSERT(index < LayerCount());
|
||||
KALDI_ASSERT(index == LayerCount()-1 || Layer(index)->InputDim() == Layer(index)->OutputDim());
|
||||
//remove element from the vector
|
||||
Component* ptr = nnet_[index];
|
||||
nnet_.erase(nnet_.begin()+index);
|
||||
delete ptr;
|
||||
}
|
||||
|
||||
|
||||
inline void Nnet::Read(const std::string &file) {
|
||||
bool binary;
|
||||
Input in(file, &binary);
|
||||
Read(in.Stream(), binary);
|
||||
in.Close();
|
||||
// Warn if the NN is empty
|
||||
if(LayerCount() == 0) {
|
||||
KALDI_WARN << "The network '" << file << "' is empty.";
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
inline void Nnet::Write(const std::string &file, bool binary) {
|
||||
Output out(file, binary, true);
|
||||
Write(out.Stream(), binary);
|
||||
out.Close();
|
||||
}
|
||||
|
||||
|
||||
inline void Nnet::Write(std::ostream &out, bool binary) {
|
||||
for(int32 i=0; i<LayerCount(); i++) {
|
||||
nnet_[i]->Write(out, binary);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
inline void Nnet::SetTrainOptions(const NnetTrainOptions& opts) {
|
||||
opts_ = opts;
|
||||
//set values to individual components
|
||||
for (int32 l=0; l<LayerCount(); l++) {
|
||||
if(Layer(l)->IsUpdatable()) {
|
||||
dynamic_cast<UpdatableComponent*>(Layer(l))->SetTrainOptions(opts_);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
} // namespace nnet1
|
||||
} // namespace kaldi
|
||||
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
// nnet/nnet-rbm.h
|
||||
|
||||
// Copyright 2012-2013 Brno University of Technology (Author: Karel Vesely)
|
||||
// Copyright 2012-2013 Brno University of Technology (Author: Karel Vesely)
|
||||
|
||||
// See ../../COPYING for clarification regarding multiple authors
|
||||
//
|
||||
|
@ -35,8 +35,8 @@ class RbmBase : public UpdatableComponent {
|
|||
GAUSSIAN
|
||||
} RbmNodeType;
|
||||
|
||||
RbmBase(int32 dim_in, int32 dim_out, Nnet *nnet)
|
||||
: UpdatableComponent(dim_in, dim_out, nnet)
|
||||
RbmBase(int32 dim_in, int32 dim_out)
|
||||
: UpdatableComponent(dim_in, dim_out)
|
||||
{ }
|
||||
|
||||
/*Is included in Component:: itf
|
||||
|
@ -85,8 +85,8 @@ class RbmBase : public UpdatableComponent {
|
|||
// RBMs use RbmUpdate(.)
|
||||
void Update(const CuMatrix<BaseFloat> &input, const CuMatrix<BaseFloat> &diff) { }
|
||||
// RBMs use option class RbmTrainOptions
|
||||
void SetTrainOptions(const NnetTrainOptions&);
|
||||
const NnetTrainOptions& GetTrainOptions() const;
|
||||
void SetTrainOptions(const NnetTrainOptions&) { }
|
||||
// Returns the opts_ member; an empty body here would flow off the end of a
// reference-returning function, which is undefined behaviour if ever called.
const NnetTrainOptions& GetTrainOptions() const { return opts_; }
|
||||
NnetTrainOptions opts_;
|
||||
//
|
||||
////
|
||||
|
@ -97,15 +97,14 @@ class RbmBase : public UpdatableComponent {
|
|||
|
||||
class Rbm : public RbmBase {
|
||||
public:
|
||||
Rbm(int32 dim_in, int32 dim_out, Nnet *nnet)
|
||||
: RbmBase(dim_in, dim_out, nnet)
|
||||
Rbm(int32 dim_in, int32 dim_out)
|
||||
: RbmBase(dim_in, dim_out)
|
||||
{ }
|
||||
~Rbm()
|
||||
{ }
|
||||
|
||||
ComponentType GetType() const {
|
||||
return kRbm;
|
||||
}
|
||||
Component* Copy() const { return new Rbm(*this); }
|
||||
ComponentType GetType() const { return kRbm; }
|
||||
|
||||
void ReadData(std::istream &is, bool binary) {
|
||||
std::string vis_node_type, hid_node_type;
|
||||
|
@ -164,7 +163,7 @@ class Rbm : public RbmBase {
|
|||
|
||||
void BackpropagateFnc(const CuMatrix<BaseFloat> &in, const CuMatrix<BaseFloat> &out,
|
||||
const CuMatrix<BaseFloat> &out_diff, CuMatrix<BaseFloat> *in_diff) {
|
||||
KALDI_ERR << "Cannot backpropagate through RBM!"
|
||||
KALDI_ERR << "Cannot back-propagate through RBM!"
|
||||
<< "Better convert it to <affinetransform> and <sigmoid>";
|
||||
}
|
||||
virtual void Update(const CuMatrix<BaseFloat> &input,
|
||||
|
@ -226,9 +225,9 @@ class Rbm : public RbmBase {
|
|||
// should be about the same. The model is particularly sensitive at the very
|
||||
// beginning of the CD-1 training.
|
||||
//
|
||||
// We compute varinace of a)input minibatch b)reconstruction.
|
||||
// We compute variance of a)input mini-batch b)reconstruction.
|
||||
// When the ratio b)/a) is larger than 2, we:
|
||||
// 1. scale down the weights and biases by b)/a) (for next minibatch b)/a) gets 1.0)
|
||||
// 1. scale down the weights and biases by b)/a) (for next mini-batch b)/a) gets 1.0)
|
||||
// 2. shrink learning rate by 0.9x
|
||||
// 3. reset the momentum buffer
|
||||
//
|
||||
|
@ -255,7 +254,7 @@ class Rbm : public RbmBase {
|
|||
pos_vis_stddev.MulElements(pos_vis_mean_h);
|
||||
pos_vis_stddev.Scale(-1.0);
|
||||
pos_vis_stddev.AddVec(1.0/pos_vis.NumRows(),pos_vis_second_h);
|
||||
/* set negtive values to zero before the square root */
|
||||
/* set negative values to zero before the square root */
|
||||
for (int32 i=0; i<pos_vis_stddev.Dim(); i++) {
|
||||
if(pos_vis_stddev(i) < 0.0) {
|
||||
KALDI_WARN << "Forcing the variance to be non-negative! (set to zero)"
|
||||
|
@ -282,7 +281,7 @@ class Rbm : public RbmBase {
|
|||
neg_vis_stddev.MulElements(neg_vis_mean_h);
|
||||
neg_vis_stddev.Scale(-1.0);
|
||||
neg_vis_stddev.AddVec(1.0/neg_vis.NumRows(),neg_vis_second_h);
|
||||
/* set negtive values to zero before the square root */
|
||||
/* set negative values to zero before the square root */
|
||||
for (int32 i=0; i<neg_vis_stddev.Dim(); i++) {
|
||||
if(neg_vis_stddev(i) < 0.0) {
|
||||
KALDI_WARN << "Forcing the variance to be non-negative! (set to zero)"
|
||||
|
@ -306,12 +305,12 @@ class Rbm : public RbmBase {
|
|||
vis_bias_corr_.SetZero();
|
||||
hid_bias_corr_.SetZero();
|
||||
|
||||
KALDI_WARN << "Discrepancy between pos_hid and neg_hid varainces, "
|
||||
KALDI_WARN << "Discrepancy between pos_hid and neg_hid variances, "
|
||||
<< "danger of weight explosion. a) Reducing weights with scale " << scale
|
||||
<< " b) Lowering learning rate to " << rbm_opts_.learn_rate
|
||||
<< " [pos_vis_stddev(~1.0):" << pos_vis_stddev.Sum()/pos_vis.NumCols()
|
||||
<< ",neg_vis_stddev:" << neg_vis_stddev.Sum()/neg_vis.NumCols() << "]";
|
||||
return; /* ie. don't update weights with current stats */
|
||||
return; /* i.e. don't update weights with current stats */
|
||||
}
|
||||
}
|
||||
//
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
// nnet/nnet-test.cc
|
||||
|
||||
// Copyright 2010 Karel Vesely
|
||||
// Copyright 2010 Brno University of Technology (author: Karel Vesely)
|
||||
|
||||
// See ../../COPYING for clarification regarding multiple authors
|
||||
//
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
// nnet/nnet-trnopts.h
|
||||
|
||||
// Copyright 2013 Brno University of Technology (Author: Karel Vesely)
|
||||
// Copyright 2013 Brno University of Technology (Author: Karel Vesely)
|
||||
|
||||
// See ../../COPYING for clarification regarding multiple authors
|
||||
//
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
// nnet/nnet-various.h
|
||||
|
||||
// Copyright 2012 Karel Vesely
|
||||
// Copyright 2012-2013 Brno University of Technology (author: Karel Vesely)
|
||||
|
||||
// See ../../COPYING for clarification regarding multiple authors
|
||||
//
|
||||
|
@ -49,9 +49,9 @@ std::string MomentStatistics(const Vector<Real> &vec) {
|
|||
vec_aux.MulElements(vec); // (vec-mean)^3
|
||||
Real skewness = vec_aux.Sum() / pow(variance, 3.0/2.0) / vec.Dim();
|
||||
// kurtosis (peakedness)
|
||||
// - makes sence for symmetric distributions (skewness is zero)
|
||||
// - makes sense for symmetric distributions (skewness is zero)
|
||||
// - positive : 'sharper peak' than Normal distribution
|
||||
// - negtive : 'heavier tails' than Normal distribution
|
||||
// - negative : 'flatter peak, lighter tails' than Normal distribution
|
||||
// - zero : same peakedness as the Normal distribution
|
||||
vec_aux.MulElements(vec); // (vec-mean)^4
|
||||
Real kurtosis = vec_aux.Sum() / (variance * variance) / vec.Dim() - 3.0;
|
||||
|
@ -66,6 +66,9 @@ std::string MomentStatistics(const Vector<Real> &vec) {
|
|||
return ostr.str();
|
||||
}
|
||||
|
||||
/**
|
||||
* Overload MomentStatistics to Matrix<Real>
|
||||
*/
|
||||
template <typename Real>
|
||||
std::string MomentStatistics(const Matrix<Real> &mat) {
|
||||
Vector<Real> vec(mat.NumRows()*mat.NumCols());
|
||||
|
@ -73,6 +76,9 @@ std::string MomentStatistics(const Matrix<Real> &mat) {
|
|||
return MomentStatistics(vec);
|
||||
}
|
||||
|
||||
/**
|
||||
* Overload MomentStatistics to CuVector<Real>
|
||||
*/
|
||||
template <typename Real>
|
||||
std::string MomentStatistics(const CuVector<Real> &vec) {
|
||||
Vector<Real> vec_host(vec.Dim());
|
||||
|
@ -80,6 +86,9 @@ std::string MomentStatistics(const CuVector<Real> &vec) {
|
|||
return MomentStatistics(vec_host);
|
||||
}
|
||||
|
||||
/**
|
||||
* Overload MomentStatistics to CuMatrix<Real>
|
||||
*/
|
||||
template <typename Real>
|
||||
std::string MomentStatistics(const CuMatrix<Real> &mat) {
|
||||
Matrix<Real> mat_host(mat.NumRows(),mat.NumCols());
|
||||
|
@ -96,26 +105,25 @@ std::string MomentStatistics(const CuMatrix<Real> &mat) {
|
|||
*/
|
||||
class Splice : public Component {
|
||||
public:
|
||||
Splice(int32 dim_in, int32 dim_out, Nnet *nnet)
|
||||
: Component(dim_in, dim_out, nnet)
|
||||
Splice(int32 dim_in, int32 dim_out)
|
||||
: Component(dim_in, dim_out)
|
||||
{ }
|
||||
~Splice()
|
||||
{ }
|
||||
|
||||
ComponentType GetType() const {
|
||||
return kSplice;
|
||||
}
|
||||
Component* Copy() const { return new Splice(*this); }
|
||||
ComponentType GetType() const { return kSplice; }
|
||||
|
||||
void ReadData(std::istream &is, bool binary) {
|
||||
//read double vector
|
||||
// read double vector
|
||||
Vector<double> vec_d;
|
||||
vec_d.Read(is, binary);
|
||||
//convert to int vector
|
||||
// convert to int vector
|
||||
std::vector<int32> vec_i(vec_d.Dim());
|
||||
for(int32 i=0; i<vec_d.Dim(); i++) {
|
||||
vec_i[i] = round(vec_d(i));
|
||||
}
|
||||
//push to GPU
|
||||
// push to GPU
|
||||
frame_offsets_.CopyFromVec(vec_i);
|
||||
}
|
||||
|
||||
|
@ -155,30 +163,29 @@ class Splice : public Component {
|
|||
/**
|
||||
* Rearrange the matrix columns according to the indices in copy_from_indices_
|
||||
*/
|
||||
class Copy : public Component {
|
||||
class CopyComponent: public Component {
|
||||
public:
|
||||
Copy(int32 dim_in, int32 dim_out, Nnet *nnet)
|
||||
: Component(dim_in, dim_out, nnet)
|
||||
CopyComponent(int32 dim_in, int32 dim_out)
|
||||
: Component(dim_in, dim_out)
|
||||
{ }
|
||||
~Copy()
|
||||
~CopyComponent()
|
||||
{ }
|
||||
|
||||
ComponentType GetType() const {
|
||||
return kCopy;
|
||||
}
|
||||
Component* Copy() const { return new CopyComponent(*this); }
|
||||
ComponentType GetType() const { return kCopy; }
|
||||
|
||||
void ReadData(std::istream &is, bool binary) {
|
||||
//read double vector
|
||||
// read double vector
|
||||
Vector<double> vec_d;
|
||||
vec_d.Read(is, binary);
|
||||
//subtract 1
|
||||
// subtract 1
|
||||
vec_d.Add(-1.0);
|
||||
//convert to int vector
|
||||
// convert to int vector
|
||||
std::vector<int32> vec_i(vec_d.Dim());
|
||||
for(int32 i=0; i<vec_d.Dim(); i++) {
|
||||
vec_i[i] = round(vec_d(i));
|
||||
}
|
||||
//push to GPU
|
||||
// push to GPU
|
||||
copy_from_indices_.CopyFromVec(vec_i);
|
||||
}
|
||||
|
||||
|
@ -222,15 +229,14 @@ class Copy : public Component {
|
|||
*/
|
||||
class AddShift : public Component {
|
||||
public:
|
||||
AddShift(int32 dim_in, int32 dim_out, Nnet *nnet)
|
||||
: Component(dim_in, dim_out, nnet), shift_data_(dim_in)
|
||||
AddShift(int32 dim_in, int32 dim_out)
|
||||
: Component(dim_in, dim_out), shift_data_(dim_in)
|
||||
{ }
|
||||
~AddShift()
|
||||
{ }
|
||||
|
||||
ComponentType GetType() const {
|
||||
return kAddShift;
|
||||
}
|
||||
Component* Copy() const { return new AddShift(*this); }
|
||||
ComponentType GetType() const { return kAddShift; }
|
||||
|
||||
void ReadData(std::istream &is, bool binary) {
|
||||
//read the shift data
|
||||
|
@ -279,18 +285,17 @@ class AddShift : public Component {
|
|||
*/
|
||||
class Rescale : public Component {
|
||||
public:
|
||||
Rescale(int32 dim_in, int32 dim_out, Nnet *nnet)
|
||||
: Component(dim_in, dim_out, nnet), scale_data_(dim_in)
|
||||
Rescale(int32 dim_in, int32 dim_out)
|
||||
: Component(dim_in, dim_out), scale_data_(dim_in)
|
||||
{ }
|
||||
~Rescale()
|
||||
{ }
|
||||
|
||||
ComponentType GetType() const {
|
||||
return kRescale;
|
||||
}
|
||||
Component* Copy() const { return new Rescale(*this); }
|
||||
ComponentType GetType() const { return kRescale; }
|
||||
|
||||
void ReadData(std::istream &is, bool binary) {
|
||||
//read the shift data
|
||||
// read the scale data
|
||||
scale_data_.Read(is, binary);
|
||||
}
|
||||
|
||||
|
@ -304,18 +309,18 @@ class Rescale : public Component {
|
|||
|
||||
void PropagateFnc(const CuMatrix<BaseFloat> &in, CuMatrix<BaseFloat> *out) {
|
||||
out->CopyFromMat(in);
|
||||
//rescale the data
|
||||
// rescale the data
|
||||
out->MulColsVec(scale_data_);
|
||||
}
|
||||
|
||||
void BackpropagateFnc(const CuMatrix<BaseFloat> &in, const CuMatrix<BaseFloat> &out,
|
||||
const CuMatrix<BaseFloat> &out_diff, CuMatrix<BaseFloat> *in_diff) {
|
||||
in_diff->CopyFromMat(out_diff);
|
||||
//derivative gets also scaled by the scale_data_
|
||||
// derivative gets also scaled by the scale_data_
|
||||
in_diff->MulColsVec(scale_data_);
|
||||
}
|
||||
|
||||
//Data accessors
|
||||
// Data accessors
|
||||
const CuVector<BaseFloat>& GetScaleVec() {
|
||||
return scale_data_;
|
||||
}
|
||||
|
@ -331,9 +336,6 @@ class Rescale : public Component {
|
|||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
} // namespace nnet1
|
||||
} // namespace kaldi
|
||||
|
||||
|
|
|
@ -24,20 +24,9 @@
|
|||
namespace kaldi {
|
||||
namespace nnet2 {
|
||||
|
||||
static BaseFloat GetFirstLearningRate(const Nnet &nnet) {
|
||||
for (int32 c = 0; c < nnet.NumComponents(); c++) {
|
||||
const UpdatableComponent *uc =
|
||||
dynamic_cast<const UpdatableComponent*>(&(nnet.GetComponent(c)));
|
||||
if (uc != NULL)
|
||||
return uc->LearningRate();
|
||||
}
|
||||
KALDI_ERR << "Neural net has no updatable components";
|
||||
return 0.0;
|
||||
}
|
||||
|
||||
|
||||
/** This function makes sure the neural net ends with a
|
||||
MixtureProbComponent. If it doesn't, it adds one
|
||||
SumGroupComponent. If it doesn't, it adds one
|
||||
(with a single mixture/matrix corresponding to each
|
||||
output element.) [Before doing so, it makes sure
|
||||
that the last layer is a SoftmaxLayer, which is what
|
||||
|
@ -48,24 +37,20 @@ static BaseFloat GetFirstLearningRate(const Nnet &nnet) {
|
|||
static void GiveNnetCorrectTopology(Nnet *nnet,
|
||||
AffineComponent **affine_component,
|
||||
SoftmaxComponent **softmax_component,
|
||||
MixtureProbComponent **mixture_prob_component) {
|
||||
SumGroupComponent **sum_group_component) {
|
||||
int32 nc = nnet->NumComponents();
|
||||
KALDI_ASSERT(nc > 0);
|
||||
Component* component = &(nnet->GetComponent(nc - 1));
|
||||
if ((*mixture_prob_component =
|
||||
dynamic_cast<MixtureProbComponent*>(component)) == NULL) {
|
||||
KALDI_LOG << "Adding MixtureProbComponent to neural net.";
|
||||
if ((*sum_group_component =
|
||||
dynamic_cast<SumGroupComponent*>(component)) == NULL) {
|
||||
KALDI_LOG << "Adding SumGroupComponent to neural net.";
|
||||
int32 dim = component->OutputDim();
|
||||
// Give it the same learning rate as the first updatable layer we have.
|
||||
BaseFloat learning_rate = GetFirstLearningRate(*nnet),
|
||||
diag_element = 0.999; // actually it's a don't care.
|
||||
std::vector<int32> sizes(dim, 1); // a vector of all ones, of dimension "dim".
|
||||
|
||||
*mixture_prob_component = new MixtureProbComponent();
|
||||
(*mixture_prob_component)->Init(learning_rate,
|
||||
diag_element,
|
||||
sizes);
|
||||
nnet->Append(*mixture_prob_component);
|
||||
*sum_group_component = new SumGroupComponent();
|
||||
(*sum_group_component)->Init(sizes);
|
||||
nnet->Append(*sum_group_component);
|
||||
nc++;
|
||||
}
|
||||
component = &(nnet->GetComponent(nc - 2));
|
||||
|
@ -84,14 +69,16 @@ static void GiveNnetCorrectTopology(Nnet *nnet,
|
|||
/**
|
||||
This function works as follows.
|
||||
We first make sure the neural net has the correct topology, so its
|
||||
last component is a MixtureProbComponent.
|
||||
last component is a SumGroupComponent.
|
||||
|
||||
We then get the counts for each matrix in the MixtureProbComponent (these
|
||||
We then get the counts for each group in the SumGroupComponent (these
|
||||
will either correspond to leaves in the decision tree, or level-1 leaves, if
|
||||
we have a 2-level-tree system). We work out the total count for each of these
|
||||
groups, by getting the count from the SoftmaxComponent.
|
||||
|
||||
Then, for each matrix in the Mixturemixture-prob component, we
|
||||
|
||||
We then increase, if necessary, the dimensions that the SumGroupComponent sums
|
||||
over, increase the dimension of the SoftmaxComponent if necessary, and duplicate
|
||||
and then perturb the relevant rows of the AffineComponent.
|
||||
*/
|
||||
|
||||
|
||||
|
@ -100,18 +87,18 @@ void MixupNnet(const NnetMixupConfig &mixup_config,
|
|||
Nnet *nnet) {
|
||||
AffineComponent *affine_component = NULL;
|
||||
SoftmaxComponent *softmax_component = NULL;
|
||||
MixtureProbComponent *mixture_prob_component = NULL;
|
||||
SumGroupComponent *sum_group_component = NULL;
|
||||
GiveNnetCorrectTopology(nnet,
|
||||
&affine_component,
|
||||
&softmax_component,
|
||||
&mixture_prob_component); // Adds a MixtureProbComponent if needed.
|
||||
&sum_group_component); // Adds a SumGroupComponent if needed.
|
||||
|
||||
softmax_component->MixUp(mixup_config.num_mixtures,
|
||||
mixup_config.power,
|
||||
mixup_config.min_count,
|
||||
mixup_config.perturb_stddev,
|
||||
affine_component,
|
||||
mixture_prob_component);
|
||||
sum_group_component);
|
||||
nnet->Check(); // Checks that dimensions all match up.
|
||||
}
|
||||
|
||||
|
@ -120,15 +107,16 @@ void MixupNnet(const NnetMixupConfig &mixup_config,
|
|||
void SoftmaxComponent::MixUp(int32 num_mixtures,
|
||||
BaseFloat power,
|
||||
BaseFloat min_count,
|
||||
BaseFloat perturb_stddev,
|
||||
BaseFloat perturb_stddev,
|
||||
AffineComponent *ac,
|
||||
MixtureProbComponent *mc) {
|
||||
|
||||
SumGroupComponent *sc) {
|
||||
// "counts" is derived from this->counts_ by summing.
|
||||
Vector<BaseFloat> counts(mc->params_.size());
|
||||
std::vector<int32> old_sizes;
|
||||
sc->GetSizes(&old_sizes);
|
||||
Vector<BaseFloat> counts(old_sizes.size());
|
||||
int32 old_dim = 0;
|
||||
for (size_t i = 0; i < mc->params_.size(); i++) {
|
||||
int32 this_input_dim = mc->params_[i].NumCols();
|
||||
for (size_t i = 0; i < old_sizes.size(); i++) {
|
||||
int32 this_input_dim = old_sizes[i];
|
||||
BaseFloat this_tot_count = 0.0; /// Total the count out of
|
||||
/// all the output dims of the softmax layer that correspond
|
||||
/// to this mixture. We'll use this total to allocate new quasi-Gaussians.
|
||||
|
@ -141,16 +129,18 @@ void SoftmaxComponent::MixUp(int32 num_mixtures,
|
|||
|
||||
std::vector<int32> targets; // #mixtures for each state.
|
||||
|
||||
|
||||
// Get the target number of mixtures for each state.
|
||||
GetSplitTargets(counts, num_mixtures, power, min_count, &targets);
|
||||
KALDI_ASSERT(targets.size() == mc->params_.size());
|
||||
// floor each target to the current #mixture components.
|
||||
KALDI_ASSERT(targets.size() == old_sizes.size());
|
||||
std::vector<int32> new_sizes(old_sizes.size());
|
||||
for (size_t i = 0; i < targets.size(); i++)
|
||||
targets[i] = std::max(targets[i], mc->params_[i].NumCols());
|
||||
int32 new_dim = std::accumulate(targets.begin(), targets.end(),
|
||||
new_sizes[i] = std::max(targets[i], old_sizes[i]);
|
||||
int32 new_dim = std::accumulate(new_sizes.begin(), new_sizes.end(),
|
||||
static_cast<int32>(0)),
|
||||
affine_input_dim = ac->InputDim();
|
||||
KALDI_ASSERT(new_dim >= old_dim);
|
||||
sc->Init(new_sizes);
|
||||
|
||||
// bias and linear terms from affine component:
|
||||
Vector<BaseFloat> old_bias_term(ac->bias_params_);
|
||||
|
@ -165,11 +155,10 @@ void SoftmaxComponent::MixUp(int32 num_mixtures,
|
|||
// respectively. They get incremented in the following loop.
|
||||
int32 old_offset = 0, new_offset = 0;
|
||||
Vector<BaseFloat> old_counts(this->value_sum_);
|
||||
for (size_t i = 0; i < mc->params_.size(); i++) {
|
||||
const CuMatrix<BaseFloat> &this_old_params(mc->params_[i]);
|
||||
int32 this_old_dim = this_old_params.NumCols(),
|
||||
this_new_dim = targets[i],
|
||||
this_cur_dim = this_old_dim; // this_cur_dim is loop variable.
|
||||
for (size_t i = 0; i < old_sizes.size(); i++) {
|
||||
int32 this_old_dim = old_sizes[i],
|
||||
this_new_dim = new_sizes[i],
|
||||
this_cur_dim = this_old_dim; // this_cur_dim is loop variable.
|
||||
|
||||
SubMatrix<BaseFloat> this_old_linear_term(old_linear_term,
|
||||
old_offset, this_old_dim,
|
||||
|
@ -184,8 +173,6 @@ void SoftmaxComponent::MixUp(int32 num_mixtures,
|
|||
old_offset, this_old_dim),
|
||||
this_new_counts(new_counts,
|
||||
new_offset, this_new_dim);
|
||||
Matrix<BaseFloat> this_new_params(this_old_params.NumRows(),
|
||||
this_new_dim);
|
||||
|
||||
// Copy the same-dimensional part of the parameters and counts.
|
||||
this_new_linear_term.Range(0, this_old_dim, 0, affine_input_dim).
|
||||
|
@ -195,8 +182,6 @@ void SoftmaxComponent::MixUp(int32 num_mixtures,
|
|||
this_new_counts.Range(0, this_old_dim).
|
||||
CopyFromVec(this_old_counts);
|
||||
// this_new_params is the mixture weights.
|
||||
this_new_params.Range(0, this_old_params.NumRows(), 0, this_old_dim).
|
||||
CopyFromMat(this_old_params);
|
||||
// Add the new components...
|
||||
for (; this_cur_dim < this_new_dim; this_cur_dim++) {
|
||||
BaseFloat *count_begin = this_new_counts.Data(),
|
||||
|
@ -216,13 +201,9 @@ void SoftmaxComponent::MixUp(int32 num_mixtures,
|
|||
new_vec.AddVec(-perturb_stddev, rand);
|
||||
this_new_bias_term(max_index) += log(0.5);
|
||||
this_new_bias_term(new_index) = this_new_bias_term(max_index);
|
||||
// now copy the column of the MixtureProbComponent parameters.
|
||||
for (int32 j = 0; j < this_new_params.NumRows(); j++)
|
||||
this_new_params(j, new_index) = this_new_params(j, max_index);
|
||||
}
|
||||
old_offset += this_old_dim;
|
||||
new_offset += this_new_dim;
|
||||
mc->params_[i] = this_new_params;
|
||||
}
|
||||
KALDI_ASSERT(old_offset == old_dim && new_offset == new_dim);
|
||||
ac->SetParams(new_bias_term, new_linear_term);
|
||||
|
@ -230,8 +211,6 @@ void SoftmaxComponent::MixUp(int32 num_mixtures,
|
|||
this->value_sum_.CopyFromVec(new_counts);
|
||||
this->count_ = this->value_sum_.Sum();
|
||||
this->dim_ = new_dim;
|
||||
mc->input_dim_ = new_dim; // keep this up to date.
|
||||
// We already updated mc->params_.
|
||||
KALDI_LOG << "Mixed up from dimension of " << old_dim << " to " << new_dim
|
||||
<< " in the softmax layer.";
|
||||
}
|
||||
|
|
|
@ -27,15 +27,12 @@
|
|||
namespace kaldi {
|
||||
namespace nnet2 {
|
||||
|
||||
/** Configuration class that controls neural net "mixing up", i.e. splitting
|
||||
the outputs of the final softmax layer into several mixture components.
|
||||
*/
|
||||
struct NnetMixupConfig {
|
||||
BaseFloat power;
|
||||
BaseFloat min_count;
|
||||
int32 num_mixtures;
|
||||
BaseFloat perturb_stddev;
|
||||
|
||||
|
||||
|
||||
NnetMixupConfig(): power(0.25), min_count(1000.0),
|
||||
num_mixtures(-1), perturb_stddev(0.01) { }
|
||||
|
@ -58,9 +55,8 @@ struct NnetMixupConfig {
|
|||
This function does something similar to Gaussian mixture splitting for
|
||||
GMMs, except applied to the output layer of the neural network.
|
||||
We create additional outputs, which will be summed over using a
|
||||
MixtureProbComponent (if one does not already exist, it will be
|
||||
added.)
|
||||
*/
|
||||
SumGroupComponent.
|
||||
*/
|
||||
|
||||
void MixupNnet(const NnetMixupConfig &mixup_config,
|
||||
Nnet *nnet);
|
||||
|
|
|
@ -532,6 +532,27 @@ void UnitTestMixtureProbComponent() {
|
|||
}
|
||||
}
|
||||
|
||||
|
||||
void UnitTestSumGroupComponent() {
|
||||
std::vector<int32> sizes;
|
||||
int32 num_sizes = 1 + rand() % 5;
|
||||
for (int32 i = 0; i < num_sizes; i++)
|
||||
sizes.push_back(1 + rand() % 5);
|
||||
|
||||
{
|
||||
SumGroupComponent component;
|
||||
component.Init(sizes);
|
||||
UnitTestGenericComponentInternal(component);
|
||||
}
|
||||
{
|
||||
const char *str = "sizes=3:4:5";
|
||||
SumGroupComponent component;
|
||||
component.InitFromString(str);
|
||||
UnitTestGenericComponentInternal(component);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void UnitTestDctComponent() {
|
||||
int32 m = 1 + rand() % 4, n = 1 + rand() % 4,
|
||||
dct_dim = m, dim = m * n;
|
||||
|
@ -760,6 +781,7 @@ int main() {
|
|||
UnitTestBlockAffineComponent();
|
||||
UnitTestBlockAffineComponentPreconditioned();
|
||||
UnitTestMixtureProbComponent();
|
||||
UnitTestSumGroupComponent();
|
||||
UnitTestDctComponent();
|
||||
UnitTestFixedLinearComponent();
|
||||
UnitTestFixedAffineComponent();
|
||||
|
|
|
@ -75,6 +75,8 @@ Component* Component::NewComponentOfType(const std::string &component_type) {
|
|||
ans = new AffinePreconInputComponent();
|
||||
} else if (component_type == "MixtureProbComponent") {
|
||||
ans = new MixtureProbComponent();
|
||||
} else if (component_type == "SumGroupComponent") {
|
||||
ans = new SumGroupComponent();
|
||||
} else if (component_type == "BlockAffineComponent") {
|
||||
ans = new BlockAffineComponent();
|
||||
} else if (component_type == "BlockAffineComponentPreconditioned") {
|
||||
|
@ -2922,6 +2924,92 @@ void MixtureProbComponent::UnVectorize(const VectorBase<BaseFloat> ¶ms) {
|
|||
KALDI_ASSERT(offset == params.Dim());
|
||||
}
|
||||
|
||||
void SumGroupComponent::Init(const std::vector<int32> &sizes) {
|
||||
KALDI_ASSERT(!sizes.empty());
|
||||
std::vector<Int32Pair> cpu_vec(sizes.size());
|
||||
std::vector<int32> reverse_cpu_vec;
|
||||
int32 cur_index = 0;
|
||||
for (size_t i = 0; i < sizes.size(); i++) {
|
||||
KALDI_ASSERT(sizes[i] > 0);
|
||||
cpu_vec[i].first = cur_index;
|
||||
cpu_vec[i].second = cur_index + sizes[i];
|
||||
cur_index += sizes[i];
|
||||
for (int32 j = cpu_vec[i].first; j < cpu_vec[i].second; j++)
|
||||
reverse_cpu_vec.push_back(i);
|
||||
}
|
||||
this->indexes_ = cpu_vec;
|
||||
this->reverse_indexes_ = reverse_cpu_vec;
|
||||
this->input_dim_ = cur_index;
|
||||
this->output_dim_ = sizes.size();
|
||||
}
|
||||
|
||||
void SumGroupComponent::InitFromString(std::string args) {
|
||||
std::string orig_args(args);
|
||||
std::vector<int32> sizes;
|
||||
bool ok = ParseFromString("sizes", &args, &sizes);
|
||||
|
||||
if (!ok || !args.empty() || sizes.empty())
|
||||
KALDI_ERR << "Invalid initializer for layer of type "
|
||||
<< Type() << ": \"" << orig_args << "\"";
|
||||
this->Init(sizes);
|
||||
}
|
||||
|
||||
Component* SumGroupComponent::Copy() const {
|
||||
SumGroupComponent *ans = new SumGroupComponent();
|
||||
ans->indexes_ = indexes_;
|
||||
ans->reverse_indexes_ = reverse_indexes_;
|
||||
ans->input_dim_ = input_dim_;
|
||||
ans->output_dim_ = output_dim_;
|
||||
return ans;
|
||||
}
|
||||
|
||||
void SumGroupComponent::Read(std::istream &is, bool binary) {
|
||||
ExpectOneOrTwoTokens(is, binary, "<SumGroupComponent>", "<Sizes>");
|
||||
std::vector<int32> sizes;
|
||||
ReadIntegerVector(is, binary, &sizes);
|
||||
ExpectToken(is, binary, "<SumGroupComponent>");
|
||||
this->Init(sizes);
|
||||
}
|
||||
|
||||
void SumGroupComponent::GetSizes(std::vector<int32> *sizes) const {
|
||||
std::vector<Int32Pair> indexes;
|
||||
indexes_.CopyToVec(&indexes);
|
||||
sizes->resize(indexes.size());
|
||||
for (size_t i = 0; i < indexes.size(); i++) {
|
||||
(*sizes)[i] = indexes[i].second - indexes[i].first;
|
||||
if (i == 0) { KALDI_ASSERT(indexes[i].first == 0); }
|
||||
else { KALDI_ASSERT(indexes[i].first == indexes[i-1].second); }
|
||||
KALDI_ASSERT(indexes[i].second > indexes[i].first);
|
||||
(*sizes)[i] = indexes[i].second - indexes[i].first;
|
||||
}
|
||||
}
|
||||
|
||||
void SumGroupComponent::Write(std::ostream &os, bool binary) const {
|
||||
WriteToken(os, binary, "<SumGroupComponent>");
|
||||
WriteToken(os, binary, "<Sizes>");
|
||||
std::vector<int32> sizes;
|
||||
this->GetSizes(&sizes);
|
||||
WriteIntegerVector(os, binary, sizes);
|
||||
WriteToken(os, binary, "<SumGroupComponent>");
|
||||
}
|
||||
|
||||
void SumGroupComponent::Propagate(const CuMatrixBase<BaseFloat> &in,
|
||||
int32 num_chunks,
|
||||
CuMatrix<BaseFloat> *out) const {
|
||||
out->Resize(in.NumRows(), this->OutputDim(), kUndefined);
|
||||
out->SumColumnRanges(in, indexes_);
|
||||
}
|
||||
|
||||
void SumGroupComponent::Backprop(const CuMatrixBase<BaseFloat> &, // in_value,
|
||||
const CuMatrixBase<BaseFloat> &, // out_value,
|
||||
const CuMatrixBase<BaseFloat> &out_deriv,
|
||||
int32 num_chunks,
|
||||
Component *to_update,
|
||||
CuMatrix<BaseFloat> *in_deriv) const {
|
||||
in_deriv->Resize(out_deriv.NumRows(), InputDim());
|
||||
in_deriv->CopyCols(out_deriv, reverse_indexes_);
|
||||
}
|
||||
|
||||
|
||||
std::string SpliceComponent::Info() const {
|
||||
std::stringstream stream;
|
||||
|
|
|
@ -450,7 +450,7 @@ class ScaleComponent: public Component {
|
|||
|
||||
|
||||
|
||||
class MixtureProbComponent; // Forward declaration.
|
||||
class SumGroupComponent; // Forward declaration.
|
||||
class AffineComponent; // Forward declaration.
|
||||
|
||||
class SoftmaxComponent: public NonlinearComponent {
|
||||
|
@ -472,12 +472,13 @@ class SoftmaxComponent: public NonlinearComponent {
|
|||
Component *to_update, // may be identical to "this".
|
||||
CuMatrix<BaseFloat> *in_deriv) const;
|
||||
|
||||
void MixUp(int32 num_mixtures, // implemented in mixup-nnet.cc
|
||||
void MixUp(int32 num_mixtures,
|
||||
BaseFloat power,
|
||||
BaseFloat min_count,
|
||||
BaseFloat perturb_stddev,
|
||||
AffineComponent *ac,
|
||||
MixtureProbComponent *mc);
|
||||
SumGroupComponent *sc);
|
||||
|
||||
virtual Component* Copy() const { return new SoftmaxComponent(*this); }
|
||||
private:
|
||||
SoftmaxComponent &operator = (const SoftmaxComponent &other); // Disallow.
|
||||
|
@ -1227,8 +1228,6 @@ class BlockAffineComponentPreconditioned: public BlockAffineComponent {
|
|||
// one for each row).
|
||||
|
||||
class MixtureProbComponent: public UpdatableComponent {
|
||||
friend class SoftmaxComponent; // Mixing-up done by a function
|
||||
// in that class.
|
||||
public:
|
||||
virtual int32 InputDim() const { return input_dim_; }
|
||||
virtual int32 OutputDim() const { return output_dim_; }
|
||||
|
@ -1275,6 +1274,53 @@ class MixtureProbComponent: public UpdatableComponent {
|
|||
int32 output_dim_;
|
||||
};
|
||||
|
||||
|
||||
// SumGroupComponent is used to sum up groups of posteriors.
|
||||
// It's used to introduce a kind of Gaussian-mixture-model-like
|
||||
// idea into neural nets. This is basically a degenerate case of
|
||||
// MixtureProbComponent; we had to implement it separately to
|
||||
// be efficient for CUDA (we can use this one regardless of whether
|
||||
// we have CUDA or not; it's the normal case we want anyway).
|
||||
class SumGroupComponent: public Component {
|
||||
public:
|
||||
virtual int32 InputDim() const { return input_dim_; }
|
||||
virtual int32 OutputDim() const { return output_dim_; }
|
||||
void Init(const std::vector<int32> &sizes); // the vector is of the input dim
|
||||
// (>= 1) for each output dim.
|
||||
void GetSizes(std::vector<int32> *sizes) const; // Get a vector saying, for
|
||||
// each output-dim, how many
|
||||
// inputs were summed over.
|
||||
virtual void InitFromString(std::string args);
|
||||
SumGroupComponent() { }
|
||||
virtual std::string Type() const { return "SumGroupComponent"; }
|
||||
virtual bool BackpropNeedsInput() const { return false; }
|
||||
virtual bool BackpropNeedsOutput() const { return false; }
|
||||
virtual void Propagate(const CuMatrixBase<BaseFloat> &in,
|
||||
int32 num_chunks,
|
||||
CuMatrix<BaseFloat> *out) const;
|
||||
// Note: in_value and out_value are both dummy variables.
|
||||
virtual void Backprop(const CuMatrixBase<BaseFloat> &in_value,
|
||||
const CuMatrixBase<BaseFloat> &out_value,
|
||||
const CuMatrixBase<BaseFloat> &out_deriv,
|
||||
int32 num_chunks,
|
||||
Component *to_update, // may be identical to "this".
|
||||
CuMatrix<BaseFloat> *in_deriv) const;
|
||||
virtual Component* Copy() const;
|
||||
virtual void Read(std::istream &is, bool binary);
|
||||
virtual void Write(std::ostream &os, bool binary) const;
|
||||
|
||||
private:
|
||||
KALDI_DISALLOW_COPY_AND_ASSIGN(SumGroupComponent);
|
||||
// Note: Int32Pair is just struct{ int32 first; int32 second }; it's defined
|
||||
// in cu-matrixdim.h as extern "C" which is needed for the CUDA interface.
|
||||
CuArray<Int32Pair> indexes_; // for each output index, the (start, end) input
|
||||
// index.
|
||||
CuArray<int32> reverse_indexes_; // for each input index, the output index.
|
||||
int32 input_dim_;
|
||||
int32 output_dim_;
|
||||
};
|
||||
|
||||
|
||||
/// PermuteComponent does a random permutation of the dimensions. Useful in
|
||||
/// conjunction with block-diagonal transforms.
|
||||
class PermuteComponent: public Component {
|
||||
|
|
|
@ -12,7 +12,7 @@ BINFILES = nnet-train-xent-hardlab-perutt \
|
|||
nnet-train-mmi-sequential \
|
||||
nnet-train-mpe-sequential \
|
||||
rbm-train-cd1-frmshuff rbm-convert-to-nnet \
|
||||
nnet-forward nnet-copy nnet1-info nnet-concat \
|
||||
nnet-forward nnet-copy nnet-info nnet-concat \
|
||||
transf-to-nnet cmvn-to-nnet
|
||||
|
||||
OBJFILES =
|
||||
|
|
|
@ -93,7 +93,7 @@ int main(int argc, char *argv[]) {
|
|||
|
||||
//create the shift component
|
||||
{
|
||||
AddShift* shift_component = new AddShift(shift.Dim(), shift.Dim(), &nnet);
|
||||
AddShift* shift_component = new AddShift(shift.Dim(), shift.Dim());
|
||||
//the pointer will be given to the nnet, so we don't need to call delete
|
||||
|
||||
//convert Vector to CuVector
|
||||
|
@ -103,12 +103,12 @@ int main(int argc, char *argv[]) {
|
|||
shift_component->SetShiftVec(cu_shift);
|
||||
|
||||
//append layer to the nnet
|
||||
nnet.AppendLayer(shift_component);
|
||||
nnet.AppendComponent(shift_component);
|
||||
}
|
||||
|
||||
//create the scale component
|
||||
{
|
||||
Rescale* scale_component = new Rescale(scale.Dim(), scale.Dim(), &nnet);
|
||||
Rescale* scale_component = new Rescale(scale.Dim(), scale.Dim());
|
||||
//the pointer will be given to the nnet, so we don't need to call delete
|
||||
|
||||
//convert Vector to CuVector
|
||||
|
@ -118,9 +118,8 @@ int main(int argc, char *argv[]) {
|
|||
scale_component->SetScaleVec(cu_scale);
|
||||
|
||||
//append layer to the nnet
|
||||
nnet.AppendLayer(scale_component);
|
||||
nnet.AppendComponent(scale_component);
|
||||
}
|
||||
|
||||
|
||||
//write the nnet
|
||||
{
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
// nnetbin/nnet-concat.cc
|
||||
|
||||
// Copyright 2012 Karel Vesely
|
||||
// Copyright 2012-2013 Brno University of Technology (Author: Karel Vesely)
|
||||
|
||||
// See ../../COPYING for clarification regarding multiple authors
|
||||
//
|
||||
|
@ -70,7 +70,7 @@ int main(int argc, char *argv[]) {
|
|||
nnet_next.Read(ki.Stream(), binary_read);
|
||||
}
|
||||
//append nnet_next to the network nnet
|
||||
nnet.Concatenate(&nnet_next);
|
||||
nnet.AppendNnet(nnet_next);
|
||||
}
|
||||
|
||||
//finally write the nnet to disk
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
// nnetbin/nnet-copy.cc
|
||||
|
||||
// Copyright 2012 Karel Vesely
|
||||
// Copyright 2012 Brno University of Technology (author: Karel Vesely)
|
||||
|
||||
// See ../../COPYING for clarification regarding multiple authors
|
||||
//
|
||||
|
@ -64,14 +64,14 @@ int main(int argc, char *argv[]) {
|
|||
// optionally remove N first layers
|
||||
if(remove_first_layers > 0) {
|
||||
for(int32 i=0; i<remove_first_layers; i++) {
|
||||
nnet.RemoveLayer(0);
|
||||
nnet.RemoveComponent(0);
|
||||
}
|
||||
}
|
||||
|
||||
// optionally remove N last layers
|
||||
if(remove_last_layers > 0) {
|
||||
for(int32 i=0; i<remove_last_layers; i++) {
|
||||
nnet.RemoveLastLayer();
|
||||
nnet.RemoveLastComponent();
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -87,15 +87,15 @@ int main(int argc, char *argv[]) {
|
|||
Nnet nnet;
|
||||
nnet.Read(model_filename);
|
||||
//optionally remove softmax
|
||||
if(no_softmax && nnet.Layer(nnet.LayerCount()-1)->GetType() == Component::kSoftmax) {
|
||||
if(no_softmax && nnet.GetComponent(nnet.NumComponents()-1).GetType() == Component::kSoftmax) {
|
||||
KALDI_LOG << "Removing softmax from the nnet " << model_filename;
|
||||
nnet.RemoveLayer(nnet.LayerCount()-1);
|
||||
nnet.RemoveComponent(nnet.NumComponents()-1);
|
||||
}
|
||||
//check for some non-sense option combinations
|
||||
if(apply_log && no_softmax) {
|
||||
KALDI_ERR << "Nonsense option combination : --apply-log=true and --no-softmax=true";
|
||||
}
|
||||
if(apply_log && nnet.Layer(nnet.LayerCount()-1)->GetType() != Component::kSoftmax) {
|
||||
if(apply_log && nnet.GetComponent(nnet.NumComponents()-1).GetType() != Component::kSoftmax) {
|
||||
KALDI_ERR << "Used --apply-log=true, but nnet " << model_filename
|
||||
<< " does not have <softmax> as last component!";
|
||||
}
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
// nnetbin/nnet-train-mmi-sequential.cc
|
||||
|
||||
// Copyright 2012-2013 Karel Vesely
|
||||
// Copyright 2012-2013 Brno University of Technology (author: Karel Vesely)
|
||||
|
||||
// See ../../COPYING for clarification regarding multiple authors
|
||||
//
|
||||
|
@ -173,9 +173,9 @@ int main(int argc, char *argv[]) {
|
|||
Nnet nnet;
|
||||
nnet.Read(model_filename);
|
||||
// using activations directly: remove softmax, if present
|
||||
if (nnet.Layer(nnet.LayerCount()-1)->GetType() == Component::kSoftmax) {
|
||||
if (nnet.GetComponent(nnet.NumComponents()-1).GetType() == Component::kSoftmax) {
|
||||
KALDI_LOG << "Removing softmax from the nnet " << model_filename;
|
||||
nnet.RemoveLayer(nnet.LayerCount()-1);
|
||||
nnet.RemoveComponent(nnet.NumComponents()-1);
|
||||
} else {
|
||||
KALDI_LOG << "The nnet was without softmax " << model_filename;
|
||||
}
|
||||
|
@ -424,7 +424,7 @@ int main(int argc, char *argv[]) {
|
|||
|
||||
//add back the softmax
|
||||
KALDI_LOG << "Appending the softmax " << target_model_filename;
|
||||
nnet.AppendLayer(new Softmax(nnet.OutputDim(),nnet.OutputDim(),&nnet));
|
||||
nnet.AppendComponent(new Softmax(nnet.OutputDim(),nnet.OutputDim()));
|
||||
//store the nnet
|
||||
nnet.Write(target_model_filename, binary);
|
||||
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
// nnetbin/nnet-train-mpe-sequential.cc
|
||||
|
||||
// Copyright 2011-2013 Karel Vesely; Arnab Ghoshal
|
||||
// Copyright 2011-2013 Brno University of Technology (author: Karel Vesely); Arnab Ghoshal
|
||||
|
||||
// See ../../COPYING for clarification regarding multiple authors
|
||||
//
|
||||
|
@ -175,9 +175,9 @@ int main(int argc, char *argv[]) {
|
|||
Nnet nnet;
|
||||
nnet.Read(model_filename);
|
||||
// using activations directly: remove softmax, if present
|
||||
if (nnet.Layer(nnet.LayerCount()-1)->GetType() == Component::kSoftmax) {
|
||||
if (nnet.GetComponent(nnet.NumComponents()-1).GetType() == Component::kSoftmax) {
|
||||
KALDI_LOG << "Removing softmax from the nnet " << model_filename;
|
||||
nnet.RemoveLayer(nnet.LayerCount()-1);
|
||||
nnet.RemoveComponent(nnet.NumComponents()-1);
|
||||
} else {
|
||||
KALDI_LOG << "The nnet was without softmax " << model_filename;
|
||||
}
|
||||
|
@ -357,7 +357,7 @@ int main(int argc, char *argv[]) {
|
|||
|
||||
// add the softmax layer back before writing
|
||||
KALDI_LOG << "Appending the softmax " << target_model_filename;
|
||||
nnet.AppendLayer(new Softmax(nnet.OutputDim(),nnet.OutputDim(),&nnet));
|
||||
nnet.AppendComponent(new Softmax(nnet.OutputDim(),nnet.OutputDim()));
|
||||
//store the nnet
|
||||
nnet.Write(target_model_filename, binary);
|
||||
|
||||
|
|
|
@ -57,9 +57,9 @@ int main(int argc, char *argv[]) {
|
|||
nnet.Read(ki.Stream(), binary_read);
|
||||
}
|
||||
|
||||
KALDI_ASSERT(nnet.LayerCount() == 1);
|
||||
KALDI_ASSERT(nnet.Layer(0)->GetType() == Component::kRbm);
|
||||
RbmBase& rbm = dynamic_cast<RbmBase&>(*nnet.Layer(0));
|
||||
KALDI_ASSERT(nnet.NumComponents() == 1);
|
||||
KALDI_ASSERT(nnet.GetComponent(0).GetType() == Component::kRbm);
|
||||
RbmBase& rbm = dynamic_cast<RbmBase&>(nnet.GetComponent(0));
|
||||
|
||||
{
|
||||
Output ko(model_out_filename, binary_write);
|
||||
|
|
|
@ -103,9 +103,9 @@ int main(int argc, char *argv[]) {
|
|||
|
||||
Nnet nnet;
|
||||
nnet.Read(model_filename);
|
||||
KALDI_ASSERT(nnet.LayerCount()==1);
|
||||
KALDI_ASSERT(nnet.Layer(0)->GetType() == Component::kRbm);
|
||||
RbmBase &rbm = dynamic_cast<RbmBase&>(*nnet.Layer(0));
|
||||
KALDI_ASSERT(nnet.NumComponents()==1);
|
||||
KALDI_ASSERT(nnet.GetComponent(0).GetType() == Component::kRbm);
|
||||
RbmBase &rbm = dynamic_cast<RbmBase&>(nnet.GetComponent(0));
|
||||
|
||||
// Configure the RBM
|
||||
// first get make some options easy to access:
|
||||
|
|
|
@ -61,7 +61,7 @@ int main(int argc, char *argv[]) {
|
|||
//we will put the transform to the nnet
|
||||
Nnet nnet;
|
||||
//create affine transform layer
|
||||
AffineTransform* layer = new AffineTransform(transform.NumCols(),transform.NumRows(),&nnet);
|
||||
AffineTransform* layer = new AffineTransform(transform.NumCols(),transform.NumRows());
|
||||
//the pointer will be given to the nnet, so we don't need to call delete
|
||||
|
||||
//convert Matrix to CuMatrix
|
||||
|
@ -71,7 +71,7 @@ int main(int argc, char *argv[]) {
|
|||
layer->SetLinearity(cu_transform);
|
||||
|
||||
//append layer to the nnet
|
||||
nnet.AppendLayer(layer);
|
||||
nnet.AppendComponent(layer);
|
||||
|
||||
//write the nnet
|
||||
{
|
||||
|
|
Загрузка…
Ссылка в новой задаче