From 026426ad0847afd12325b7f535d47e71f2061b14 Mon Sep 17 00:00:00 2001 From: Yinggong ZHAO Date: Mon, 1 Jun 2015 23:29:25 -0700 Subject: [PATCH 01/21] In sequencereader move labelOutput to DeviceId and remove useless code in NCE-LSTM, which will move data from GPU to CPU --- DataReader/LMSequenceReader/SequenceReader.cpp | 4 ++++ MachineLearning/CNTK/SimpleNetworkBuilder.cpp | 2 -- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/DataReader/LMSequenceReader/SequenceReader.cpp b/DataReader/LMSequenceReader/SequenceReader.cpp index 9c61b52a1..9b39b7ab0 100644 --- a/DataReader/LMSequenceReader/SequenceReader.cpp +++ b/DataReader/LMSequenceReader/SequenceReader.cpp @@ -2051,6 +2051,10 @@ void BatchSequenceReader::GetLabelOutput(std::mapTransferFromDeviceToDevice(CPUDEVICE, curDevId, true, false, false); + } } template class BatchSequenceReader; diff --git a/MachineLearning/CNTK/SimpleNetworkBuilder.cpp b/MachineLearning/CNTK/SimpleNetworkBuilder.cpp index 2c800cf84..461a8b622 100644 --- a/MachineLearning/CNTK/SimpleNetworkBuilder.cpp +++ b/MachineLearning/CNTK/SimpleNetworkBuilder.cpp @@ -1273,8 +1273,6 @@ namespace Microsoft { namespace MSR { namespace CNTK { w = m_net->CreateLearnableParameter(msra::strfun::wstrprintf(L"W%d", numHiddenLayers), m_layerSizes[numHiddenLayers], m_layerSizes[numHiddenLayers + 1]); m_net->InitLearnableParameters(w, m_uniformInit, randomSeed++, m_initValueScale); - double val = w->FunctionValues()(0, 0); - /// the label is a dense matrix. each element is the word index label = m_net->CreateInputNode(L"labels", 2 * (this->nce_noises + 1), mbSize); From b488e18a933bde53da3cf877e046c0f267eddb18 Mon Sep 17 00:00:00 2001 From: Yinggong ZHAO Date: Mon, 1 Jun 2015 23:36:56 -0700 Subject: [PATCH 02/21] remove sampleCount in CPUMatrix::AssignNoiseContrastiveEstimation --- Math/Math/CPUMatrix.cpp | 3 +-- Math/Math/CPUMatrix.h | 2 +- Math/Math/Matrix.cpp | 2 +- 3 files changed, 3 insertions(+), 4 deletions(-) diff --git a/Math/Math/CPUMatrix.cpp b/Math/Math/CPUMatrix.cpp index 9d5c505f1..4770a11a3 100644 --- a/Math/Math/CPUMatrix.cpp +++ b/Math/Math/CPUMatrix.cpp @@ -3837,7 +3837,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { template void CPUMatrix::AssignNoiseContrastiveEstimation(const CPUMatrix& a, - const CPUMatrix& b, const CPUMatrix& bias, size_t sampleCount, CPUMatrix& tmp, CPUMatrix& c) + const CPUMatrix& b, const CPUMatrix& bias, CPUMatrix& tmp, CPUMatrix& c) //this: samples+probs // a: hidden // b: embedding @@ -3852,7 +3852,6 @@ namespace Microsoft { namespace MSR { namespace CNTK { std::cerr << endl; } */ - sampleCount *= 1; double log_likelihood = 0.0; size_t sample_size = this->GetNumRows() / 2; size_t batch_size = this->GetNumCols(); diff --git a/Math/Math/CPUMatrix.h b/Math/Math/CPUMatrix.h index 8a39bda0b..59cce206f 100644 --- a/Math/Math/CPUMatrix.h +++ b/Math/Math/CPUMatrix.h @@ -216,7 +216,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { CPUMatrix& AssignVectorNorm2Of(CPUMatrix& a, const bool isColWise); void AssignNoiseContrastiveEstimation(const CPUMatrix& a, const CPUMatrix& b, const CPUMatrix& bias, - size_t sampleCount, CPUMatrix& tmp, CPUMatrix& c); + CPUMatrix& tmp, CPUMatrix& c); void AssignNCEUnnormalizedEval(const CPUMatrix& a, const CPUMatrix& b, const CPUMatrix& bias, CPUMatrix& c); diff --git a/Math/Math/Matrix.cpp b/Math/Math/Matrix.cpp index 23e914be6..ec53f4b07 100644 --- a/Math/Math/Matrix.cpp +++ b/Math/Math/Matrix.cpp @@ -3556,7 +3556,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { { 
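                // note: sampleCount here is just the number of columns of 'a'
                // (elements / rows); it is still needed to size 'tmp' below, but is
                // no longer passed to the CPUMatrix implementation, which works from
                // the matrix dimensions directly.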
size_t sampleCount = a.m_CPUMatrix->GetNumElements() / a.m_CPUMatrix->GetNumRows(); tmp.Resize(a.GetNumRows() / 2, sampleCount); - a.m_CPUMatrix->AssignNoiseContrastiveEstimation(*b.m_CPUMatrix, *c.m_CPUMatrix, *bias.m_CPUMatrix, sampleCount, *tmp.m_CPUMatrix, *this->m_CPUMatrix); + a.m_CPUMatrix->AssignNoiseContrastiveEstimation(*b.m_CPUMatrix, *c.m_CPUMatrix, *bias.m_CPUMatrix, *tmp.m_CPUMatrix, *this->m_CPUMatrix); } else { From a9c669cc2acfd2fb419e9a44c3636b0e1bbf0554 Mon Sep 17 00:00:00 2001 From: Marko Radmilac Date: Tue, 2 Jun 2015 01:12:33 -0700 Subject: [PATCH 03/21] Adding script for build and test --- Makefile.gpu | 12 ++-- Scripts/build-and-test | 157 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 163 insertions(+), 6 deletions(-) create mode 100755 Scripts/build-and-test diff --git a/Makefile.gpu b/Makefile.gpu index d42fbc8c2..eefb13025 100644 --- a/Makefile.gpu +++ b/Makefile.gpu @@ -32,11 +32,11 @@ DEVICE = gpu BUILDTYPE = debug #BUILDTYPE = release # comment following and uncomment the next one to enable MKL library -#MATHLIB = acml -MATHLIB = mkl +MATHLIB = acml +#MATHLIB = mkl # modify relevant path below for your system MKL_PATH = /usr/users/chiaying/intel/composer_xe_2013.2.146 -ACML_PATH = /usr/local/acml5.3.0/gfortran64 +ACML_PATH = /usr/local/acml5.3.1/ifort64 ####### BUILDFOR = $(ARCH).$(DEVICE).$(BUILDTYPE).$(MATHLIB) @@ -48,8 +48,8 @@ ifeq ($(BUILDTYPE),debug) BUILDTYPE_OPT = -g GPU_BUILDTYPE_OPT = -G else - BUILDTYPE_OPT = -O4 - GPU_BUILDTYPE_OPT = + BUILDTYPE_OPT = -O3 -flto + GPU_BUILDTYPE_OPT = -O3 endif ifeq ($(MATHLIB),mkl) @@ -142,7 +142,7 @@ $(OBJDIR)/%.o : %.cu Makefile @echo $(SEPARATOR) @echo creating $@ for $(ARCH) with build type $(BUILDTYPE) @mkdir -p $(dir $@) - $(NVCC) -c $< -o $@ $(BUILDTYPE_OPT) $(GPU_BUILDTYPE_OPT) $(NVCCFLAGS) $(INCFLAGS) -Xcompiler -fPIC + $(NVCC) -c $< -o $@ $(GPU_BUILDTYPE_OPT) $(NVCCFLAGS) $(INCFLAGS) -Xcompiler -fPIC $(OBJDIR)/%.o : %.cpp Makefile @echo $(SEPARATOR) diff --git a/Scripts/build-and-test b/Scripts/build-and-test new file mode 100755 index 000000000..41d11d4fc --- /dev/null +++ b/Scripts/build-and-test @@ -0,0 +1,157 @@ +#!/bin/bash + +# Setting some default values +CNTK_CLEANUP=1 +QUIET_MAKE= + +# parsing command line arguments: +while [[ $# > 0 ]] +do +key="$1" + +case $key in + -h|--help) + echo "Usage: build-and-test [options]" + echo "Options:" + echo " -q|--quiet-make - redirect build output to files" + echo " -n|--no-cleanup - leave build binaries intact" + echo "If CNTK root is empty and branch is not specified then master CNTK branch is built" + exit 1 + ;; + -n|--no-cleanup) + CNTK_CLEANUP=0 + ;; + -q|--quiet-make) + QUIET_MAKE=1 + ;; + -*) + echo Unkown option $key + exit 1 + ;; + *) + echo Unkown option $key + exit 1 + ;; +esac +shift # past argument or value +done + +# Step 0 -- Validate all necessary prerequisites +# It is possible to use this script on Windows to build CNTK +# from Cygwin window with Visual C++ environment loaded. +# In that case OS environment variable will be set and we +# can use it to differentiate from Linux. 
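# (Illustrative check of what this detection sees on a given machine: running
#  `echo "OS=$OS OSTYPE=$OSTYPE"` prints OS=Windows_NT OSTYPE=cygwin under
#  Cygwin, and an empty OS with OSTYPE=linux-gnu in a typical Linux shell.)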
+if [[ $OS == "Windows_NT" && $OSTYPE == "cygwin" ]]; then + DEBUG_DIR=Debug + RELEASE_DIR=Release + PREFIX_DIR=x64 + BIN_NAME=CNTK.exe + + if [[ $VCINSTALLDIR == "" ]]; then + echo "============ Visual Studio environment not properly setup ============" + echo "============ Please find and run the appropriate vcvarsall.bat script ============" + exit 1 + fi +elif [[ $OSTYPE == "linux-gnu" ]]; then + DEBUG_DIR=x86_64.gpu.debug.acml + RELEASE_DIR=x86_64.gpu.release.acml + PREFIX_DIR=bin + BIN_NAME=cntk +else + echo "============ ERROR: Unsupported OS ============" + echo "============ Scripts supports only building from Linux and Windows through Cygwin ============" + exit 1 +fi + +# Step 1 -- Prepare temporary folders and files, tweak settings if necessary +TMP_ROOT=`mktemp -d /tmp/cntk.XXXXX || exit $?` +echo "============ Creating CNTK temp directory in $TMP_ROOT ============" + +TMP_CONF_FILE=`mktemp $TMP_ROOT/Simple.conf.XXXXX || exit $?` +TMP_RESULT_FILE=`mktemp $TMP_ROOT/Result.XXXXX || exit $?` + +SCRIPT=`readlink -f $0` +SCRIPT_DIR=`dirname $SCRIPT` +CNTK_ROOT=`dirname $SCRIPT_DIR` + +if ! [[ -d "$CNTK_ROOT/.git" ]]; then + echo "============ ERROR: Build script located in the wrong directory ($SCRIPT_DIR) ============" + error 1 +fi + +cd $CNTK_ROOT +cp Demos/Simple/Simple.config $TMP_CONF_FILE || exit $? +MAKEFILE=Makefile.gpu + +# Our make is too noisy right now and it is difficult to spot +# issues from stdout and stderr. In the quiet mode these are +# redirected to a file where they could be examined after the fact +if [[ $QUIET_MAKE == 1 ]]; then + exec 6>>$TMP_ROOT/stdout || exit $? + exec 7>>$TMP_ROOT/stderr || exit $? +else + exec 6>&1 || exit $? + exec 7>&2 || exit $? +fi + +# Step 2 -- Perform necessary builds +for FLAVOR in debug release +do + echo "============ Building CNTK $FLAVOR ============" + if [[ $OS == "Windows_NT" ]]; then + msbuild.exe /property:Configuration=$FLAVOR /t:Clean || exit $? + msbuild.exe /property:Configuration=$FLAVOR || exit $? + else + make BUILDTYPE=$FLAVOR -f $MAKEFILE clean || exit $? + make BUILDTYPE=$FLAVOR -j -f $MAKEFILE 1>&6 2>&7 || exit $? + fi +done + +if ! [[ -f "$CNTK_ROOT/$PREFIX_DIR/$DEBUG_DIR/$BIN_NAME" && -f "$CNTK_ROOT/$PREFIX_DIR/$RELEASE_DIR/$BIN_NAME" ]]; then + echo "============ ERROR: CNTK did not build properly ============" + exit 1 +fi + +# Step 3 -- Run the tests to verify that everything works properly +cd $PREFIX_DIR + +for TARGET in CPU GPU +do + # These sed scripts are simply toggling DeviceNumber argument in the config file + # If it is set to Auto, it will pick GPU over CPU. At -1 CPU is selected. + if [[ $TARGET == CPU ]]; then + sed -i -e 's/^DeviceNumber.*/DeviceNumber=-1/g' $TMP_CONF_FILE || exit $? + else + sed -i -e 's/^DeviceNumber.*/DeviceNumber=Auto/g' $TMP_CONF_FILE || exit $? + fi + + for FLAVOR_DIR in $DEBUG_DIR $RELEASE_DIR + do + echo "============ Running CNTK ($FLAVOR_DIR) ($TARGET) ============" + rm -rf models + if [[ $OS == "Windows_NT" ]]; then + # We have to use cygpath on Windows to modify the file paths into the format readable by cntk. + time ./$FLAVOR_DIR/$BIN_NAME configFile="`cygpath -w $TMP_CONF_FILE`" 2>$TMP_RESULT_FILE || exit $? + else + time ./$FLAVOR_DIR/$BIN_NAME configFile=$TMP_CONF_FILE 2>$TMP_RESULT_FILE || exit $? + fi + grep -q "Using $TARGET" $TMP_RESULT_FILE || exit $? + grep -q "EXCEPTION" $TMP_RESULT_FILE && exit $? 
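        # NOTE: in the EXCEPTION grep above, $? is grep's own exit status, which
        # is 0 (success) when a match is found, so a run whose log contains
        # EXCEPTION still ends this script with status 0; patch 11 below replaces
        # this with an explicit error message and `exit 1`.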
+ done +done + +# Step 4 -- Optionally cleanup after builds and tests +if [[ $CNTK_CLEANUP == 1 ]]; then + rm -rf models + cd $CNTK_ROOT + for FLAVOR in debug release + do + echo "============ Cleaning up CNTK $FLAVOR ============" + if [[ $OS == "Windows_NT" ]]; then + msbuild.exe /property:Configuration=$FLAVOR /t:Clean || exit $? + else + make BUILDTYPE=$FLAVOR -f $MAKEFILE clean || exit $? + fi + done + rm -rf $TMP_ROOT +fi From ac723ceae784a2b7523f78b07bffceff4c40ad22 Mon Sep 17 00:00:00 2001 From: Marko Radmilac Date: Tue, 2 Jun 2015 12:50:32 -0700 Subject: [PATCH 04/21] Add completion message --- Scripts/build-and-test | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Scripts/build-and-test b/Scripts/build-and-test index 41d11d4fc..93e68a330 100755 --- a/Scripts/build-and-test +++ b/Scripts/build-and-test @@ -155,3 +155,5 @@ if [[ $CNTK_CLEANUP == 1 ]]; then done rm -rf $TMP_ROOT fi + +echo "============ Build and test of CNTK was successful! ============" From 12b1ab8ca68ecdce94055e5f8b769a82dc2cc8e0 Mon Sep 17 00:00:00 2001 From: Amit Agarwal Date: Wed, 3 Jun 2015 11:36:35 -0700 Subject: [PATCH 05/21] Some minor changes to the Simple Demo config file to allow specifying the path to input files using the RootDir variable --- Demos/Simple/Simple.config | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/Demos/Simple/Simple.config b/Demos/Simple/Simple.config index 2176f8414..dd4078505 100644 --- a/Demos/Simple/Simple.config +++ b/Demos/Simple/Simple.config @@ -1,8 +1,9 @@ -# command=Simple_Demo_Output +RootDir=.. command=Simple_Demo:Simple_Demo_Output # deviceId=-1 for CPU, >=0 for GPU devices DeviceNumber=-1 + #stderr=Demo precision=float @@ -13,7 +14,6 @@ deviceId=$DeviceNumber$ outputNodeNames=ScaledLogLikelihood traceLevel=1 - ####################################### # TRAINING CONFIG (Simple, Fixed LR) # ####################################### @@ -52,22 +52,22 @@ Simple_Demo=[ reader=[ # reader to use readerType=UCIFastReader - file=../Demos/Simple/SimpleDataTrain.txt + file=$RootDir$/Demos/Simple/SimpleDataTrain.txt miniBatchMode=Partial randomize=Auto verbosity=1 features=[ - dim=2 # two-dimensional input data + dim=2 # two-dimensional input data start=0 # Start with first element on line ] labels=[ - start=2 # Skip two elements + start=2 # Skip two elements dim=1 # One label dimension labelDim=2 # Two labels possible - labelMappingFile=../Demos/Simple/SimpleMapping.txt + labelMappingFile=$RootDir$/Demos/Simple/SimpleMapping.txt ] ] ] @@ -84,16 +84,16 @@ Simple_Demo_Output=[ reader=[ # reader to use readerType=UCIFastReader - file=../Demos/Simple/SimpleDataTest.txt + file=$RootDir$/Demos/Simple/SimpleDataTest.txt features=[ dim=2 - start=0 + start=0 ] labels=[ - start=2 + start=2 dim=1 labelDim=2 - labelMappingFile=../Demos/Simple/SimpleMapping.txt + labelMappingFile=$RootDir$/Demos/Simple/SimpleMapping.txt ] ] outputPath=SimpleOutput # Dump output as text From c35d51dfe5ba97264aaacae3391df4e2453cf0a3 Mon Sep 17 00:00:00 2001 From: Amit Agarwal Date: Wed, 3 Jun 2015 14:16:28 -0700 Subject: [PATCH 06/21] Fixed a bug that was incorrectly deleting the CPUMatrix external buffer --- Math/Math/CPUMatrix.cpp | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/Math/Math/CPUMatrix.cpp b/Math/Math/CPUMatrix.cpp index 4770a11a3..2c34b52ac 100644 --- a/Math/Math/CPUMatrix.cpp +++ b/Math/Math/CPUMatrix.cpp @@ -632,16 +632,13 @@ namespace Microsoft { namespace MSR { namespace CNTK { // if it's externally managed, then populate the 
structure if (matrixFlags&matrixFlagDontOwnBuffer) { + // free previous array allocation if any before overwriting if (m_pArray != nullptr) delete [] m_pArray; m_pArray = pArray; m_numRows = numRows; m_numCols = numCols; - // free previous array allocation if any before overwriting - if (m_pArray != nullptr) - delete[] m_pArray; - m_pArray = pArray; m_elemSizeAllocated = GetNumElements(); m_externalBuffer = true; } From 4b29673fda9596b8a372a576d575e80dd7a6021e Mon Sep 17 00:00:00 2001 From: Dong Yu Date: Wed, 3 Jun 2015 16:57:02 -0700 Subject: [PATCH 07/21] Fix the error throw bugs in UCIParser. Now if a file cannot be opened the error will be thrown and caught and cntk will exit gracefully. --- DataReader/UCIFastReader/UCIParser.cpp | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/DataReader/UCIFastReader/UCIParser.cpp b/DataReader/UCIFastReader/UCIParser.cpp index da8475c43..83244581a 100644 --- a/DataReader/UCIFastReader/UCIParser.cpp +++ b/DataReader/UCIFastReader/UCIParser.cpp @@ -362,10 +362,10 @@ void UCIParser::ParseInit(LPCWSTR fileName, size_t startFeat errno_t err = _wfopen_s( &m_pFile, fileName, L"rb" ); if (err) - std::runtime_error("UCIParser::ParseInit - error opening file"); + throw std::runtime_error("UCIParser::ParseInit - error opening file"); int rc = _fseeki64(m_pFile, 0, SEEK_END); if (rc) - std::runtime_error("UCIParser::ParseInit - error seeking in file"); + throw std::runtime_error("UCIParser::ParseInit - error seeking in file"); m_fileSize = GetFilePosition(); m_fileBuffer = new BYTE[m_bufferSize]; @@ -379,7 +379,7 @@ int64_t UCIParser::GetFilePosition() { int64_t position = _ftelli64(m_pFile); if (position == -1L) - std::runtime_error("UCIParser::GetFilePosition - error retrieving file position in file"); + throw std::runtime_error("UCIParser::GetFilePosition - error retrieving file position in file"); return position; } @@ -392,7 +392,7 @@ void UCIParser::SetFilePosition(int64_t position) { int rc = _fseeki64(m_pFile, position, SEEK_SET); if (rc) - std::runtime_error("UCIParser::SetFilePosition - error seeking in file"); + throw std::runtime_error("UCIParser::SetFilePosition - error seeking in file"); // setup state machine to start at this position PrepareStartPosition(position); @@ -445,7 +445,7 @@ size_t UCIParser::UpdateBuffer() size_t bytesToRead = min(m_bufferSize, m_fileSize-m_bufferStart)-saveBytes; size_t bytesRead = fread(m_fileBuffer+saveBytes, 1, bytesToRead, m_pFile); if (bytesRead == 0 && ferror(m_pFile)) - std::runtime_error("UCIParser::UpdateBuffer - error reading file"); + throw std::runtime_error("UCIParser::UpdateBuffer - error reading file"); return bytesRead; } From 02080fc0f6d6d5840c2c586443b9eec02f455387 Mon Sep 17 00:00:00 2001 From: Mike Seltzer Date: Thu, 4 Jun 2015 13:12:10 -0700 Subject: [PATCH 08/21] fix bug in multi utterance reader when mlf and feature file are mismatched in duration --- .../HTKMLFReader/utterancesourcemulti.h | 67 +++++++++++-------- 1 file changed, 39 insertions(+), 28 deletions(-) diff --git a/DataReader/HTKMLFReader/utterancesourcemulti.h b/DataReader/HTKMLFReader/utterancesourcemulti.h index 4af5c9ec1..6b4ba1812 100644 --- a/DataReader/HTKMLFReader/utterancesourcemulti.h +++ b/DataReader/HTKMLFReader/utterancesourcemulti.h @@ -382,47 +382,58 @@ public: // TODO: we can store labels more efficiently now since we don't do frame-wise random access anymore. 
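                // The rewrite below first verifies that every MLF label stream matches
                // the feature file's duration, and only then commits the utterance
                // (utteranceset.push_back) and expands its class-id sequences; previously
                // the utterance was pushed before the check, so a mismatch detected in a
                // later label stream could leave a partially registered utterance behind.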
// OK, utterance has all we need --remember it - utteranceset.push_back (std::move (utterance)); if (m==0) { - _totalframes += uttframes; - framesaccum.push_back(uttframes); //track number of frames in each utterance - first feature is the reference if (!labels.empty() && !lacksmlf) //if (!labels.empty() && labelsiter != labels[0].end()) { - foreach_index (j, labels) + // first verify that all the label files have the proper duration + bool durationmatch = true; + foreach_index(j, labels) { const auto & labseq = labels[j].find(key)->second; // check if durations match; skip if not - size_t labframes = labseq.empty() ? 0 : (labseq[labseq.size()-1].firstframe + labseq[labseq.size()-1].numframes); + size_t labframes = labseq.empty() ? 0 : (labseq[labseq.size() - 1].firstframe + labseq[labseq.size() - 1].numframes); if (labframes != uttframes) { - fprintf (stderr, " [duration mismatch (%d in label vs. %d in feat file), skipping %S]", labframes, uttframes, key.c_str()); + fprintf(stderr, " [duration mismatch (%d in label vs. %d in feat file), skipping %S]", labframes, uttframes, key.c_str()); nomlf++; - continue; // skip this utterance at all + durationmatch = false; + break; // continue; // skip this utterance at all } - // expand classid sequence into flat array - foreach_index (i, labseq) + } + if (durationmatch){ + utteranceset.push_back(std::move(utterance)); + _totalframes += uttframes; + framesaccum.push_back(uttframes); //track number of frames in each utterance - first feature is the reference + // then parse each mlf if the durations are consistent + foreach_index(j, labels) { - const auto & e = labseq[i]; - if ((i > 0 && labseq[i-1].firstframe + labseq[i-1].numframes != e.firstframe) || (i == 0 && e.firstframe != 0)) - throw std::runtime_error (msra::strfun::strprintf ("minibatchutterancesource: labels not in consecutive order MLF in label set: %S", key.c_str())); - if (e.classid >= udim[j]) - throw std::runtime_error (msra::strfun::strprintf ("minibatchutterancesource: class id %d exceeds model output dimension %d in file %S", e.classid, udim, key.c_str())); - if (e.classid != (CLASSIDTYPE) e.classid) - throw std::runtime_error ("CLASSIDTYPE has too few bits"); - for (size_t t = e.firstframe; t < e.firstframe + e.numframes; t++) - classids[j]->push_back ((CLASSIDTYPE) e.classid); - numclasses[j] = max (numclasses[j], 1u + e.classid); - counts[j].resize (numclasses[j], 0); - counts[j][e.classid] += e.numframes; - } - classids[j]->push_back ((CLASSIDTYPE) -1); // append a boundary marker marker for checking + const auto & labseq = labels[j].find(key)->second; + // expand classid sequence into flat array + foreach_index(i, labseq) + { + const auto & e = labseq[i]; + if ((i > 0 && labseq[i - 1].firstframe + labseq[i - 1].numframes != e.firstframe) || (i == 0 && e.firstframe != 0)) + throw std::runtime_error(msra::strfun::strprintf("minibatchutterancesource: labels not in consecutive order MLF in label set: %S", key.c_str())); + if (e.classid >= udim[j]) + throw std::runtime_error(msra::strfun::strprintf("minibatchutterancesource: class id %d exceeds model output dimension %d in file %S", e.classid, udim, key.c_str())); + if (e.classid != (CLASSIDTYPE)e.classid) + throw std::runtime_error("CLASSIDTYPE has too few bits"); + for (size_t t = e.firstframe; t < e.firstframe + e.numframes; t++) + classids[j]->push_back((CLASSIDTYPE)e.classid); + numclasses[j] = max(numclasses[j], 1u + e.classid); + counts[j].resize(numclasses[j], 0); + counts[j][e.classid] += e.numframes; + } - if 
(!labels[j].empty() && classids[j]->size() != _totalframes + utteranceset.size())
-                        throw std::logic_error (msra::strfun::strprintf ("minibatchutterancesource: label duration inconsistent with feature file in MLF label set: %S", key.c_str()));
-                    assert (labels[j].empty() || classids[j]->size() == _totalframes + utteranceset.size());
+                        classids[j]->push_back((CLASSIDTYPE)-1); // append a boundary marker for checking
+
+                        if (!labels[j].empty() && classids[j]->size() != _totalframes + utteranceset.size())
+                            throw std::logic_error(msra::strfun::strprintf("minibatchutterancesource: label duration inconsistent with feature file in MLF label set: %S", key.c_str()));
+                        assert(labels[j].empty() || classids[j]->size() == _totalframes + utteranceset.size());
+                    }
                 }
             }
             else{
@@ -451,7 +462,7 @@ public:
             }
             if (nomlf + nolat > 0)
             {
-                fprintf (stderr, "minibatchutterancesource: out of %d files, %d files not found in label set and %d have no lattice\n", infiles.size(), nomlf, nolat);
+                fprintf (stderr, "minibatchutterancesource: out of %d files, %d files not found in label set and %d have no lattice\n", infiles[0].size(), nomlf, nolat);
                 if (nomlf + nolat > infiles[m].size() / 2)
                     throw std::runtime_error ("minibatchutterancesource: too many files not found in label set--assuming broken configuration\n");
             }
@@ -1236,4 +1247,4 @@ public:
     };
 };};
- 
+

From e4424d56978a27033f1791e5402573a5149f2016 Mon Sep 17 00:00:00 2001
From: Yu
Date: Sun, 7 Jun 2015 18:19:26 -0400
Subject: [PATCH 09/21] Fix the compile on Linux for the Kaldi reader

---
 DataReader/Kaldi2Reader/HTKMLFReader.cpp |  1 +
 DataReader/Kaldi2Reader/HTKMLFWriter.cpp |  1 +
 Makefile_kaldi2.cpu                      | 16 ++++++++--------
 3 files changed, 10 insertions(+), 8 deletions(-)

diff --git a/DataReader/Kaldi2Reader/HTKMLFReader.cpp b/DataReader/Kaldi2Reader/HTKMLFReader.cpp
index ee6bc91d1..7a1d78f78 100644
--- a/DataReader/Kaldi2Reader/HTKMLFReader.cpp
+++ b/DataReader/Kaldi2Reader/HTKMLFReader.cpp
@@ -24,6 +24,7 @@
 #define DATAREADER_EXPORTS // creating the exports here
 #include "DataReader.h"
 #include "HTKMLFReader.h"
+#include "commandArgUtil.h"
 #ifdef LEAKDETECT
 #include  // for memory leak detection
 #endif
diff --git a/DataReader/Kaldi2Reader/HTKMLFWriter.cpp b/DataReader/Kaldi2Reader/HTKMLFWriter.cpp
index 8c0881c24..1d6f3f480 100644
--- a/DataReader/Kaldi2Reader/HTKMLFWriter.cpp
+++ b/DataReader/Kaldi2Reader/HTKMLFWriter.cpp
@@ -27,6 +27,7 @@
 #define DATAWRITER_EXPORTS // creating the exports here
 #include "DataWriter.h"
 #include "HTKMLFWriter.h"
+#include "commandArgUtil.h"
 #ifdef LEAKDETECT
 #include  // for memory leak detection
 #endif
diff --git a/Makefile_kaldi2.cpu b/Makefile_kaldi2.cpu
index b793130dd..abd4e425d 100644
--- a/Makefile_kaldi2.cpu
+++ b/Makefile_kaldi2.cpu
@@ -31,8 +31,8 @@ DEVICE = cpu
 #BUILDTYPE = debug
 BUILDTYPE = release
 # comment following and uncomment the next one to enable MKL library
-#MATHLIB = acml
-MATHLIB = mkl
+MATHLIB = acml
+#MATHLIB = mkl
 # modify relevant path below for your system
 MKL_PATH = /usr/users/chiaying/intel/composer_xe_2013.2.146
 ACML_PATH = /usr/users/yzhang87/code/acml/gfortran64
@@ -61,7 +61,7 @@ endif
 # Add KALDI (you need to add your Kaldi path into this file)
 include kaldi_vars.mk
 
-INCFLAGS = -I Common/Include -I Math/Math -I MachineLearning/cn -I $(MATHLIB_INCLUDE) $(KALDI_INCLUDES)
+INCFLAGS = -I Common/Include -I Math/Math -I MachineLearning/CNTK -I $(MATHLIB_INCLUDE) $(KALDI_INCLUDES)
 
 CFLAGS = -msse3 -std=c++0x -std=c++11 -DCPUONLY -D_POSIX_SOURCE -D_XOPEN_SOURCE=600 -D__USE_XOPEN2K $(KALDI_DEFINES) $(MATHLIB_DEFINE) -fopenmp -fpermissive -fPIC
@@ -70,9 +70,9 @@ COMMON_SRC = Common/fileutil.cpp Common/DataWriter.cpp Common/ConfigFile.cpp Com
 Common/Eval.cpp Common/File.cpp Common/BestGpu.cpp Common/TimerUtility.cpp
 
 MATH_SRC = Math/Math/Matrix.cpp Math/Math/CPUMatrix.cpp Math/Math/CPUSparseMatrix.cpp Math/Math/NoGPU.cpp
-CN_SRC = MachineLearning/cn/NetworkDescriptionLanguage.cpp MachineLearning/cn/cn.cpp MachineLearning/cn/ComputationNode.cpp \
-	MachineLearning/cn/ModelEditLanguage.cpp MachineLearning/cn/PTaskGraphBuilder.cpp \
-	MachineLearning/cn/SimpleNetworkBuilder.cpp MachineLearning/cn/tests.cpp MachineLearning/CNTKEval/CNTKEval.cpp
+CN_SRC = MachineLearning/CNTK/NetworkDescriptionLanguage.cpp MachineLearning/CNTK/CNTK.cpp MachineLearning/CNTK/ComputationNode.cpp \
+	MachineLearning/CNTK/ModelEditLanguage.cpp \
+	MachineLearning/CNTK/SimpleNetworkBuilder.cpp MachineLearning/CNTK/tests.cpp MachineLearning/CNTKEval/CNTKEval.cpp
 BINARYREADER_SRC = DataReader/BinaryReader/BinaryWriter.cpp DataReader/BinaryReader/BinaryReader.cpp DataReader/BinaryReader/BinaryFile.cpp
 HTKMLFREADER_SRC = DataReader/HTKMLFReader_linux/HTKMLFWriter.cpp DataReader/HTKMLFReader_linux/DataWriter.cpp DataReader/HTKMLFReader_linux/DataReader.cpp DataReader/HTKMLFReader_linux/HTKMLFReader.cpp
 KALDIREADER_SRC = DataReader/KaldiReader/HTKMLFWriter.cpp DataReader/KaldiReader/DataWriter.cpp DataReader/KaldiReader/DataReader.cpp DataReader/KaldiReader/HTKMLFReader.cpp
@@ -101,7 +101,7 @@ DEP := $(patsubst %.o, %.d, $(OBJ))
 SEPARATOR = "=-----------------------------------------------------------="
 
 #all: $(BINDIR)/cn.exe $(BINDIR)/UCIFastReader.so $(BINDIR)/SequenceReader.so $(BINDIR)/LUSequenceReader.so $(BINDIR)/HTKMLFReader.so $(BINDIR)/BinaryReader.so
-all: $(BINDIR)/cn.exe $(BINDIR)/UCIFastReader.so $(BINDIR)/LMSequenceReader.so $(BINDIR)/LUSequenceReader.so $(BINDIR)/HTKMLFReader.so $(BINDIR)/Kaldi2Reader.so
+all: $(BINDIR)/cntk $(BINDIR)/UCIFastReader.so $(BINDIR)/LMSequenceReader.so $(BINDIR)/LUSequenceReader.so $(BINDIR)/HTKMLFReader.so $(BINDIR)/Kaldi2Reader.so
 	ln -sf $(CURDIR)/$(BINDIR)/* bin
@@ -135,7 +135,7 @@ $(BINDIR)/Kaldi2Reader.so: $(KALDI2READER_OBJ) $(CORE_OBJ)
 	$(CC) $(BUILDTYPE_OPT) -fPIC -shared -o $@ $^ $(KALDI_LIBS)
 
-$(BINDIR)/cn.exe: $(CORE_OBJ)
+$(BINDIR)/cntk: $(CORE_OBJ)
 	@echo $(SEPARATOR)
 	@mkdir -p $(dir $@)
 	@echo building output for $(ARCH) with build type $(BUILDTYPE) ...

From 346dc11cde7a7dedbcf2c1700acb8d4ee0010182 Mon Sep 17 00:00:00 2001
From: Dong Yu
Date: Wed, 10 Jun 2015 13:44:54 -0700
Subject: [PATCH 10/21] fix bugs in the binaryReader and UCIFastReader

---
 DataReader/BinaryReader/BinaryWriter.cpp | 4 ++--
 DataReader/UCIFastReader/UCIParser.cpp   | 7 ++++++-
 DataReader/UCIFastReader/UCIParser.h     | 4 ++--
 3 files changed, 10 insertions(+), 5 deletions(-)

diff --git a/DataReader/BinaryReader/BinaryWriter.cpp b/DataReader/BinaryReader/BinaryWriter.cpp
index c30a22b32..e77ec2437 100644
--- a/DataReader/BinaryReader/BinaryWriter.cpp
+++ b/DataReader/BinaryReader/BinaryWriter.cpp
@@ -47,8 +47,8 @@ BinaryWriter::~BinaryWriter()
 // miniBatchMode=Partial
 // randomize=None
 // wfile=c:\speech\mnist\mnist_test.bin
-// #wsize - initial size of the file in MB
-// # if calculated size would be bigger, that is used instead
+// #wsize - initial size of the file in MB, defaults to 256
+// # has to be large enough for your dataset; the file will shrink to the actual size when closed.
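// # (illustrative sizing: 60000 records of 784 float features take about
// #  60000 * 784 * 4 bytes = ~180MB, so the 256MB default covers an MNIST-sized set)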
// #wsize=256 // #wrecords - number of records we should allocate space for in the file // # files cannot be expanded, so this should be large enough. If known modify this element in config before creating file diff --git a/DataReader/UCIFastReader/UCIParser.cpp b/DataReader/UCIFastReader/UCIParser.cpp index 83244581a..0a9d30317 100644 --- a/DataReader/UCIFastReader/UCIParser.cpp +++ b/DataReader/UCIFastReader/UCIParser.cpp @@ -11,6 +11,11 @@ #include #include +#if WIN32 +#define ftell64 _ftelli64 +#else +#define ftell64 ftell +#endif // SetState for a particular value template @@ -377,7 +382,7 @@ void UCIParser::ParseInit(LPCWSTR fileName, size_t startFeat template int64_t UCIParser::GetFilePosition() { - int64_t position = _ftelli64(m_pFile); + int64_t position = ftell64(m_pFile); if (position == -1L) throw std::runtime_error("UCIParser::GetFilePosition - error retrieving file position in file"); return position; diff --git a/DataReader/UCIFastReader/UCIParser.h b/DataReader/UCIFastReader/UCIParser.h index 07ba939e8..98447d479 100644 --- a/DataReader/UCIFastReader/UCIParser.h +++ b/DataReader/UCIFastReader/UCIParser.h @@ -90,8 +90,8 @@ private: int m_elementsConvertedThisLine; // global stats - int m_totalNumbersConverted; - int m_totalLabelsConverted; + int64_t m_totalNumbersConverted; + int64_t m_totalLabelsConverted; // file positions/buffer FILE * m_pFile; From 66193e0716c4741697d1daaa9e1bfecdfa550e86 Mon Sep 17 00:00:00 2001 From: Marko Radmilac Date: Mon, 8 Jun 2015 14:19:54 -0700 Subject: [PATCH 11/21] Modify build script to support more options --- Scripts/build-and-test | 271 +++++++++++++++++++++++++---------------- 1 file changed, 169 insertions(+), 102 deletions(-) diff --git a/Scripts/build-and-test b/Scripts/build-and-test index 93e68a330..df89a424c 100755 --- a/Scripts/build-and-test +++ b/Scripts/build-and-test @@ -1,8 +1,10 @@ #!/bin/bash # Setting some default values -CNTK_CLEANUP=1 -QUIET_MAKE= +BUILD=1 +RUN=1 +CLEAN_AFTER=0 +CLEAN_BEFORE=0 # parsing command line arguments: while [[ $# > 0 ]] @@ -13,20 +15,32 @@ case $key in -h|--help) echo "Usage: build-and-test [options]" echo "Options:" - echo " -q|--quiet-make - redirect build output to files" - echo " -n|--no-cleanup - leave build binaries intact" - echo "If CNTK root is empty and branch is not specified then master CNTK branch is built" + echo " -q|--quiet-build - redirect build output to files" + echo " -r|--run-only - assume that binaries are already built" + echo " -b|--build-only - just build, do not run" + echo " -cb|--clean-build - clean up the enlistment binaries before build" + echo " -o|--output-directory - specify output directory to use" + echo "Script location in the enlistment is used for finding root directory to build and run" exit 1 ;; - -n|--no-cleanup) - CNTK_CLEANUP=0 + -q|--quiet) + QUIET_BUILD=1 ;; - -q|--quiet-make) - QUIET_MAKE=1 + -r|--run-only) + BUILD=0 + RUN=1 ;; - -*) - echo Unkown option $key - exit 1 + -b|--build-only) + BUILD=1 + RUN=0 + ;; + -cb|--clean-build) + CLEAN_BEFORE=1 + BUILD=1 + ;; + -o|--output-directory) + OUTPUT_DIR="$2" + shift # past argument ;; *) echo Unkown option $key @@ -36,124 +50,177 @@ esac shift # past argument or value done -# Step 0 -- Validate all necessary prerequisites +# Step 0 -- Validate all necessary prerequisites and check for incompatible options # It is possible to use this script on Windows to build CNTK # from Cygwin window with Visual C++ environment loaded. 
# In that case OS environment variable will be set and we # can use it to differentiate from Linux. -if [[ $OS == "Windows_NT" && $OSTYPE == "cygwin" ]]; then - DEBUG_DIR=Debug - RELEASE_DIR=Release - PREFIX_DIR=x64 - BIN_NAME=CNTK.exe - - if [[ $VCINSTALLDIR == "" ]]; then - echo "============ Visual Studio environment not properly setup ============" - echo "============ Please find and run the appropriate vcvarsall.bat script ============" +if [[ $CLEAN_BEFORE == 1 && $RUN == 1 && $BUILD == 0 ]]; then + echo "============ ERROR: Incompatible options RUN and CLEAN_BEFORE set without BUILD ============" exit 1 - fi +fi + +if [[ $OS == "Windows_NT" && $OSTYPE == "cygwin" ]]; then + DEBUG_DIR=Debug + RELEASE_DIR=Release + PREFIX_DIR=x64 + BIN_NAME=CNTK.exe + + if [[ $VS120COMNTOOLS == "" ]]; then + echo "============ Visual Studio 12.0 environment not properly setup or VS not installed ============" + echo "============ Please find and run the appropriate vcvarsall.bat script ============" + exit 1 + fi + + if [[ $ACML_PATH == "" ]]; then + echo "============ ACML path not set ============" + echo "============ ACML libraries are needed to successfully build CNTK ============" + exit 1 + fi elif [[ $OSTYPE == "linux-gnu" ]]; then - DEBUG_DIR=x86_64.gpu.debug.acml - RELEASE_DIR=x86_64.gpu.release.acml - PREFIX_DIR=bin - BIN_NAME=cntk + DEBUG_DIR=x86_64.gpu.debug.acml + RELEASE_DIR=x86_64.gpu.release.acml + PREFIX_DIR=bin + BIN_NAME=cntk + MAKEFILE=Makefile.gpu else - echo "============ ERROR: Unsupported OS ============" - echo "============ Scripts supports only building from Linux and Windows through Cygwin ============" - exit 1 + echo "============ ERROR: Unsupported OS ============" + echo "============ Scripts supports only building from Linux and Windows through Cygwin ============" + exit 1 fi # Step 1 -- Prepare temporary folders and files, tweak settings if necessary -TMP_ROOT=`mktemp -d /tmp/cntk.XXXXX || exit $?` -echo "============ Creating CNTK temp directory in $TMP_ROOT ============" +if [[ $OUTPUT_DIR == "" ]]; then + TMP_ROOT=`mktemp -d /tmp/cntk.XXXXX || exit $?` + echo "============ Creating CNTK temp directory in $TMP_ROOT ============" + OUTPUT_DIR=$TMP_ROOT +fi -TMP_CONF_FILE=`mktemp $TMP_ROOT/Simple.conf.XXXXX || exit $?` -TMP_RESULT_FILE=`mktemp $TMP_ROOT/Result.XXXXX || exit $?` +CONF_FILE="$OUTPUT_DIR/Simple.conf" +BUILD_FILE="$OUTPUT_DIR/Build" +RUN_FILE="$OUTPUT_DIR/Result" +# Get to the root path from which we know how to build and run SCRIPT=`readlink -f $0` SCRIPT_DIR=`dirname $SCRIPT` CNTK_ROOT=`dirname $SCRIPT_DIR` if ! [[ -d "$CNTK_ROOT/.git" ]]; then - echo "============ ERROR: Build script located in the wrong directory ($SCRIPT_DIR) ============" - error 1 + echo "============ ERROR: Build script located in the wrong directory ($SCRIPT_DIR) ============" + error 1 fi cd $CNTK_ROOT -cp Demos/Simple/Simple.config $TMP_CONF_FILE || exit $? -MAKEFILE=Makefile.gpu -# Our make is too noisy right now and it is difficult to spot -# issues from stdout and stderr. In the quiet mode these are -# redirected to a file where they could be examined after the fact -if [[ $QUIET_MAKE == 1 ]]; then - exec 6>>$TMP_ROOT/stdout || exit $? - exec 7>>$TMP_ROOT/stderr || exit $? -else - exec 6>&1 || exit $? - exec 7>&2 || exit $? +if ! [[ -f $CONF_FILE ]]; then + cp Demos/Simple/Simple.config $CONF_FILE || exit $? 
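    # the copy is made world-readable below so that later runs, possibly under
    # a different account, can reuse the same generated config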
+ chmod a+r $CONF_FILE fi -# Step 2 -- Perform necessary builds -for FLAVOR in debug release -do - echo "============ Building CNTK $FLAVOR ============" - if [[ $OS == "Windows_NT" ]]; then - msbuild.exe /property:Configuration=$FLAVOR /t:Clean || exit $? - msbuild.exe /property:Configuration=$FLAVOR || exit $? - else - make BUILDTYPE=$FLAVOR -f $MAKEFILE clean || exit $? - make BUILDTYPE=$FLAVOR -j -f $MAKEFILE 1>&6 2>&7 || exit $? - fi -done - -if ! [[ -f "$CNTK_ROOT/$PREFIX_DIR/$DEBUG_DIR/$BIN_NAME" && -f "$CNTK_ROOT/$PREFIX_DIR/$RELEASE_DIR/$BIN_NAME" ]]; then - echo "============ ERROR: CNTK did not build properly ============" - exit 1 +if [[ $QUIET_BUILD == 1 ]]; then + echo "============ WARNING: You have selected quiet build. All build output will be placed in ($OUTPUT_DIR) ============" fi -# Step 3 -- Run the tests to verify that everything works properly -cd $PREFIX_DIR +# Step 2 -- Build the project debug and release, if requested +if [[ $BUILD == 1 ]]; then + # Step 2 -- Perform necessary builds + for FLAVOR in debug release + do + # Our make is too noisy right now and it is difficult to spot + # issues from stdout and stderr. In the quiet mode these are + # redirected to a file where they could be examined after the fact + if [[ $QUIET_BUILD == 1 ]]; then + exec 6>$BUILD_FILE.$FLAVOR.out || exit $? + exec 7>$BUILD_FILE.$FLAVOR.err || exit $? + else + exec 6>&1 || exit $? + exec 7>&2 || exit $? + fi -for TARGET in CPU GPU -do - # These sed scripts are simply toggling DeviceNumber argument in the config file - # If it is set to Auto, it will pick GPU over CPU. At -1 CPU is selected. - if [[ $TARGET == CPU ]]; then - sed -i -e 's/^DeviceNumber.*/DeviceNumber=-1/g' $TMP_CONF_FILE || exit $? - else - sed -i -e 's/^DeviceNumber.*/DeviceNumber=Auto/g' $TMP_CONF_FILE || exit $? - fi + echo "============ Building CNTK $FLAVOR (clean=$CLEAN_BEFORE) ============" - for FLAVOR_DIR in $DEBUG_DIR $RELEASE_DIR - do - echo "============ Running CNTK ($FLAVOR_DIR) ($TARGET) ============" + if [[ $OS == "Windows_NT" ]]; then + if [[ $CLEAN_BEFORE == 1 ]]; then + msbuild.exe /property:Configuration=$FLAVOR /t:Clean 1>&6 2>&7 || exit $? + fi + msbuild.exe /property:Configuration=$FLAVOR /m 1>&6 2>&7 || exit $? + else + if [[ $CLEAN_BEFORE == 1 ]]; then + make BUILDTYPE=$FLAVOR -f $MAKEFILE clean 1>&6 2>&7 || exit $? + fi + make BUILDTYPE=$FLAVOR -j -f $MAKEFILE 1>&6 2>&7 || exit $? + fi + chmod a+r $BUILD_FILE.* + done +fi + +# Step 3 -- Run the project tests, both debug and release, if requested +if [[ $RUN == 1 ]]; then + if ! [[ -f "$CNTK_ROOT/$PREFIX_DIR/$DEBUG_DIR/$BIN_NAME" && -f "$CNTK_ROOT/$PREFIX_DIR/$RELEASE_DIR/$BIN_NAME" ]]; then + echo "============ ERROR: CNTK did not build properly ============" + exit 1 + fi + + cd $PREFIX_DIR + + for TARGET in CPU GPU + do + # These sed scripts are simply toggling DeviceNumber argument in the config file + # If it is set to Auto, it will pick GPU over CPU. At -1 CPU is selected. + if [[ $TARGET == CPU ]]; then + sed -i -e 's/^DeviceNumber.*/DeviceNumber=-1/g' $CONF_FILE || exit $? + else + sed -i -e 's/^DeviceNumber.*/DeviceNumber=Auto/g' $CONF_FILE || exit $? 
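        # (the Simple demo config consumes this value through its deviceId=$DeviceNumber$ setting)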
+ fi + + for FLAVOR in debug release + do + if [[ FLAVOR == "debug" ]]; then + FLAVOR_DIR="$DEBUG_DIR" + else + FLAVOR_DIR="$RELEASE_DIR" + fi + OUT_FILE="$RUN_FILE.$FLAVOR.out" + + echo "============ Running CNTK for ($FLAVOR) ($TARGET), output in ($RUN_FILE.*) ============" + rm -rf models + if [[ $OS == "Windows_NT" ]]; then + # We have to use cygpath on Windows to modify the file paths into the format readable by cntk. + time ./$FLAVOR_DIR/$BIN_NAME configFile="`cygpath -w $CONF_FILE`" &>$OUT_FILE || exit $? + else + time ./$FLAVOR_DIR/$BIN_NAME configFile=$CONF_FILE &>$OUT_FILE || exit $? + fi + chmod a+r $RUN_FILE.* + + # Check if execution was successful + grep -q "Using $TARGET" "$OUT_FILE" || { + echo "============ ERROR: Run output (in $OUT_FILE) did not contain information about target device ($TARGET) ============" + exit 1 + } + + grep -q "EXCEPTION" "$OUT_FILE" && { + echo "============ ERROR: Run output in ($OUT_FILE) contains exceptions ============" + grep "EXCEPTION" "$OUT_FILE" + exit 1 + } + done + done +fi + +# Step 5 -- Optionally clean after builds and tests +if [[ $CLEAN_AFTER == 1 ]]; then rm -rf models - if [[ $OS == "Windows_NT" ]]; then - # We have to use cygpath on Windows to modify the file paths into the format readable by cntk. - time ./$FLAVOR_DIR/$BIN_NAME configFile="`cygpath -w $TMP_CONF_FILE`" 2>$TMP_RESULT_FILE || exit $? - else - time ./$FLAVOR_DIR/$BIN_NAME configFile=$TMP_CONF_FILE 2>$TMP_RESULT_FILE || exit $? - fi - grep -q "Using $TARGET" $TMP_RESULT_FILE || exit $? - grep -q "EXCEPTION" $TMP_RESULT_FILE && exit $? - done -done - -# Step 4 -- Optionally cleanup after builds and tests -if [[ $CNTK_CLEANUP == 1 ]]; then - rm -rf models - cd $CNTK_ROOT - for FLAVOR in debug release - do - echo "============ Cleaning up CNTK $FLAVOR ============" - if [[ $OS == "Windows_NT" ]]; then - msbuild.exe /property:Configuration=$FLAVOR /t:Clean || exit $? - else - make BUILDTYPE=$FLAVOR -f $MAKEFILE clean || exit $? - fi - done - rm -rf $TMP_ROOT + cd $CNTK_ROOT + for FLAVOR in debug release + do + echo "============ Cleaning up CNTK $FLAVOR ============" + if [[ $OS == "Windows_NT" ]]; then + msbuild.exe /property:Configuration=$FLAVOR /t:clean 1>&6 2>&7 || exit $? + else + make BUILDTYPE=$FLAVOR -f $MAKEFILE clean 1>&6 2>&7 || exit $? + fi + done + rm -rf $OUTPUT_DIR fi echo "============ Build and test of CNTK was successful! 
============"

From c518eb2203b00dc7d3fdf8021413c685db753b95 Mon Sep 17 00:00:00 2001
From: Marko Radmilac
Date: Tue, 9 Jun 2015 12:26:36 -0700
Subject: [PATCH 12/21] Make default output directory under cntk for
 simplicity, and address code review feedback

---
 .gitignore             |  1 +
 Scripts/build-and-test | 34 +++++++++++++++++++---------------
 2 files changed, 20 insertions(+), 15 deletions(-)

diff --git a/.gitignore b/.gitignore
index 664cc1b07..0ef78ce74 100644
--- a/.gitignore
+++ b/.gitignore
@@ -15,6 +15,7 @@ x64/
 build/
 [Bb]in/
 [Oo]bj/
+.run-*
 
 # Enable "build/" folder in the NuGet Packages folder since NuGet packages use it for MSBuild targets
 !packages/*/build/
diff --git a/Scripts/build-and-test b/Scripts/build-and-test
index df89a424c..4b452dd9d 100755
--- a/Scripts/build-and-test
+++ b/Scripts/build-and-test
@@ -15,12 +15,12 @@ case $key in
   -h|--help)
     echo "Usage: build-and-test [options]"
     echo "Options:"
-    echo " -q|--quiet-build - redirect build output to files"
-    echo " -r|--run-only - assume that binaries are already built"
+    echo " -q|--quiet-build - redirect build output to file (by default those will be in .run--*)"
+    echo " -r|--run-only - elides the build step, runs the binaries that have already been built"
     echo " -b|--build-only - just build, do not run"
     echo " -cb|--clean-build - clean up the enlistment binaries before build"
     echo " -o|--output-directory - specify output directory to use"
-    echo "Script location in the enlistment is used for finding root directory to build and run"
+    echo "The root directory used to build and run CNTK is the one that hosts the Scripts directory containing this script"
     exit 1
     ;;
@@ -65,6 +65,7 @@ if [[ $OS == "Windows_NT" && $OSTYPE == "cygwin" ]]; then
     RELEASE_DIR=Release
     PREFIX_DIR=x64
     BIN_NAME=CNTK.exe
+    BUILD_OS="windows"
 
     if [[ $VS120COMNTOOLS == "" ]]; then
         echo "============ Visual Studio 12.0 environment not properly setup or VS not installed ============"
@@ -83,6 +84,7 @@ elif [[ $OSTYPE == "linux-gnu" ]]; then
     PREFIX_DIR=bin
     BIN_NAME=cntk
     MAKEFILE=Makefile.gpu
+    BUILD_OS="linux"
 else
     echo "============ ERROR: Unsupported OS ============"
     echo "============ Scripts supports only building from Linux and Windows through Cygwin ============"
@@ -90,31 +92,33 @@ else
 fi
 
 # Step 1 -- Prepare temporary folders and files, tweak settings if necessary
-if [[ $OUTPUT_DIR == "" ]]; then
-    TMP_ROOT=`mktemp -d /tmp/cntk.XXXXX || exit $?`
-    echo "============ Creating CNTK temp directory in $TMP_ROOT ============"
-    OUTPUT_DIR=$TMP_ROOT
-fi
-
-CONF_FILE="$OUTPUT_DIR/Simple.conf"
-BUILD_FILE="$OUTPUT_DIR/Build"
-RUN_FILE="$OUTPUT_DIR/Result"
 
 # Get to the root path from which we know how to build and run
 SCRIPT=`readlink -f $0`
 SCRIPT_DIR=`dirname $SCRIPT`
 CNTK_ROOT=`dirname $SCRIPT_DIR`
 
+# Setup the output directory
+if [[ $OUTPUT_DIR == "" ]]; then
+    OUTPUT_DIR="$CNTK_ROOT/.run-$BUILD_OS-$RANDOM"
+fi
+
+echo "============ Creating CNTK temp directory in $OUTPUT_DIR ============"
+mkdir -p $OUTPUT_DIR || exit $?
+
+CONF_FILE="$OUTPUT_DIR/Simple.conf"
+BUILD_FILE="$OUTPUT_DIR/Build"
+RUN_FILE="$OUTPUT_DIR/Result"
+
-if ! [[ -d "$CNTK_ROOT/.git" ]]; then
+if ! [[ -d "$CNTK_ROOT/MachineLearning" ]]; then
     echo "============ ERROR: Build script located in the wrong directory ($SCRIPT_DIR) ============"
-    error 1
+    exit 1
 fi
 
 cd $CNTK_ROOT
 
 if ! [[ -f $CONF_FILE ]]; then
     cp Demos/Simple/Simple.config $CONF_FILE || exit $?
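    # copy only when the config is missing, so a caller-supplied --output-directory
    # can carry a pre-edited config across runs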
- chmod a+r $CONF_FILE fi if [[ $QUIET_BUILD == 1 ]]; then From 5fec0dcca82635cffd0df4effd4ade98c0f99068 Mon Sep 17 00:00:00 2001 From: thhoens Date: Thu, 11 Jun 2015 11:52:36 -0700 Subject: [PATCH 13/21] Fixed a bug where the GPUSparseMatrix class would claim that the MajorIndexCount was equal to the allocated space, instead of the number of elements. This brings it in line with the CPUSparseMatrix class. --- Math/Math/GPUSparseMatrix.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Math/Math/GPUSparseMatrix.h b/Math/Math/GPUSparseMatrix.h index f032a7f68..e2e3d0070 100644 --- a/Math/Math/GPUSparseMatrix.h +++ b/Math/Math/GPUSparseMatrix.h @@ -79,16 +79,16 @@ namespace Microsoft { namespace MSR { namespace CNTK { size_t MajorIndexCount() const { - return MajorIndexCount(m_numRows, m_numCols, m_elemSizeAllocated, m_format); + return MajorIndexCount(m_numRows, m_numCols, m_nz, m_format); } - size_t MajorIndexCount(const size_t numRows, const size_t numCols, const size_t numNZReserved, const MatrixFormat format) const + size_t MajorIndexCount(const size_t numRows, const size_t numCols, const size_t numNZ, const MatrixFormat format) const { if (format == matrixFormatSparseBlockCol) return numCols; else if (format == matrixFormatSparseBlockRow) return numRows; else - return numNZReserved; + return numNZ; } size_t MajorIndexSize() const // actual number of major index bytes in use { From adeab1eb201cb532d1587ad6229069b983501faf Mon Sep 17 00:00:00 2001 From: Dong Yu Date: Thu, 11 Jun 2015 18:43:09 -0700 Subject: [PATCH 14/21] implemented the RowStackNode which supports variable number of inputs. Passed unit tests and simple gradient check on MNIST. This change involves many files. --- MachineLearning/CNTK/ComputationNetwork.h | 78 +- MachineLearning/CNTK/ComputationNode.h | 5 + MachineLearning/CNTK/LinearAlgebraNodes.h | 161 +++ .../CNTK/NetworkDescriptionLanguage.cpp | 2 + MachineLearning/CNTK/NonlinearityNodes.h | 930 +++++++++--------- .../CNTK/SynchronousExecutionEngine.h | 53 +- Math/CNTKMathTest/CPUMatrixUnitTests.cpp | 22 +- Math/CNTKMathTest/GPUMatrixUnitTests.cpp | 23 +- Math/Math/CPUMatrix.cpp | 42 + Math/Math/CPUMatrix.h | 1 + Math/Math/GPUMatrix.cu | 57 ++ Math/Math/GPUMatrix.h | 1 + Math/Math/GPUMatrixCUDAKernels.cu | 21 + Math/Math/Matrix.cpp | 62 ++ Math/Math/Matrix.h | 1 + Math/Math/NoGPU.cpp | 1 + 16 files changed, 941 insertions(+), 519 deletions(-) diff --git a/MachineLearning/CNTK/ComputationNetwork.h b/MachineLearning/CNTK/ComputationNetwork.h index 671ecf62d..3b8c515b6 100644 --- a/MachineLearning/CNTK/ComputationNetwork.h +++ b/MachineLearning/CNTK/ComputationNetwork.h @@ -548,41 +548,38 @@ public: } ComputationNodePtr nodePtr = GetNodeFromName(nodeName); - ComputationNodePtr childNodePtr0, childNodePtr1, childNodePtr2, childNodePtr3, childNodePtr4; - switch (numChildren) + std::vector childrenNodes; + childrenNodes.resize(numChildren); + for (int j = 0; j < numChildren; j++) + childrenNodes[j] = GetNodeFromName(childrenNames[j]); + + if (nodePtr->OperationName() == RowStackNode::TypeName()) //allow for variable input nodes + nodePtr->AttachInputs(childrenNodes); + else //fixed input nodes { - case 1: - childNodePtr0 = GetNodeFromName(childrenNames[0]); - nodePtr->AttachInputs(childNodePtr0); - break; - case 2: - childNodePtr0 = GetNodeFromName(childrenNames[0]); - childNodePtr1 = GetNodeFromName(childrenNames[1]); - nodePtr->AttachInputs(childNodePtr0, childNodePtr1); - break; - case 3: - childNodePtr0 = GetNodeFromName(childrenNames[0]); - 
childNodePtr1 = GetNodeFromName(childrenNames[1]); - childNodePtr2 = GetNodeFromName(childrenNames[2]); - nodePtr->AttachInputs(childNodePtr0, childNodePtr1, childNodePtr2); - break; - case 4: - childNodePtr0 = GetNodeFromName(childrenNames[0]); - childNodePtr1 = GetNodeFromName(childrenNames[1]); - childNodePtr2 = GetNodeFromName(childrenNames[2]); - childNodePtr3 = GetNodeFromName(childrenNames[3]); - nodePtr->AttachInputs(childNodePtr0, childNodePtr1, childNodePtr2, childNodePtr3); - break; - case 5: - childNodePtr0 = GetNodeFromName(childrenNames[0]); - childNodePtr1 = GetNodeFromName(childrenNames[1]); - childNodePtr2 = GetNodeFromName(childrenNames[2]); - childNodePtr3 = GetNodeFromName(childrenNames[3]); - childNodePtr4 = GetNodeFromName(childrenNames[4]); - nodePtr->AttachInputs(childNodePtr0, childNodePtr1, childNodePtr2, childNodePtr3, childNodePtr4); - break; - default: - throw std::logic_error("Invalid number of children."); + switch (numChildren) + { + case 1: + nodePtr->AttachInputs(childrenNodes[0]); + break; + case 2: + nodePtr->AttachInputs(childrenNodes[0], childrenNodes[1]); + break; + case 3: + nodePtr->AttachInputs(childrenNodes[0], childrenNodes[1], childrenNodes[2]); + break; + case 4: + nodePtr->AttachInputs(childrenNodes[0], childrenNodes[1], childrenNodes[2], childrenNodes[3]); + break; + case 5: + nodePtr->AttachInputs(childrenNodes[0], childrenNodes[1], childrenNodes[2], childrenNodes[3], childrenNodes[4]); + break; + case 6: + nodePtr->AttachInputs(childrenNodes[0], childrenNodes[1], childrenNodes[2], childrenNodes[3], childrenNodes[4], childrenNodes[5]); + break; + default: + throw std::logic_error("Invalid number of children."); + } } } } @@ -1021,6 +1018,8 @@ public: newNode = new LookupTableNode(fstream, modelVersion, m_deviceId, nodeName); else if (nodeType == RowSliceNode::TypeName()) newNode = new RowSliceNode(fstream, modelVersion, m_deviceId, nodeName); + else if (nodeType == RowStackNode::TypeName()) + newNode = new RowStackNode(fstream, modelVersion, m_deviceId, nodeName); else if (nodeType == GMMLogLikelihoodNode::TypeName()) newNode = new GMMLogLikelihoodNode(fstream, modelVersion, m_deviceId, nodeName); else if (nodeType == CosDistanceWithNegativeSamplesNode::TypeName()) @@ -1190,6 +1189,8 @@ public: newNode = new TimeReverseNode(m_deviceId, nodeName); else if (nodeType == CosDistanceWithNegativeSamplesNode::TypeName()) newNode = new CosDistanceWithNegativeSamplesNode(m_deviceId, nodeName); + else if (nodeType == RowStackNode::TypeName()) + newNode = new RowStackNode(m_deviceId, nodeName); else { fprintf(stderr, "Error creating new ComputationNode of type %ls, with name %ls\n", nodeType.c_str(), nodeName.c_str()); @@ -1529,6 +1530,15 @@ public: return newNode; } + ComputationNodePtr RowStack(const std::vector inputs, const std::wstring nodeName = L"") + { + ComputationNodePtr newNode(new RowStackNode(m_deviceId, nodeName)); + newNode->AttachInputs(inputs); + AddNodeToNet(newNode); + + return newNode; + } + ComputationNodePtr GMMLogLikelihood(const ComputationNodePtr unnormedPrior, const ComputationNodePtr mean, const ComputationNodePtr logStddev, const ComputationNodePtr feature, const std::wstring nodeName = L"") { ComputationNodePtr newNode(new GMMLogLikelihoodNode(m_deviceId, nodeName)); diff --git a/MachineLearning/CNTK/ComputationNode.h b/MachineLearning/CNTK/ComputationNode.h index 9cd84b91b..55471acd3 100644 --- a/MachineLearning/CNTK/ComputationNode.h +++ b/MachineLearning/CNTK/ComputationNode.h @@ -152,6 +152,11 @@ namespace Microsoft { 
namespace MSR { namespace CNTK { throw std::logic_error("This operation does not support six inputs."); } + virtual void AttachInputs(const std::vector& /*inputs*/) + { + throw std::logic_error("This operation does not support variable-length inputs."); + } + virtual void DetachInputs() { m_children.resize(0); diff --git a/MachineLearning/CNTK/LinearAlgebraNodes.h b/MachineLearning/CNTK/LinearAlgebraNodes.h index eb3ecc8bc..ffbda78ea 100644 --- a/MachineLearning/CNTK/LinearAlgebraNodes.h +++ b/MachineLearning/CNTK/LinearAlgebraNodes.h @@ -429,6 +429,167 @@ namespace Microsoft { namespace MSR { namespace CNTK { template class RowSliceNode; template class RowSliceNode; + //this node is used to extract part of the input by rows as the output + //it has to be continuous segments of rows since each column is treated as one sample + template + class RowStackNode : public ComputationNode + { + UsingComputationNodeMembers; + public: + RowStackNode(const DEVICEID_TYPE deviceId = AUTOPLACEMATRIX, const std::wstring name = L"") : ComputationNode(deviceId) + { + m_nodeName = (name == L"" ? CreateUniqNodeName() : name); + m_deviceId = deviceId; + MoveMatricesToDevice(deviceId); + InitRecurrentNode(); + } + + RowStackNode(File& fstream, const size_t modelVersion, const DEVICEID_TYPE deviceId = AUTOPLACEMATRIX, const std::wstring name = L"") : ComputationNode(deviceId) + { + m_nodeName = (name == L"" ? CreateUniqNodeName() : name); + LoadFromFile(fstream, modelVersion, deviceId); + } + + // copy constructor + RowStackNode(const RowStackNode* node, const std::wstring& newName, const CopyNodeFlags flags) : ComputationNode(node->m_deviceId) + { + node->CopyTo(this, newName, flags); + } + + virtual ComputationNodePtr Duplicate(const std::wstring& newName, const CopyNodeFlags flags) const + { + const std::wstring& name = (newName == L"") ? 
NodeName() : newName; + + ComputationNodePtr node = new RowStackNode(this, name, flags); + return node; + } + + virtual void CopyTo(const ComputationNodePtr nodeP, const std::wstring& newName, const CopyNodeFlags flags) const + { + ComputationNode::CopyTo(nodeP, newName, flags); + RowStackNode* node = (RowStackNode*) nodeP; + + if (flags & CopyNodeFlags::copyNodeChildren) + { + node->m_children = m_children; + node->m_startRowIndeces = m_startRowIndeces; + node->m_inputMatrices = m_inputMatrices; + } + } + + virtual const std::wstring OperationName() const { return TypeName(); } + static const std::wstring TypeName() { return L"RowStack"; } + + virtual void ComputeInputPartial(const size_t inputIndex) + { + if (inputIndex >= ChildrenSize()) + throw std::invalid_argument("RowStack-ComputeInputPartial: inputIndex out of range."); + + ComputeInputPartialS(Inputs(inputIndex)->GradientValues(), GradientValues(), m_startRowIndeces[inputIndex], m_startRowIndeces[inputIndex + 1] - m_startRowIndeces[inputIndex]); + } + + virtual void ComputeInputPartial(const size_t inputIndex, const size_t timeIdxInSeq) + { + if (inputIndex >= ChildrenSize()) + throw std::invalid_argument("RowStack-ComputeInputPartial: inputIndex out of range."); + + Matrix sliceInputGrad = Inputs(inputIndex)->GradientValues().ColumnSlice(timeIdxInSeq * m_samplesInRecurrentStep, m_samplesInRecurrentStep); + Matrix sliceOutputGrad = GradientValues().ColumnSlice(timeIdxInSeq * m_samplesInRecurrentStep, m_samplesInRecurrentStep); + + ComputeInputPartialS(sliceInputGrad, sliceOutputGrad, m_startRowIndeces[inputIndex], m_startRowIndeces[inputIndex+1] - m_startRowIndeces[inputIndex]); + } + + static void WINAPI ComputeInputPartialS(Matrix& inputGradientValues, const Matrix& gradientValues, const size_t startIndex, const size_t numRows) + { + inputGradientValues.AddWithRowSliceValuesOf(gradientValues, startIndex, numRows); + } + + virtual void EvaluateThisNode() + { + EvaluateThisNodeS(m_functionValues, m_inputMatrices, 0, Inputs(0)->FunctionValues().GetNumCols()); + } + + virtual void EvaluateThisNode(const size_t timeIdxInSeq) + { + Matrix sliceFunctionValues = FunctionValues().ColumnSlice(timeIdxInSeq * m_samplesInRecurrentStep, m_samplesInRecurrentStep); + + EvaluateThisNodeS(sliceFunctionValues, m_inputMatrices, timeIdxInSeq * m_samplesInRecurrentStep, m_samplesInRecurrentStep); + } + + static void WINAPI EvaluateThisNodeS(Matrix& functionValues, const std::vector*>& inputMatrices, const size_t sliceStartCol, const size_t sliceNumCols) + { + functionValues.AssignRowStackValuesOf(inputMatrices, sliceStartCol, sliceNumCols); +#if NANCHECK + functionValues.HasNan("RowStack"); +#endif + } + + virtual void Validate() + { + PrintSelfBeforeValidation(); + + unsigned int numInputs = ChildrenSize(); + if (numInputs < 2) + LogicError("RowStack operation: must have two or more inputs."); + + if (Inputs(0) == nullptr) + LogicError("RowStack operation: the input node is NULL."); + + size_t numCols = Inputs(0)->FunctionValues().GetNumCols(); + m_startRowIndeces.resize(ChildrenSize()+1); + m_inputMatrices.resize(ChildrenSize()); + + size_t totalRows = 0; + m_startRowIndeces[0] = 0; + + for (int i = 0; i < ChildrenSize(); i++) + { + if (Inputs(i) == nullptr) + LogicError("RowStack operation: the input node is NULL."); + + Matrix& childMatrix = Inputs(i)->FunctionValues(); + size_t numRows = childMatrix.GetNumRows(); + if (numRows == 0) + LogicError("RowStack operation: the input node %ls has 0 rows.", Inputs(i)->NodeName().c_str()); + + if 
(childMatrix.GetNumCols() != numCols) + LogicError("RowStack operation: the input node %ls has different number of columns.", Inputs(i)->NodeName().c_str()); + + totalRows += numRows; + m_inputMatrices[i] = &childMatrix; + m_startRowIndeces[i + 1] = m_startRowIndeces[i] + numRows; + } + + FunctionValues().Resize(totalRows, numCols); + CopyImageSizeFromInputs(); + } + + virtual void CopyImageSizeFromInputs() + { + CopyImageSizeFromInput(0, true); + m_outputHeight = FunctionValues().GetNumRows(); + + //WARNING: this node will destroy the image size information from the child + if (m_inputWidth * m_inputChannels != 1) + fprintf(stderr, "WARNING: RowStack operation cannot inherit image size information from its child. Image size info is lost.\n"); + } + + virtual void AttachInputs(const std::vector& inputs) + { + unsigned int numInputs = inputs.size(); + m_children.resize(numInputs); + for (unsigned int i = 0; i < numInputs; i++) + m_children[i] = inputs[i]; + } + + private: + std::vector m_startRowIndeces; //start row number in the stacked matrix of each input (child) + std::vector*> m_inputMatrices; + }; + + template class RowStackNode; + template class RowStackNode; + template class ScaleNode : public ComputationNode { diff --git a/MachineLearning/CNTK/NetworkDescriptionLanguage.cpp b/MachineLearning/CNTK/NetworkDescriptionLanguage.cpp index 4f2f2309f..c4e35c5ad 100644 --- a/MachineLearning/CNTK/NetworkDescriptionLanguage.cpp +++ b/MachineLearning/CNTK/NetworkDescriptionLanguage.cpp @@ -220,6 +220,8 @@ bool CheckFunction(std::string& p_nodeType, bool* allowUndeterminedVariable) ret = true; else if (EqualInsensitive(nodeType, RowSliceNode::TypeName())) ret = true; + else if (EqualInsensitive(nodeType, RowStackNode::TypeName())) + ret = true; else if (EqualInsensitive(nodeType, LookupTableNode::TypeName())) ret = true; else if (EqualInsensitive(nodeType, GMMLogLikelihoodNode::TypeName(), L"GMMLL")) diff --git a/MachineLearning/CNTK/NonlinearityNodes.h b/MachineLearning/CNTK/NonlinearityNodes.h index 032851e96..015f7d884 100644 --- a/MachineLearning/CNTK/NonlinearityNodes.h +++ b/MachineLearning/CNTK/NonlinearityNodes.h @@ -1149,469 +1149,469 @@ namespace Microsoft { namespace MSR { namespace CNTK { template class LogSoftmaxNode; template class LogSoftmaxNode; - //calculates: the log likelihood of a feature given GMM parameters - template - class GMMLogLikelihoodNode : public ComputationNode - { - UsingComputationNodeMembers; - public: - GMMLogLikelihoodNode(const DEVICEID_TYPE deviceId = AUTOPLACEMATRIX, const std::wstring name = L"") - : ComputationNode(deviceId), m_prior(deviceId), m_normedDeviation(deviceId), m_normedDeviationVectors(deviceId), m_stddev(deviceId), m_posterior(deviceId), m_temp(deviceId) - { - m_nodeName = (name == L"" ? CreateUniqNodeName() : name); - m_deviceId = deviceId; - MoveMatricesToDevice(deviceId); - InitRecurrentNode(); - } - - GMMLogLikelihoodNode(File& fstream, const size_t modelVersion, const DEVICEID_TYPE deviceId = AUTOPLACEMATRIX, const std::wstring name = L"") - : ComputationNode(deviceId), m_prior(deviceId), m_normedDeviation(deviceId), m_normedDeviationVectors(deviceId), m_stddev(deviceId), m_posterior(deviceId), m_temp(deviceId) - { - m_nodeName = (name == L"" ? 
CreateUniqNodeName() : name); - LoadFromFile(fstream, modelVersion, deviceId); - } - - // copy constructor - GMMLogLikelihoodNode(const GMMLogLikelihoodNode* node, const std::wstring& newName, const CopyNodeFlags flags) - : ComputationNode(node->m_deviceId), m_prior(node->m_deviceId), m_normedDeviation(node->m_deviceId), m_normedDeviationVectors(node->m_deviceId), - m_stddev(node->m_deviceId), m_posterior(node->m_deviceId), m_temp(m_deviceId) - { - node->CopyTo(this, newName, flags); - } - - virtual ComputationNodePtr Duplicate(const std::wstring& newName, const CopyNodeFlags flags) const - { - const std::wstring& name = (newName == L"") ? NodeName() : newName; - - ComputationNodePtr node = new GMMLogLikelihoodNode(this, name, flags); - return node; - } - - virtual const std::wstring OperationName() const { return TypeName(); } - static const std::wstring TypeName() { return L"GMMLogLikelihood"; } - - virtual void ComputeInputPartial(const size_t inputIndex) - { - switch (inputIndex) - { - case 0: - ComputeInputPartialUnnormedPrior(Inputs(0)->GradientValues(), m_gradientValues, m_prior, m_posterior, m_temp); - break; - case 1: - ComputeInputPartialMean(Inputs(1)->GradientValues(), m_gradientValues, m_normedDeviationVectors, m_posterior, m_temp); - break; - case 2: - ComputeInputPartialLogStddev(Inputs(2)->GradientValues(), m_gradientValues, m_normedDeviation, m_posterior, m_temp); - break; - case 3: - ComputeInputPartialFeature(Inputs(3)->GradientValues(), m_gradientValues, m_normedDeviationVectors, m_posterior, m_temp); - break; - default: - throw std::invalid_argument("GMMLogLikelihoodNode only takes four inputs."); - } - } - - virtual void ComputeInputPartial(const size_t inputIndex, const size_t timeIdxInSeq) - { - //get the right slice - size_t startIndex = timeIdxInSeq * m_samplesInRecurrentStep; - - size_t colsPrior = Inputs(0)->FunctionValues().GetNumCols(); - - Matrix sliceGradientValue = m_gradientValues.ColumnSlice(startIndex, m_samplesInRecurrentStep); - Matrix slicePosterior = m_posterior.ColumnSlice(startIndex, m_samplesInRecurrentStep); - - switch (inputIndex) - { - case 0: - { - if (colsPrior == 1) - ComputeInputPartialUnnormedPrior(Inputs(0)->GradientValues(), sliceGradientValue, m_prior, slicePosterior, m_temp); - else - { - Matrix sliceUnnormedPriorGradient = Inputs(0)->GradientValues().ColumnSlice(startIndex, m_samplesInRecurrentStep); - Matrix slicePrior = m_prior.ColumnSlice(startIndex, m_samplesInRecurrentStep); - ComputeInputPartialUnnormedPrior(sliceUnnormedPriorGradient, sliceGradientValue, slicePrior, slicePosterior, m_temp); - } - } - break; - case 1: - { - Matrix sliceNormedDeviationVectors = m_normedDeviationVectors.ColumnSlice(startIndex, m_samplesInRecurrentStep); - if (colsPrior == 1) - ComputeInputPartialMean(Inputs(1)->GradientValues(), sliceGradientValue, sliceNormedDeviationVectors, slicePosterior, m_temp); - else - { - Matrix sliceMeanGradient = Inputs(1)->GradientValues().ColumnSlice(startIndex, m_samplesInRecurrentStep); - ComputeInputPartialMean(sliceMeanGradient, sliceGradientValue, sliceNormedDeviationVectors, slicePosterior, m_temp); - } - } - break; - case 2: - { - Matrix sliceNormedDeviation = m_normedDeviation.ColumnSlice(startIndex, m_samplesInRecurrentStep); - if (colsPrior == 1) - ComputeInputPartialLogStddev(Inputs(2)->GradientValues(), sliceGradientValue, sliceNormedDeviation, slicePosterior, m_temp); - else - { - Matrix sliceLotStddevGradient = Inputs(2)->GradientValues().ColumnSlice(startIndex, m_samplesInRecurrentStep); - 
ComputeInputPartialLogStddev(sliceLotStddevGradient, sliceGradientValue, sliceNormedDeviation, slicePosterior, m_temp); - } - } - break; - case 3: - { - Matrix sliceNormedDeviationVectors = m_normedDeviationVectors.ColumnSlice(startIndex, m_samplesInRecurrentStep); - Matrix sliceFeatureGradient = Inputs(3)->GradientValues().ColumnSlice(startIndex, m_samplesInRecurrentStep); - ComputeInputPartialFeature(sliceFeatureGradient, sliceGradientValue, sliceNormedDeviationVectors, slicePosterior, m_temp); - } - break; - default: - throw std::invalid_argument("GMMLogLikelihoodNode criterion only takes four inputs."); - } - } - - static void WINAPI ComputeInputPartialUnnormedPrior(Matrix& unnormedPriorGradientValues, const Matrix& gradientValues, - const Matrix& prior, const Matrix& posterior, Matrix& temp) - { - temp.AssignDifferenceOf(posterior, prior); - temp.RowElementMultiplyWith(gradientValues); - if (prior.GetNumCols() == posterior.GetNumCols()) - { - unnormedPriorGradientValues += temp; - } - else if (prior.GetNumCols() == 1) - { - Matrix::MultiplyAndAdd(temp, false, ConstOnes(posterior.GetNumCols(), 1, unnormedPriorGradientValues.GetDeviceId()), false, unnormedPriorGradientValues); - } - else - { - throw std::runtime_error("GMMLogLikelihoodNode: UnnormedPrior should either have same number of columns as the features or have only one column."); - } - } - - static void WINAPI ComputeInputPartialMean(Matrix& meanGradientValues, const Matrix& gradientValues, const Matrix& normedDeviationVectors, - Matrix& posterior, Matrix& temp) - { - size_t numComponent = posterior.GetNumRows(); - size_t numSamples = posterior.GetNumCols(); - size_t featureSize = normedDeviationVectors.GetNumRows() / numComponent; - - temp.SetValue(normedDeviationVectors); //recall normedDeviationVectors <-- (x-u_c)/(stddev^2) - temp.Reshape(featureSize, numSamples* numComponent); - - posterior.Reshape(1, numSamples* numComponent); - temp.RowElementMultiplyWith(posterior); //temp <-- posterior * (x-u_c)/(stddev^2) - - posterior.Reshape(numComponent, numSamples); //reshape back - temp.Reshape(featureSize * numComponent, numSamples); //reshape back - - temp.RowElementMultiplyWith(gradientValues); - - if (numSamples == meanGradientValues.GetNumCols()) - { - meanGradientValues += temp; - } - else if (meanGradientValues.GetNumCols() == 1) - { - Matrix::MultiplyAndAdd(temp, false, ConstOnes(numSamples, 1, meanGradientValues.GetDeviceId()), false, meanGradientValues); - } - else - { - throw std::runtime_error("GMMLogLikelihoodNode: stddev should either have same number of columns as the features or have only one column."); - } - } - - static void WINAPI ComputeInputPartialLogStddev(Matrix& logStddevGradientValues, const Matrix& gradientValues, const Matrix& normedDeviation, - const Matrix& posterior, Matrix& temp) - { - size_t numComponent = posterior.GetNumRows(); - size_t numSamples = posterior.GetNumCols(); - - temp.AssignDifferenceOf(normedDeviation, (ElemType)numComponent); - temp.ElementMultiplyWith(posterior); - temp.RowElementMultiplyWith(gradientValues); - if (logStddevGradientValues.GetNumCols() == numSamples) - { - logStddevGradientValues += temp; - } - else if (logStddevGradientValues.GetNumCols() == 1) - { - Matrix::MultiplyAndAdd(temp, false, ConstOnes(numSamples, 1, logStddevGradientValues.GetDeviceId()), false, logStddevGradientValues); - } - else - { - throw std::runtime_error("GMMLogLikelihoodNode: stddev should either have same number of columns as the features or have only one column."); - } - } - - static void 
WINAPI ComputeInputPartialFeature(Matrix& featureGradientValues, const Matrix& gradientValues, const Matrix& normedDeviationVectors, - Matrix& posterior, Matrix& temp) - { - size_t numComponent = posterior.GetNumRows(); - size_t numSamples = posterior.GetNumCols(); - size_t featureSize = normedDeviationVectors.GetNumRows() / numComponent; - - temp.SetValue(normedDeviationVectors); - temp *= -1; - temp.Reshape(featureSize, numSamples* numComponent); - posterior.Reshape(1, numSamples* numComponent); - temp.RowElementMultiplyWith(posterior); - - posterior.Reshape(numComponent, numSamples); - temp.Reshape(featureSize * numComponent, numSamples); - temp.RowElementMultiplyWith(gradientValues); - - for (int i = 0; i < numComponent; i++) - featureGradientValues.AddWithRowSliceValuesOf(temp, i*featureSize, featureSize); - } - - virtual void SetFunctionAndGradientSize(const int numSamples) - { - ComputationNode::SetFunctionAndGradientSize(numSamples); - - size_t numComponents = Inputs(0)->FunctionValues().GetNumRows(); - size_t colsPrior = Inputs(0)->FunctionValues().GetNumCols(); - //size_t numSamples = Inputs(3)->FunctionValues().GetNumCols(); - size_t featureSize = Inputs(3)->FunctionValues().GetNumRows(); - - m_prior.Resize(numComponents, colsPrior); - m_stddev.Resize(numComponents, colsPrior); - m_normedDeviation.Resize(numComponents, numSamples); - m_normedDeviationVectors.Resize(numComponents * featureSize, numSamples); - m_posterior.Resize(numComponents, numSamples); - } - - //input0=unnormedPrior, input1=mean, input2=logstddev, input3=feature - virtual void EvaluateThisNode() - { - // all internal matrices will be automatically resized since all of them are assigned to a value so no resize is needed here. - EvaluateThisNodeS(FunctionValues(), Inputs(0)->FunctionValues(), Inputs(1)->FunctionValues(), Inputs(2)->FunctionValues(), Inputs(3)->FunctionValues(), - m_prior, m_stddev, m_normedDeviationVectors, m_normedDeviation, m_posterior, m_temp); - } - - //input0=unnormedPrior, input1=mean, input2=logstddev, input3=feature - virtual void EvaluateThisNode(const size_t timeIdxInSeq) - { - size_t colsPrior = Inputs(0)->FunctionValues().GetNumCols(); - size_t numSamples = Inputs(3)->FunctionValues().GetNumCols(); - - //get the right slice - size_t startIndex = timeIdxInSeq * m_samplesInRecurrentStep; - - Matrix sliceOutputValue = m_functionValues.ColumnSlice(startIndex, m_samplesInRecurrentStep); - Matrix sliceFeature = Inputs(3)->FunctionValues().ColumnSlice(startIndex, m_samplesInRecurrentStep); - Matrix sliceNormedDeviation = m_normedDeviation.ColumnSlice(startIndex, m_samplesInRecurrentStep); - Matrix sliceNormedDeviationVectors = m_normedDeviationVectors.ColumnSlice(startIndex, m_samplesInRecurrentStep); - Matrix slicePosterior = m_posterior.ColumnSlice(startIndex, m_samplesInRecurrentStep); - - if (colsPrior == 1) - { - EvaluateThisNodeS(sliceOutputValue, Inputs(0)->FunctionValues(), Inputs(1)->FunctionValues(), Inputs(2)->FunctionValues(), sliceFeature, - m_prior, m_stddev, sliceNormedDeviationVectors, sliceNormedDeviation, slicePosterior, m_temp); - } - else if (colsPrior == numSamples) - { - Matrix sliceUnnormedPrior = Inputs(0)->FunctionValues().ColumnSlice(startIndex, m_samplesInRecurrentStep); - Matrix sliceMean = Inputs(1)->FunctionValues().ColumnSlice(startIndex, m_samplesInRecurrentStep); - Matrix sliceLogstddev = Inputs(2)->FunctionValues().ColumnSlice(startIndex, m_samplesInRecurrentStep); - - Matrix slicePrior = m_prior.ColumnSlice(startIndex, m_samplesInRecurrentStep); - Matrix 
sliceStddev = m_stddev.ColumnSlice(startIndex, m_samplesInRecurrentStep); - - EvaluateThisNodeS(sliceOutputValue, sliceUnnormedPrior, sliceMean, sliceLogstddev, sliceFeature, - slicePrior, sliceStddev, sliceNormedDeviationVectors, sliceNormedDeviation, slicePosterior, m_temp); - } - else //should not reach the code since validation should fail already - { - throw std::runtime_error("GMMLogLikelihoodNode: UnnormedPrior should either have same number of columns as the features or have only one column."); - } - - } - - //input0=unnormedPrior, input1=mean, input2=logstddev, input3=feature - //If we want to speed up we need to replace following code with a several specialized GPU functions - static void WINAPI EvaluateThisNodeS(Matrix& functionValues, const Matrix& unnormedPrior, const Matrix& mean, Matrix& logstddev, - const Matrix& feature, Matrix& prior, Matrix& stddev, Matrix& normedDeviationVectors, - Matrix& normedDeviation, Matrix& posterior, Matrix& temp) - { - int numComponent = unnormedPrior.GetNumRows(); - size_t numSamples = feature.GetNumCols(); - size_t featureDim = feature.GetNumRows(); - - //compute prior which is softmax of unnormedPrior - prior.AssignLogSoftmaxOf(unnormedPrior, true); //log prior - - prior.InplaceExp(); - - //compute stddev - stddev.AssignExpOf(logstddev); - -#if DUMPOUTPUT - unnormedPrior.Print("unnormedPrior", 0, min(5, unnormedPrior.GetNumRows() - 1), 0, min(10, unnormedPrior.GetNumCols() - 1)); - mean.Print("mean", 0, min(5, mean.GetNumRows() - 1), 0, min(10, mean.GetNumCols() - 1)); - logstddev.Print("logstddev", 0, min(5, logstddev.GetNumRows() - 1), 0, min(10, logstddev.GetNumCols() - 1)); - - prior.Print("prior", 0, min(5, prior.GetNumRows() - 1), 0, min(10, prior.GetNumCols() - 1)); - stddev.Print("stddev", 0, min(5, stddev.GetNumRows() - 1), 0, min(10, stddev.GetNumCols() - 1)); -#endif - - //compute normedDeviation <-- ||x-u_c||^2/(stddev^2) - normedDeviationVectors.AssignRepeatOf(feature, numComponent, 1); - normedDeviationVectors -= mean; //each column of the mean has multiple mean components - normedDeviationVectors.Reshape(featureDim, numSamples* numComponent); //now each column is feature-mean_i - - normedDeviation.AssignVectorNorm2Of(normedDeviationVectors, true); - normedDeviation ^= 2; - temp.AssignRepeatOf(stddev, 1, numSamples / stddev.GetNumCols()); //stddev.GetNumCols() is either 1 or =numSamples - temp.Reshape(1, temp.GetNumElements()); //one stddev value for each component for each sample - temp ^= 2; - normedDeviation.ElementDivideBy(temp); //normedDeviation and temp have same dim (1, numSamples* numComponent) - - //compute normedDeviationVectors <-- (x-u_c)/(stddev^2) - normedDeviationVectors.RowElementDivideBy(temp); //divide twice - normedDeviationVectors.Reshape(featureDim*numComponent, numSamples); //reshape back - - //compute per-component likelihood - posterior.AssignProductOf(-0.5f, normedDeviation); //posterior <-- -||x-u_c||^2/(stddev^2)/2 and in (1, numSamples* numComponent) dim - temp.InplaceLog(); - temp *= ((ElemType)numComponent / 2.0f); //temp <-- stddev^c and in (1, numSamples* numComponent) dim - posterior -= temp; // posterior <-- exp[-||x-u_c||^2/(stddev^2)/2]/(stddev^c) - posterior -= (ElemType)(numComponent / 2.0f*log(TWO_PI)); //likelihood for each component and sample is now computed and stored in posterior - posterior.InplaceExp(); //posterior <-- exp(-||x-u_c||^2/(stddev^2)/2) - - normedDeviation.Reshape(numComponent, numSamples); //reshape back - posterior.Reshape(numComponent, numSamples); //reshape back - 
- //compute posterior <-- prior_i * likelihood_i - if (unnormedPrior.GetNumCols() == numSamples) //each sample has different prior - posterior.ElementMultiplyWith(prior); - else //all samples share the same prior - posterior.ColumnElementMultiplyWith(prior); - - //compute GMM log-likelihood - Matrix::Multiply(ConstOnes(1, numComponent, posterior.GetDeviceId()), false, posterior, false, functionValues); //functionValues <-- total likelihood - posterior.RowElementDivideBy(functionValues); //posterior <-- per-comp likelihood / total likelihood - functionValues.InplaceLog(); //log likelihood - -#if DUMPOUTPUT - temp.Print("temp", 0, min(5, temp.GetNumRows() - 1), 0, min(10, temp.GetNumCols() - 1)); - normedDeviation.Print("normedDeviation", 0, min(5, normedDeviation.GetNumRows() - 1), 0, min(10, normedDeviation.GetNumCols() - 1)); - - posterior.Print("posterior", 0, min(5, posterior.GetNumRows() - 1), 0, min(10, posterior.GetNumCols() - 1)); - functionValues.Print("functionValues", 0, min(5, functionValues.GetNumRows() - 1), 0, min(10, functionValues.GetNumCols() - 1)); - - functionValues.Print("GMMLogLikelihoodNode"); -#endif - -#if NANCHECK - functionValues.HasNan("GMMLogLikelihood"); -#endif - } - - virtual void Validate() - { - PrintSelfBeforeValidation(); - - if (m_children.size() != 4) - throw std::logic_error("GMMLogLikelihoodNode requires four inputs."); - - size_t rows[4], cols[4]; - for (int i = 0; i < 4; i++) - { - rows[i] = Inputs(i)->FunctionValues().GetNumRows(); - cols[i] = Inputs(i)->FunctionValues().GetNumCols(); - } - - if (cols[0] != cols[1] || cols[0] != cols[2]) - throw std::logic_error("GMMLogLikelihoodNode: UnnormedPrior (first input), mean (second input), and logStddev (third input) should have same number of columns."); - - if (cols[0] != 1 && cols[0] != cols[3]) - throw std::logic_error("GMMLogLikelihoodNode: UnnormedPrior (first input) should either have same number of columns as the features (fourth input) or have only one column."); - - if (rows[0] != rows[2]) - throw std::logic_error("GMMLogLikelihoodNode: UnnormedPrior (first input) should have same dimension as logStddev (third input), i.e., all dimensions in each Gaussian component share the same stddev."); - - if (rows[1] != rows[0]*rows[3]) - throw std::logic_error("GMMLogLikelihoodNode: the number of rows in mean (second input) should equal rows(unnormedPrior(first input) * rows(feature(fourth input))."); - - FunctionValues().Resize(1, cols[3]); - CopyImageSizeFromInputs(); - } - - virtual void CopyImageSizeFromInputs() - { - CopyImageSizeFromInput(3, false); - - m_outputChannels = 1; - m_outputWidth = 1; - m_outputHeight = 1; - } - - //leftNode should be the empirical - virtual void AttachInputs(const ComputationNodePtr unnormedPrior, const ComputationNodePtr mean, const ComputationNodePtr logStddev, const ComputationNodePtr feature) - { - m_children.resize(4); - m_children[0] = unnormedPrior; - m_children[1] = mean; - m_children[2] = logStddev; - m_children[3] = feature; - } - - virtual void MoveMatricesToDevice(const DEVICEID_TYPE deviceId) - { - ComputationNode::MoveMatricesToDevice(deviceId); - - if (deviceId != AUTOPLACEMATRIX) - { - if (m_prior.GetDeviceId() != deviceId) - { - m_prior.TransferFromDeviceToDevice(m_prior.GetDeviceId(), deviceId, true); - } - if (m_normedDeviation.GetDeviceId() != deviceId) - { - m_normedDeviation.TransferFromDeviceToDevice(m_normedDeviation.GetDeviceId(), deviceId, true); - } - if (m_normedDeviationVectors.GetDeviceId() != deviceId) - { - 
m_normedDeviationVectors.TransferFromDeviceToDevice(m_normedDeviationVectors.GetDeviceId(), deviceId, true); - } - if (m_stddev.GetDeviceId() != deviceId) - { - m_stddev.TransferFromDeviceToDevice(m_stddev.GetDeviceId(), deviceId, true); - } - if (m_posterior.GetDeviceId() != deviceId) - { - m_posterior.TransferFromDeviceToDevice(m_posterior.GetDeviceId(), deviceId, true); - } - } - } - - virtual void CopyTo(const ComputationNodePtr nodeP, const std::wstring& newName, const CopyNodeFlags flags) const - { - ComputationNode::CopyTo(nodeP, newName, flags); - GMMLogLikelihoodNode* node = (GMMLogLikelihoodNode*) nodeP; - - if (flags & CopyNodeFlags::copyNodeValue) - { - node->m_prior = m_prior; - node->m_normedDeviation = m_normedDeviation; - node->m_normedDeviationVectors = m_normedDeviationVectors; - node->m_stddev = m_stddev; - node->m_posterior = m_posterior; - } - } - - protected: - Matrix m_prior; - Matrix m_normedDeviation; - Matrix m_normedDeviationVectors; - Matrix m_stddev; - Matrix m_posterior; - Matrix m_temp; - }; - - template class GMMLogLikelihoodNode; - template class GMMLogLikelihoodNode; - + //calculates: the log likelihood of a feature given GMM parameters + template + class GMMLogLikelihoodNode : public ComputationNode + { + UsingComputationNodeMembers; + public: + GMMLogLikelihoodNode(const DEVICEID_TYPE deviceId = AUTOPLACEMATRIX, const std::wstring name = L"") + : ComputationNode(deviceId), m_prior(deviceId), m_normedDeviation(deviceId), m_normedDeviationVectors(deviceId), m_stddev(deviceId), m_posterior(deviceId), m_temp(deviceId) + { + m_nodeName = (name == L"" ? CreateUniqNodeName() : name); + m_deviceId = deviceId; + MoveMatricesToDevice(deviceId); + InitRecurrentNode(); + } + + GMMLogLikelihoodNode(File& fstream, const size_t modelVersion, const DEVICEID_TYPE deviceId = AUTOPLACEMATRIX, const std::wstring name = L"") + : ComputationNode(deviceId), m_prior(deviceId), m_normedDeviation(deviceId), m_normedDeviationVectors(deviceId), m_stddev(deviceId), m_posterior(deviceId), m_temp(deviceId) + { + m_nodeName = (name == L"" ? CreateUniqNodeName() : name); + LoadFromFile(fstream, modelVersion, deviceId); + } + + // copy constructor + GMMLogLikelihoodNode(const GMMLogLikelihoodNode* node, const std::wstring& newName, const CopyNodeFlags flags) + : ComputationNode(node->m_deviceId), m_prior(node->m_deviceId), m_normedDeviation(node->m_deviceId), m_normedDeviationVectors(node->m_deviceId), + m_stddev(node->m_deviceId), m_posterior(node->m_deviceId), m_temp(m_deviceId) + { + node->CopyTo(this, newName, flags); + } + + virtual ComputationNodePtr Duplicate(const std::wstring& newName, const CopyNodeFlags flags) const + { + const std::wstring& name = (newName == L"") ? 
NodeName() : newName; + + ComputationNodePtr node = new GMMLogLikelihoodNode(this, name, flags); + return node; + } + + virtual const std::wstring OperationName() const { return TypeName(); } + static const std::wstring TypeName() { return L"GMMLogLikelihood"; } + + virtual void ComputeInputPartial(const size_t inputIndex) + { + switch (inputIndex) + { + case 0: + ComputeInputPartialUnnormedPrior(Inputs(0)->GradientValues(), m_gradientValues, m_prior, m_posterior, m_temp); + break; + case 1: + ComputeInputPartialMean(Inputs(1)->GradientValues(), m_gradientValues, m_normedDeviationVectors, m_posterior, m_temp); + break; + case 2: + ComputeInputPartialLogStddev(Inputs(2)->GradientValues(), m_gradientValues, m_normedDeviation, m_posterior, m_temp); + break; + case 3: + ComputeInputPartialFeature(Inputs(3)->GradientValues(), m_gradientValues, m_normedDeviationVectors, m_posterior, m_temp); + break; + default: + throw std::invalid_argument("GMMLogLikelihoodNode only takes four inputs."); + } + } + + virtual void ComputeInputPartial(const size_t inputIndex, const size_t timeIdxInSeq) + { + //get the right slice + size_t startIndex = timeIdxInSeq * m_samplesInRecurrentStep; + + size_t colsPrior = Inputs(0)->FunctionValues().GetNumCols(); + + Matrix sliceGradientValue = m_gradientValues.ColumnSlice(startIndex, m_samplesInRecurrentStep); + Matrix slicePosterior = m_posterior.ColumnSlice(startIndex, m_samplesInRecurrentStep); + + switch (inputIndex) + { + case 0: + { + if (colsPrior == 1) + ComputeInputPartialUnnormedPrior(Inputs(0)->GradientValues(), sliceGradientValue, m_prior, slicePosterior, m_temp); + else + { + Matrix sliceUnnormedPriorGradient = Inputs(0)->GradientValues().ColumnSlice(startIndex, m_samplesInRecurrentStep); + Matrix slicePrior = m_prior.ColumnSlice(startIndex, m_samplesInRecurrentStep); + ComputeInputPartialUnnormedPrior(sliceUnnormedPriorGradient, sliceGradientValue, slicePrior, slicePosterior, m_temp); + } + } + break; + case 1: + { + Matrix sliceNormedDeviationVectors = m_normedDeviationVectors.ColumnSlice(startIndex, m_samplesInRecurrentStep); + if (colsPrior == 1) + ComputeInputPartialMean(Inputs(1)->GradientValues(), sliceGradientValue, sliceNormedDeviationVectors, slicePosterior, m_temp); + else + { + Matrix sliceMeanGradient = Inputs(1)->GradientValues().ColumnSlice(startIndex, m_samplesInRecurrentStep); + ComputeInputPartialMean(sliceMeanGradient, sliceGradientValue, sliceNormedDeviationVectors, slicePosterior, m_temp); + } + } + break; + case 2: + { + Matrix sliceNormedDeviation = m_normedDeviation.ColumnSlice(startIndex, m_samplesInRecurrentStep); + if (colsPrior == 1) + ComputeInputPartialLogStddev(Inputs(2)->GradientValues(), sliceGradientValue, sliceNormedDeviation, slicePosterior, m_temp); + else + { + Matrix sliceLotStddevGradient = Inputs(2)->GradientValues().ColumnSlice(startIndex, m_samplesInRecurrentStep); + ComputeInputPartialLogStddev(sliceLotStddevGradient, sliceGradientValue, sliceNormedDeviation, slicePosterior, m_temp); + } + } + break; + case 3: + { + Matrix sliceNormedDeviationVectors = m_normedDeviationVectors.ColumnSlice(startIndex, m_samplesInRecurrentStep); + Matrix sliceFeatureGradient = Inputs(3)->GradientValues().ColumnSlice(startIndex, m_samplesInRecurrentStep); + ComputeInputPartialFeature(sliceFeatureGradient, sliceGradientValue, sliceNormedDeviationVectors, slicePosterior, m_temp); + } + break; + default: + throw std::invalid_argument("GMMLogLikelihoodNode criterion only takes four inputs."); + } + } + + static void WINAPI 
ComputeInputPartialUnnormedPrior(Matrix& unnormedPriorGradientValues, const Matrix& gradientValues, + const Matrix& prior, const Matrix& posterior, Matrix& temp) + { + temp.AssignDifferenceOf(posterior, prior); + temp.RowElementMultiplyWith(gradientValues); + if (prior.GetNumCols() == posterior.GetNumCols()) + { + unnormedPriorGradientValues += temp; + } + else if (prior.GetNumCols() == 1) + { + Matrix::MultiplyAndAdd(temp, false, ConstOnes(posterior.GetNumCols(), 1, unnormedPriorGradientValues.GetDeviceId()), false, unnormedPriorGradientValues); + } + else + { + throw std::runtime_error("GMMLogLikelihoodNode: UnnormedPrior should either have same number of columns as the features or have only one column."); + } + } + + static void WINAPI ComputeInputPartialMean(Matrix& meanGradientValues, const Matrix& gradientValues, const Matrix& normedDeviationVectors, + Matrix& posterior, Matrix& temp) + { + size_t numComponent = posterior.GetNumRows(); + size_t numSamples = posterior.GetNumCols(); + size_t featureSize = normedDeviationVectors.GetNumRows() / numComponent; + + temp.SetValue(normedDeviationVectors); //recall normedDeviationVectors <-- (x-u_c)/(stddev^2) + temp.Reshape(featureSize, numSamples* numComponent); + + posterior.Reshape(1, numSamples* numComponent); + temp.RowElementMultiplyWith(posterior); //temp <-- posterior * (x-u_c)/(stddev^2) + + posterior.Reshape(numComponent, numSamples); //reshape back + temp.Reshape(featureSize * numComponent, numSamples); //reshape back + + temp.RowElementMultiplyWith(gradientValues); + + if (numSamples == meanGradientValues.GetNumCols()) + { + meanGradientValues += temp; + } + else if (meanGradientValues.GetNumCols() == 1) + { + Matrix::MultiplyAndAdd(temp, false, ConstOnes(numSamples, 1, meanGradientValues.GetDeviceId()), false, meanGradientValues); + } + else + { + throw std::runtime_error("GMMLogLikelihoodNode: stddev should either have same number of columns as the features or have only one column."); + } + } + + static void WINAPI ComputeInputPartialLogStddev(Matrix& logStddevGradientValues, const Matrix& gradientValues, const Matrix& normedDeviation, + const Matrix& posterior, Matrix& temp) + { + size_t numComponent = posterior.GetNumRows(); + size_t numSamples = posterior.GetNumCols(); + + temp.AssignDifferenceOf(normedDeviation, (ElemType)numComponent); + temp.ElementMultiplyWith(posterior); + temp.RowElementMultiplyWith(gradientValues); + if (logStddevGradientValues.GetNumCols() == numSamples) + { + logStddevGradientValues += temp; + } + else if (logStddevGradientValues.GetNumCols() == 1) + { + Matrix::MultiplyAndAdd(temp, false, ConstOnes(numSamples, 1, logStddevGradientValues.GetDeviceId()), false, logStddevGradientValues); + } + else + { + throw std::runtime_error("GMMLogLikelihoodNode: stddev should either have same number of columns as the features or have only one column."); + } + } + + static void WINAPI ComputeInputPartialFeature(Matrix& featureGradientValues, const Matrix& gradientValues, const Matrix& normedDeviationVectors, + Matrix& posterior, Matrix& temp) + { + size_t numComponent = posterior.GetNumRows(); + size_t numSamples = posterior.GetNumCols(); + size_t featureSize = normedDeviationVectors.GetNumRows() / numComponent; + + temp.SetValue(normedDeviationVectors); + temp *= -1; + temp.Reshape(featureSize, numSamples* numComponent); + posterior.Reshape(1, numSamples* numComponent); + temp.RowElementMultiplyWith(posterior); + + posterior.Reshape(numComponent, numSamples); + temp.Reshape(featureSize * numComponent, 
numSamples); + temp.RowElementMultiplyWith(gradientValues); + + for (int i = 0; i < numComponent; i++) + featureGradientValues.AddWithRowSliceValuesOf(temp, i*featureSize, featureSize); + } + + virtual void SetFunctionAndGradientSize(const int numSamples) + { + ComputationNode::SetFunctionAndGradientSize(numSamples); + + size_t numComponents = Inputs(0)->FunctionValues().GetNumRows(); + size_t colsPrior = Inputs(0)->FunctionValues().GetNumCols(); + //size_t numSamples = Inputs(3)->FunctionValues().GetNumCols(); + size_t featureSize = Inputs(3)->FunctionValues().GetNumRows(); + + m_prior.Resize(numComponents, colsPrior); + m_stddev.Resize(numComponents, colsPrior); + m_normedDeviation.Resize(numComponents, numSamples); + m_normedDeviationVectors.Resize(numComponents * featureSize, numSamples); + m_posterior.Resize(numComponents, numSamples); + } + + //input0=unnormedPrior, input1=mean, input2=logstddev, input3=feature + virtual void EvaluateThisNode() + { + // all internal matrices will be automatically resized since all of them are assigned to a value so no resize is needed here. + EvaluateThisNodeS(FunctionValues(), Inputs(0)->FunctionValues(), Inputs(1)->FunctionValues(), Inputs(2)->FunctionValues(), Inputs(3)->FunctionValues(), + m_prior, m_stddev, m_normedDeviationVectors, m_normedDeviation, m_posterior, m_temp); + } + + //input0=unnormedPrior, input1=mean, input2=logstddev, input3=feature + virtual void EvaluateThisNode(const size_t timeIdxInSeq) + { + size_t colsPrior = Inputs(0)->FunctionValues().GetNumCols(); + size_t numSamples = Inputs(3)->FunctionValues().GetNumCols(); + + //get the right slice + size_t startIndex = timeIdxInSeq * m_samplesInRecurrentStep; + + Matrix sliceOutputValue = m_functionValues.ColumnSlice(startIndex, m_samplesInRecurrentStep); + Matrix sliceFeature = Inputs(3)->FunctionValues().ColumnSlice(startIndex, m_samplesInRecurrentStep); + Matrix sliceNormedDeviation = m_normedDeviation.ColumnSlice(startIndex, m_samplesInRecurrentStep); + Matrix sliceNormedDeviationVectors = m_normedDeviationVectors.ColumnSlice(startIndex, m_samplesInRecurrentStep); + Matrix slicePosterior = m_posterior.ColumnSlice(startIndex, m_samplesInRecurrentStep); + + if (colsPrior == 1) + { + EvaluateThisNodeS(sliceOutputValue, Inputs(0)->FunctionValues(), Inputs(1)->FunctionValues(), Inputs(2)->FunctionValues(), sliceFeature, + m_prior, m_stddev, sliceNormedDeviationVectors, sliceNormedDeviation, slicePosterior, m_temp); + } + else if (colsPrior == numSamples) + { + Matrix sliceUnnormedPrior = Inputs(0)->FunctionValues().ColumnSlice(startIndex, m_samplesInRecurrentStep); + Matrix sliceMean = Inputs(1)->FunctionValues().ColumnSlice(startIndex, m_samplesInRecurrentStep); + Matrix sliceLogstddev = Inputs(2)->FunctionValues().ColumnSlice(startIndex, m_samplesInRecurrentStep); + + Matrix slicePrior = m_prior.ColumnSlice(startIndex, m_samplesInRecurrentStep); + Matrix sliceStddev = m_stddev.ColumnSlice(startIndex, m_samplesInRecurrentStep); + + EvaluateThisNodeS(sliceOutputValue, sliceUnnormedPrior, sliceMean, sliceLogstddev, sliceFeature, + slicePrior, sliceStddev, sliceNormedDeviationVectors, sliceNormedDeviation, slicePosterior, m_temp); + } + else //should not reach the code since validation should fail already + { + throw std::runtime_error("GMMLogLikelihoodNode: UnnormedPrior should either have same number of columns as the features or have only one column."); + } + + } + + //input0=unnormedPrior, input1=mean, input2=logstddev, input3=feature + //If we want to speed up we need to replace 
following code with a several specialized GPU functions + static void WINAPI EvaluateThisNodeS(Matrix& functionValues, const Matrix& unnormedPrior, const Matrix& mean, Matrix& logstddev, + const Matrix& feature, Matrix& prior, Matrix& stddev, Matrix& normedDeviationVectors, + Matrix& normedDeviation, Matrix& posterior, Matrix& temp) + { + int numComponent = unnormedPrior.GetNumRows(); + size_t numSamples = feature.GetNumCols(); + size_t featureDim = feature.GetNumRows(); + + //compute prior which is softmax of unnormedPrior + prior.AssignLogSoftmaxOf(unnormedPrior, true); //log prior + + prior.InplaceExp(); + + //compute stddev + stddev.AssignExpOf(logstddev); + +#if DUMPOUTPUT + unnormedPrior.Print("unnormedPrior", 0, min(5, unnormedPrior.GetNumRows() - 1), 0, min(10, unnormedPrior.GetNumCols() - 1)); + mean.Print("mean", 0, min(5, mean.GetNumRows() - 1), 0, min(10, mean.GetNumCols() - 1)); + logstddev.Print("logstddev", 0, min(5, logstddev.GetNumRows() - 1), 0, min(10, logstddev.GetNumCols() - 1)); + + prior.Print("prior", 0, min(5, prior.GetNumRows() - 1), 0, min(10, prior.GetNumCols() - 1)); + stddev.Print("stddev", 0, min(5, stddev.GetNumRows() - 1), 0, min(10, stddev.GetNumCols() - 1)); +#endif + + //compute normedDeviation <-- ||x-u_c||^2/(stddev^2) + normedDeviationVectors.AssignRepeatOf(feature, numComponent, 1); + normedDeviationVectors -= mean; //each column of the mean has multiple mean components + normedDeviationVectors.Reshape(featureDim, numSamples* numComponent); //now each column is feature-mean_i + + normedDeviation.AssignVectorNorm2Of(normedDeviationVectors, true); + normedDeviation ^= 2; + temp.AssignRepeatOf(stddev, 1, numSamples / stddev.GetNumCols()); //stddev.GetNumCols() is either 1 or =numSamples + temp.Reshape(1, temp.GetNumElements()); //one stddev value for each component for each sample + temp ^= 2; + normedDeviation.ElementDivideBy(temp); //normedDeviation and temp have same dim (1, numSamples* numComponent) + + //compute normedDeviationVectors <-- (x-u_c)/(stddev^2) + normedDeviationVectors.RowElementDivideBy(temp); //divide twice + normedDeviationVectors.Reshape(featureDim*numComponent, numSamples); //reshape back + + //compute per-component likelihood + posterior.AssignProductOf(-0.5f, normedDeviation); //posterior <-- -||x-u_c||^2/(stddev^2)/2 and in (1, numSamples* numComponent) dim + temp.InplaceLog(); + temp *= ((ElemType)numComponent / 2.0f); //temp <-- stddev^c and in (1, numSamples* numComponent) dim + posterior -= temp; // posterior <-- exp[-||x-u_c||^2/(stddev^2)/2]/(stddev^c) + posterior -= (ElemType)(numComponent / 2.0f*log(TWO_PI)); //likelihood for each component and sample is now computed and stored in posterior + posterior.InplaceExp(); //posterior <-- exp(-||x-u_c||^2/(stddev^2)/2) + + normedDeviation.Reshape(numComponent, numSamples); //reshape back + posterior.Reshape(numComponent, numSamples); //reshape back + + //compute posterior <-- prior_i * likelihood_i + if (unnormedPrior.GetNumCols() == numSamples) //each sample has different prior + posterior.ElementMultiplyWith(prior); + else //all samples share the same prior + posterior.ColumnElementMultiplyWith(prior); + + //compute GMM log-likelihood + Matrix::Multiply(ConstOnes(1, numComponent, posterior.GetDeviceId()), false, posterior, false, functionValues); //functionValues <-- total likelihood + posterior.RowElementDivideBy(functionValues); //posterior <-- per-comp likelihood / total likelihood + functionValues.InplaceLog(); //log likelihood + +#if DUMPOUTPUT + temp.Print("temp", 0, 
min(5, temp.GetNumRows() - 1), 0, min(10, temp.GetNumCols() - 1)); + normedDeviation.Print("normedDeviation", 0, min(5, normedDeviation.GetNumRows() - 1), 0, min(10, normedDeviation.GetNumCols() - 1)); + + posterior.Print("posterior", 0, min(5, posterior.GetNumRows() - 1), 0, min(10, posterior.GetNumCols() - 1)); + functionValues.Print("functionValues", 0, min(5, functionValues.GetNumRows() - 1), 0, min(10, functionValues.GetNumCols() - 1)); + + functionValues.Print("GMMLogLikelihoodNode"); +#endif + +#if NANCHECK + functionValues.HasNan("GMMLogLikelihood"); +#endif + } + + virtual void Validate() + { + PrintSelfBeforeValidation(); + + if (m_children.size() != 4) + throw std::logic_error("GMMLogLikelihoodNode requires four inputs."); + + size_t rows[4], cols[4]; + for (int i = 0; i < 4; i++) + { + rows[i] = Inputs(i)->FunctionValues().GetNumRows(); + cols[i] = Inputs(i)->FunctionValues().GetNumCols(); + } + + if (cols[0] != cols[1] || cols[0] != cols[2]) + throw std::logic_error("GMMLogLikelihoodNode: UnnormedPrior (first input), mean (second input), and logStddev (third input) should have same number of columns."); + + if (cols[0] != 1 && cols[0] != cols[3]) + throw std::logic_error("GMMLogLikelihoodNode: UnnormedPrior (first input) should either have same number of columns as the features (fourth input) or have only one column."); + + if (rows[0] != rows[2]) + throw std::logic_error("GMMLogLikelihoodNode: UnnormedPrior (first input) should have same dimension as logStddev (third input), i.e., all dimensions in each Gaussian component share the same stddev."); + + if (rows[1] != rows[0]*rows[3]) + throw std::logic_error("GMMLogLikelihoodNode: the number of rows in mean (second input) should equal rows(unnormedPrior(first input) * rows(feature(fourth input))."); + + FunctionValues().Resize(1, cols[3]); + CopyImageSizeFromInputs(); + } + + virtual void CopyImageSizeFromInputs() + { + CopyImageSizeFromInput(3, false); + + m_outputChannels = 1; + m_outputWidth = 1; + m_outputHeight = 1; + } + + //leftNode should be the empirical + virtual void AttachInputs(const ComputationNodePtr unnormedPrior, const ComputationNodePtr mean, const ComputationNodePtr logStddev, const ComputationNodePtr feature) + { + m_children.resize(4); + m_children[0] = unnormedPrior; + m_children[1] = mean; + m_children[2] = logStddev; + m_children[3] = feature; + } + + virtual void MoveMatricesToDevice(const DEVICEID_TYPE deviceId) + { + ComputationNode::MoveMatricesToDevice(deviceId); + + if (deviceId != AUTOPLACEMATRIX) + { + if (m_prior.GetDeviceId() != deviceId) + { + m_prior.TransferFromDeviceToDevice(m_prior.GetDeviceId(), deviceId, true); + } + if (m_normedDeviation.GetDeviceId() != deviceId) + { + m_normedDeviation.TransferFromDeviceToDevice(m_normedDeviation.GetDeviceId(), deviceId, true); + } + if (m_normedDeviationVectors.GetDeviceId() != deviceId) + { + m_normedDeviationVectors.TransferFromDeviceToDevice(m_normedDeviationVectors.GetDeviceId(), deviceId, true); + } + if (m_stddev.GetDeviceId() != deviceId) + { + m_stddev.TransferFromDeviceToDevice(m_stddev.GetDeviceId(), deviceId, true); + } + if (m_posterior.GetDeviceId() != deviceId) + { + m_posterior.TransferFromDeviceToDevice(m_posterior.GetDeviceId(), deviceId, true); + } + } + } + + virtual void CopyTo(const ComputationNodePtr nodeP, const std::wstring& newName, const CopyNodeFlags flags) const + { + ComputationNode::CopyTo(nodeP, newName, flags); + GMMLogLikelihoodNode* node = (GMMLogLikelihoodNode*) nodeP; + + if (flags & CopyNodeFlags::copyNodeValue) 
+ { + node->m_prior = m_prior; + node->m_normedDeviation = m_normedDeviation; + node->m_normedDeviationVectors = m_normedDeviationVectors; + node->m_stddev = m_stddev; + node->m_posterior = m_posterior; + } + } + + protected: + Matrix m_prior; + Matrix m_normedDeviation; + Matrix m_normedDeviationVectors; + Matrix m_stddev; + Matrix m_posterior; + Matrix m_temp; + }; + + template class GMMLogLikelihoodNode; + template class GMMLogLikelihoodNode; + }}} diff --git a/MachineLearning/CNTK/SynchronousExecutionEngine.h b/MachineLearning/CNTK/SynchronousExecutionEngine.h index 10eeaeecd..a7d2960ce 100644 --- a/MachineLearning/CNTK/SynchronousExecutionEngine.h +++ b/MachineLearning/CNTK/SynchronousExecutionEngine.h @@ -391,26 +391,43 @@ public: { std::vector inputs = EvaluateParameters(node, baseName, nodeParamStart, nodeParamCount, pass); - switch (inputs.size()) + if (cnNodeType == RowStackNode::TypeName()) //support variable length inputs { - case 1: - nodePtr->AttachInputs(ComputationNodePtr(inputs[0])); - break; - case 2: - nodePtr->AttachInputs(ComputationNodePtr(inputs[0]), ComputationNodePtr(inputs[1])); - break; - case 3: - nodePtr->AttachInputs(ComputationNodePtr(inputs[0]), ComputationNodePtr(inputs[1]), ComputationNodePtr(inputs[2])); - break; - case 4: - nodePtr->AttachInputs(ComputationNodePtr(inputs[0]), ComputationNodePtr(inputs[1]), ComputationNodePtr(inputs[2]), ComputationNodePtr(inputs[3])); - break; - default: - if (nodeParamCount > 0) - RuntimeError("Invalid number of parameters name = '%s' call = '%s'\n", node->GetName().c_str(), node->GetValue().c_str()); - break; - } + std::vector inputNodes; + inputNodes.resize(inputs.size()); + for (int i = 0; i < inputs.size(); i++) + inputNodes[i] = ComputationNodePtr(inputs[i]); + nodePtr->AttachInputs(inputNodes); + } + else + { + switch (inputs.size()) + { + case 1: + nodePtr->AttachInputs(ComputationNodePtr(inputs[0])); + break; + case 2: + nodePtr->AttachInputs(ComputationNodePtr(inputs[0]), ComputationNodePtr(inputs[1])); + break; + case 3: + nodePtr->AttachInputs(ComputationNodePtr(inputs[0]), ComputationNodePtr(inputs[1]), ComputationNodePtr(inputs[2])); + break; + case 4: + nodePtr->AttachInputs(ComputationNodePtr(inputs[0]), ComputationNodePtr(inputs[1]), ComputationNodePtr(inputs[2]), ComputationNodePtr(inputs[3])); + break; + case 5: + nodePtr->AttachInputs(ComputationNodePtr(inputs[0]), ComputationNodePtr(inputs[1]), ComputationNodePtr(inputs[2]), ComputationNodePtr(inputs[3]), ComputationNodePtr(inputs[4])); + break; + case 6: + nodePtr->AttachInputs(ComputationNodePtr(inputs[0]), ComputationNodePtr(inputs[1]), ComputationNodePtr(inputs[2]), ComputationNodePtr(inputs[3]), ComputationNodePtr(inputs[4]), ComputationNodePtr(inputs[5])); + break; + default: + if (nodeParamCount > 0) + RuntimeError("Invalid number of parameters name = '%s' call = '%s'\n", node->GetName().c_str(), node->GetValue().c_str()); + break; + } + } // process common optional parameters (like "tag"); ProcessOptionalParameters(node); break; diff --git a/Math/CNTKMathTest/CPUMatrixUnitTests.cpp b/Math/CNTKMathTest/CPUMatrixUnitTests.cpp index fadebe55e..0c9a762b9 100644 --- a/Math/CNTKMathTest/CPUMatrixUnitTests.cpp +++ b/Math/CNTKMathTest/CPUMatrixUnitTests.cpp @@ -563,7 +563,7 @@ namespace CNTKMathTest Assert::IsTrue(C.IsEqualTo(D1, 0.0001)); } - TEST_METHOD(CPUMatrixRowSlice) + TEST_METHOD(CPUMatrixRowSliceAndStack) { Matrix M0(5,3); M0(0,0) = 1; M0(0,1) = 6; M0(0,2) = 11; @@ -590,6 +590,26 @@ namespace CNTKMathTest M3 += M0; 
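// Adding all rows of M1 into rows [2,4) of M0 in place should reproduce the expected matrix M3.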
M0.AddToRowSliceValuesOf(M1, 2,2); Assert::IsTrue(M3.IsEqualTo(M0, 0.0001)); + + M2.AddWithRowSliceValuesOf(M1, 0, 2); + Matrix M4(2, 3); + M4(0, 0) = 6; M4(0, 1) = 16; M4(0, 2) = 26; + M4(1, 0) = 8; M4(1, 1) = 18; M4(1, 2) = 28; + Assert::IsTrue(M2.IsEqualTo(M4, 0.0001)); + + Matrix M5, M6, M7, M8; + M5.AssignRowSliceValuesOf(M0, 0, 2); + M6.AssignRowSliceValuesOf(M0, 2, 1); + M7.AssignRowSliceValuesOf(M0, 3, 2); + + std::vector inputMatrices; + inputMatrices.resize(3); + inputMatrices[0] = &M5; + inputMatrices[1] = &M6; + inputMatrices[2] = &M7; + M8.AssignRowStackValuesOf(inputMatrices, 0, 3); + + Assert::IsTrue(M8.IsEqualTo(M0, 0.0001)); } TEST_METHOD(CPUAssignRepeatOf) diff --git a/Math/CNTKMathTest/GPUMatrixUnitTests.cpp b/Math/CNTKMathTest/GPUMatrixUnitTests.cpp index 86033e27c..c3d25bbe5 100644 --- a/Math/CNTKMathTest/GPUMatrixUnitTests.cpp +++ b/Math/CNTKMathTest/GPUMatrixUnitTests.cpp @@ -278,7 +278,7 @@ namespace CNTKMathTest Assert::IsTrue(M2.IsEqualTo(M3, 0.0001f)); } - TEST_METHOD(GPUMatrixRowSlice) + TEST_METHOD(GPUMatrixRowSliceAndStack) { float *fArray = new float[15]; fArray[0] = 1; fArray[5] = 6; fArray[10] = 11; @@ -308,6 +308,27 @@ namespace CNTKMathTest M3 += M0; M0.AddToRowSliceValuesOf(M1, 2,2); Assert::IsTrue(M3.IsEqualTo(M0, 0.0001)); + + M2.AddWithRowSliceValuesOf(M1, 0, 2); + float *fArray4 = new float[6]; + fArray4[0] = 6; fArray4[2] = 16; fArray4[4] = 26; + fArray4[1] = 8; fArray4[3] = 18; fArray4[5] = 28; + GPUMatrix M4(2, 3, fArray4, matrixFlagNormal); + Assert::IsTrue(M2.IsEqualTo(M4, 0.0001)); + + GPUMatrix M5, M6, M7, M8; + M5.AssignRowSliceValuesOf(M0, 0, 2); + M6.AssignRowSliceValuesOf(M0, 2, 1); + M7.AssignRowSliceValuesOf(M0, 3, 2); + + std::vector *> inputMatrices; + inputMatrices.resize(3); + inputMatrices[0] = &M5; + inputMatrices[1] = &M6; + inputMatrices[2] = &M7; + M8.AssignRowStackValuesOf(inputMatrices, 0, 3); + + Assert::IsTrue(M8.IsEqualTo(M0, 0.0001)); } TEST_METHOD(GPUKhatriRaoProduct) diff --git a/Math/Math/CPUMatrix.cpp b/Math/Math/CPUMatrix.cpp index 2c34b52ac..8052fe718 100644 --- a/Math/Math/CPUMatrix.cpp +++ b/Math/Math/CPUMatrix.cpp @@ -389,6 +389,48 @@ namespace Microsoft { namespace MSR { namespace CNTK { return *this; } + //stack the columns in inputMatrices (starting from sliceStartCol for sliceNumCols columns) and assign it to [this] object. 
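+ //e.g., AssignRowStackValuesOf({&A, &B, &C}, 0, 3) with a 2x3 A, 1x3 B, and 2x3 C yields a 5x3 result
+ //holding [A; B; C], which is what the RowSliceAndStack unit tests above verify.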
+ template + CPUMatrix& CPUMatrix::AssignRowStackValuesOf(const std::vector*>& inputMatrices, const size_t sliceStartCol, const size_t sliceNumCols) + { + if (sliceNumCols == 0) + LogicError("AssignRowStackValuesOf: sliceNumCols should > 0."); + + size_t totalRows = 0; + size_t* startRowIndeces = new size_t[inputMatrices.size()]; + startRowIndeces[0] = 0; + for (int i = 0; i < inputMatrices.size(); i++) + { + const CPUMatrix& a = *inputMatrices[i]; + if (a.IsEmpty()) + LogicError("AssignRowStackValuesOf: input matrix (%d) is empty.", i); + + if (a.GetNumCols() < sliceStartCol + sliceNumCols) + LogicError("AssignRowStackValuesOf: input matrix (%d) GetNumCols() < sliceStartCol + sliceNumCols.", i); + + totalRows += a.GetNumRows(); + if (iGetNumRows() * sizeof(ElemType)); + } + } + + delete [] startRowIndeces; + + return *this; + } + template void CPUMatrix::MinusOneAt(CPUMatrix& c, const size_t position) { diff --git a/Math/Math/CPUMatrix.h b/Math/Math/CPUMatrix.h index 59cce206f..cebe2afc8 100644 --- a/Math/Math/CPUMatrix.h +++ b/Math/Math/CPUMatrix.h @@ -244,6 +244,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { CPUMatrix& AssignRowSliceValuesOf(const CPUMatrix& a, const size_t startIndex, const size_t numRows); CPUMatrix& AddToRowSliceValuesOf(const CPUMatrix& a, const size_t startIndex, const size_t numRows); CPUMatrix& AddWithRowSliceValuesOf(const CPUMatrix& a, const size_t startIndex, const size_t numRows); + CPUMatrix& AssignRowStackValuesOf(const std::vector*>& inputMatrices, const size_t sliceStartCol, const size_t sliceNumCols); CPUMatrix& AssignRepeatOf(const CPUMatrix& a, const size_t numRowRepeats, const size_t numColRepeats); CPUMatrix& AssignPositiveAndShiftedNegSample(const CPUMatrix& a, const size_t posNumber, const size_t negNumber, const size_t shiftNumber); diff --git a/Math/Math/GPUMatrix.cu b/Math/Math/GPUMatrix.cu index fd01aa342..792bf24c4 100644 --- a/Math/Math/GPUMatrix.cu +++ b/Math/Math/GPUMatrix.cu @@ -650,6 +650,63 @@ namespace Microsoft { namespace MSR { namespace CNTK { return *this; } + //stack the columns in inputMatrices (starting from sliceStartCol for sliceNumCols columns) and assign it to [this] object. 
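+ //The GPU implementation below gathers each input's device buffer pointer and cumulative row offset
+ //on the host, copies both arrays to the device, and launches a single _assignRowStackValuesOf kernel
+ //in which every output element looks up the source matrix whose row range contains it.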
+ template + GPUMatrix& GPUMatrix::AssignRowStackValuesOf(const std::vector*>& inputMatrices, const size_t sliceStartCol, const size_t sliceNumCols) + { + if (sliceNumCols == 0) + LogicError("AssignRowStackValuesOf: sliceNumCols should > 0."); + + size_t totalRows = 0; + size_t* startRowIndeces = new size_t[inputMatrices.size()+1]; + ElemType ** bufferPointersInInputMatrices = new ElemType*[inputMatrices.size()]; + + startRowIndeces[0] = 0; + + for (int i = 0; i < inputMatrices.size(); i++) + { + const GPUMatrix& a = *inputMatrices[i]; + if (a.IsEmpty()) + LogicError("AssignRowStackValuesOf: input matrix (%d) is empty.", i); + + if (a.GetNumCols() < sliceStartCol + sliceNumCols) + LogicError("AssignRowStackValuesOf: input matrix (%d) GetNumCols() < sliceStartCol + sliceNumCols.", i); + + totalRows += a.GetNumRows(); + startRowIndeces[i + 1] = startRowIndeces[i] + a.GetNumRows(); + + bufferPointersInInputMatrices[i] = a.m_pArray + a.LocateColumn(sliceStartCol); + } + + Resize(totalRows, sliceNumCols); + + PrepareDevice(); + + ElemType** bufferPointersInGPU = NULL; + CUDA_CALL(cudaMalloc((void***)&bufferPointersInGPU, inputMatrices.size()*sizeof(ElemType*))); + CUDA_CALL(cudaMemcpy(bufferPointersInGPU, bufferPointersInInputMatrices, inputMatrices.size()*sizeof(ElemType*), cudaMemcpyHostToDevice)); + delete[] bufferPointersInInputMatrices; + + size_t* startRowIndecesInGPU = NULL; + CUDA_CALL(cudaMalloc((void**)&startRowIndecesInGPU, (1+inputMatrices.size())*sizeof(size_t))); + CUDA_CALL(cudaMemcpy(startRowIndecesInGPU, startRowIndeces, (1+inputMatrices.size())*sizeof(size_t), cudaMemcpyHostToDevice)); + delete[] startRowIndeces; + + LONG64 N = (LONG64)GetNumElements(); + int blocksPerGrid = (int)ceil(1.0*N / threadsPerBlock); + cudaEvent_t done = nullptr; + if (do_sync) CUDA_CALL(cudaEventCreate(&done)); + _assignRowStackValuesOf << > >(m_pArray, bufferPointersInGPU, startRowIndecesInGPU, (long) inputMatrices.size(), N, (long)GetNumRows(), (long)GetNumCols()); + if (do_sync) CUDA_CALL(cudaEventRecord(done)); + if (do_sync) CUDA_CALL(cudaEventSynchronize(done)); + if (do_sync) CUDA_CALL(cudaEventDestroy(done)); + + CUDA_CALL(cudaFree(bufferPointersInGPU)); + CUDA_CALL(cudaFree(startRowIndecesInGPU)); + + return *this; + } + /// c = c - 1.0 for a specific position template void GPUMatrix::MinusOneAt(GPUMatrix& c, const size_t position) diff --git a/Math/Math/GPUMatrix.h b/Math/Math/GPUMatrix.h index 7d0328fd4..1e5a780cf 100644 --- a/Math/Math/GPUMatrix.h +++ b/Math/Math/GPUMatrix.h @@ -273,6 +273,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { GPUMatrix& AssignRowSliceValuesOf(const GPUMatrix& a, const size_t startIndex, const size_t numRows); GPUMatrix& AddToRowSliceValuesOf(const GPUMatrix& a, const size_t startIndex, const size_t numRows); GPUMatrix& AddWithRowSliceValuesOf(const GPUMatrix& a, const size_t startIndex, const size_t numRows); + GPUMatrix& AssignRowStackValuesOf(const std::vector*>& inputMatrices, const size_t sliceStartCol, const size_t sliceNumCols); GPUMatrix& AssignRepeatOf(const GPUMatrix& a, const size_t numRowRepeats, const size_t numColRepeats); GPUMatrix& AssignPositiveAndShiftedNegSample(const GPUMatrix& a, const size_t posNumber, const size_t negNumber, const size_t shiftNumber); diff --git a/Math/Math/GPUMatrixCUDAKernels.cu b/Math/Math/GPUMatrixCUDAKernels.cu index 3a6e6d201..b507dd445 100644 --- a/Math/Math/GPUMatrixCUDAKernels.cu +++ b/Math/Math/GPUMatrixCUDAKernels.cu @@ -364,6 +364,27 @@ __global__ void _addWithRowSliceValuesOf(ElemType * dest, 
ElemType * src, const dest[id] += src[IDX2C(row + startIndex, col, srcRows)]; } +template +__global__ void _assignRowStackValuesOf(ElemType * dest, ElemType ** srces, size_t* startRowIndeces, const LONG64 numSrces, const LONG64 N, const long destRows, const long destCols) +{ + LONG64 id = blockDim.x * blockIdx.x + threadIdx.x; + if (id >= N) + return; + + long col = id / destRows; //dest is the full matrix, rowslice is taken from the src + long row = id - (col * destRows); + + //can we replace the for loop with something better? + int srcId = 0; + for (; srcId < numSrces; srcId++) + { + if (startRowIndeces[srcId + 1]>row) + break; + } + + dest[id] = srces[srcId][IDX2C(row - startRowIndeces[srcId], col, startRowIndeces[srcId+1] - startRowIndeces[srcId])]; +} + template __global__ void _assignRepeatOf(ElemType * dest, ElemType * src, const LONG64 N, const long srcRows, const long srcCols, const long destRows) { diff --git a/Math/Math/Matrix.cpp b/Math/Math/Matrix.cpp index ec53f4b07..29e6e9247 100644 --- a/Math/Math/Matrix.cpp +++ b/Math/Math/Matrix.cpp @@ -1477,6 +1477,68 @@ namespace Microsoft { namespace MSR { namespace CNTK { return *this; } + //stack the columns in inputMatrices (starting from sliceStartCol for sliceNumCols columns) and assign it to [this] object. + template + Matrix& Matrix::AssignRowStackValuesOf(const std::vector*>& inputMatrices, const size_t sliceStartCol, const size_t sliceNumCols) + { + for (int i = 0; i < inputMatrices.size(); i++) + { + const Matrix& a = *inputMatrices[i]; + DecideAndMoveToRightDevice(*this, a); + + //WARNING: a and this must have same type + if (!(GetMatrixType() == a.GetMatrixType())) + NOT_IMPLEMENTED; + } + + CurrentDataLocation curLocation = GetCurrentMatrixLocation(); + if (curLocation == CurrentDataLocation::GPU || curLocation == CurrentDataLocation::BOTH) + { + if (GetMatrixType() != MatrixType::SPARSE) + { + //GPUDense; + std::vector*> gpuInputMatrices; + gpuInputMatrices.resize(inputMatrices.size()); + for (int i = 0; i < inputMatrices.size(); i++) + gpuInputMatrices[i] = inputMatrices[i]->m_GPUMatrix; + + m_GPUMatrix->AssignRowStackValuesOf(gpuInputMatrices, sliceStartCol, sliceNumCols); + + SetDataLocation(CurrentDataLocation::GPU, MatrixType::DENSE); + } + else + { + NOT_IMPLEMENTED; + } + } + else if (curLocation == CurrentDataLocation::CPU) + { + if (GetMatrixType() != MatrixType::SPARSE) + { + //CPUDense; + std::vector*> cpuInputMatrices; + cpuInputMatrices.resize(inputMatrices.size()); + for (int i = 0; i < inputMatrices.size(); i++) + cpuInputMatrices[i] = inputMatrices[i]->m_CPUMatrix; + + m_CPUMatrix->AssignRowStackValuesOf(cpuInputMatrices, sliceStartCol, sliceNumCols); + + SetDataLocation(CurrentDataLocation::CPU, MatrixType::DENSE); + } + else + { + NOT_IMPLEMENTED; + } + } + else + { + throw std::runtime_error("Matrices do not exist in either CPU or GPU."); + } + + return *this; + } + + template Matrix& Matrix::AssignRepeatOf(const Matrix& a, const size_t numRowRepeats, const size_t numColRepeats) { diff --git a/Math/Math/Matrix.h b/Math/Math/Matrix.h index 033647ac0..aa224cafc 100644 --- a/Math/Math/Matrix.h +++ b/Math/Math/Matrix.h @@ -255,6 +255,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { Matrix& AssignRowSliceValuesOf(const Matrix& a, const size_t startIndex, const size_t numRows); Matrix& AddToRowSliceValuesOf(const Matrix& a, const size_t startIndex, const size_t numRows); Matrix& AddWithRowSliceValuesOf(const Matrix& a, const size_t startIndex, const size_t numRows); + Matrix& 
AssignRowStackValuesOf(const std::vector*>& inputMatrices, const size_t sliceStartCol, const size_t sliceNumCols); Matrix& AssignRepeatOf(const Matrix& a, const size_t numRowRepeats, const size_t numColRepeats); Matrix& AssignPositiveAndShiftedNegSample(const Matrix& a, const size_t posNumber, const size_t negNumber, const size_t shiftNumber); diff --git a/Math/Math/NoGPU.cpp b/Math/Math/NoGPU.cpp index e120c7f08..05c78b5f9 100644 --- a/Math/Math/NoGPU.cpp +++ b/Math/Math/NoGPU.cpp @@ -477,6 +477,7 @@ namespace Microsoft { //for each column of a, we add all rows of a to this starting from startIndex template GPUMatrix& GPUMatrix::AddToRowSliceValuesOf(const GPUMatrix& /*a*/, const size_t startIndex, const size_t numRows) { return *this; } template GPUMatrix& GPUMatrix::AddWithRowSliceValuesOf(const GPUMatrix& /*a*/, const size_t startIndex, const size_t numRows) { return *this; } + GPUMatrix& AssignRowStackValuesOf(const std::vector*>& inputMatrices, const size_t sliceStartCol, const size_t sliceNumCols) { return *this; } template GPUMatrix& GPUMatrix::AssignRepeatOf(const GPUMatrix& /*a*/, const size_t numRowRepeats, const size_t numColRepeats) { return *this; } template GPUMatrix& GPUMatrix::AssignPositiveAndShiftedNegSample(const GPUMatrix& a, const size_t posNumber, const size_t negNumber, const size_t shiftNumber) { return *this; } From a391ea7a7d0aa3040d1fb1c9a2048373ed4472b1 Mon Sep 17 00:00:00 2001 From: Dong Yu Date: Thu, 11 Jun 2015 18:48:35 -0700 Subject: [PATCH 15/21] ignore this. For some reason I have to commit it to be able to merge with the server. --- Scripts/build-and-test | 0 1 file changed, 0 insertions(+), 0 deletions(-) mode change 100755 => 100644 Scripts/build-and-test diff --git a/Scripts/build-and-test b/Scripts/build-and-test old mode 100755 new mode 100644 From 8d0a82b5047f59ff1c80f1d64661b3ebfd9b87f3 Mon Sep 17 00:00:00 2001 From: Marko Radmilac Date: Thu, 11 Jun 2015 17:38:50 -0700 Subject: [PATCH 16/21] Add back chmod for config file, it is needed on Windows/Cygwin --- Scripts/build-and-test | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/Scripts/build-and-test b/Scripts/build-and-test index 4b452dd9d..669a28a19 100644 --- a/Scripts/build-and-test +++ b/Scripts/build-and-test @@ -119,6 +119,10 @@ cd $CNTK_ROOT if ! [[ -f $CONF_FILE ]]; then cp Demos/Simple/Simple.config $CONF_FILE || exit $? + + # This chmod is necessary due to restrictive Cygwin interpretation of Windows permissions. + # Cygwin interprets Windows permissions as ----rwx---, which lacks read permissions for user. + chmod a+r $CONF_FILE || exit $? 
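+ # (a+r grants read to user, group, and other, so the copied config stays readable however Cygwin maps the ACL.)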
fi if [[ $QUIET_BUILD == 1 ]]; then From 44b510d9e819e5d11e01dee71260423dfef6b58d Mon Sep 17 00:00:00 2001 From: Marko Radmilac Date: Fri, 12 Jun 2015 11:43:31 -0700 Subject: [PATCH 17/21] Reset build script to proper Linux permissions --- Scripts/build-and-test | 0 1 file changed, 0 insertions(+), 0 deletions(-) mode change 100644 => 100755 Scripts/build-and-test diff --git a/Scripts/build-and-test b/Scripts/build-and-test old mode 100644 new mode 100755 From 34fdae054ce69a1a30f4ac009d87ff979fa89e5d Mon Sep 17 00:00:00 2001 From: amitaga Date: Fri, 12 Jun 2015 14:10:27 -0700 Subject: [PATCH 18/21] Fixed Linux build --- MachineLearning/CNTK/ComputationNetwork.h | 5 +++-- MachineLearning/CNTK/ComputationNode.h | 4 +++- MachineLearning/CNTK/LinearAlgebraNodes.h | 2 +- MachineLearning/CNTK/SynchronousExecutionEngine.h | 3 ++- 4 files changed, 9 insertions(+), 5 deletions(-) diff --git a/MachineLearning/CNTK/ComputationNetwork.h b/MachineLearning/CNTK/ComputationNetwork.h index 3b8c515b6..b024d20b9 100644 --- a/MachineLearning/CNTK/ComputationNetwork.h +++ b/MachineLearning/CNTK/ComputationNetwork.h @@ -41,6 +41,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { { protected: typedef ComputationNode* ComputationNodePtr; + typedef const ComputationNode* ConstComputationNodePtr; typedef std::pair ComputationArc; typedef struct stRecurrentInfo{ std::vector m_recurrentNodes; @@ -548,7 +549,7 @@ public: } ComputationNodePtr nodePtr = GetNodeFromName(nodeName); - std::vector childrenNodes; + std::vector childrenNodes; childrenNodes.resize(numChildren); for (int j = 0; j < numChildren; j++) childrenNodes[j] = GetNodeFromName(childrenNames[j]); @@ -1530,7 +1531,7 @@ public: return newNode; } - ComputationNodePtr RowStack(const std::vector inputs, const std::wstring nodeName = L"") + ComputationNodePtr RowStack(const std::vector inputs, const std::wstring nodeName = L"") { ComputationNodePtr newNode(new RowStackNode(m_deviceId, nodeName)); newNode->AttachInputs(inputs); diff --git a/MachineLearning/CNTK/ComputationNode.h b/MachineLearning/CNTK/ComputationNode.h index 55471acd3..a78f76a24 100644 --- a/MachineLearning/CNTK/ComputationNode.h +++ b/MachineLearning/CNTK/ComputationNode.h @@ -57,6 +57,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { protected: //std containers such as list and map does not support class reference so we need to use pointer typedef ComputationNode* ComputationNodePtr; + typedef const ComputationNode* ConstComputationNodePtr; typedef std::pair ComputationArc; int m_loopId; size_t m_samplesInRecurrentStep; @@ -152,7 +153,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { throw std::logic_error("This operation does not support six inputs."); } - virtual void AttachInputs(const std::vector& /*inputs*/) + virtual void AttachInputs(const std::vector& /*inputs*/) { throw std::logic_error("This operation does not support variable-length inputs."); } @@ -919,6 +920,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { typedef ComputationNode B; \ protected: \ typedef ComputationNode* ComputationNodePtr; \ + typedef const ComputationNode* ConstComputationNodePtr; \ public: \ using B::AttachInputs; using B::ChildrenNeedGradient; using B::ChildrenSize; using B::ClearGradientForChildren; \ using B::ComputeGradientForChildren; using B::ComputeInputPartial; using B::ConstOnes; using B::CopyImageSizeFromInput; \ diff --git a/MachineLearning/CNTK/LinearAlgebraNodes.h b/MachineLearning/CNTK/LinearAlgebraNodes.h index ffbda78ea..7961766e3 100644 --- 

From 34fdae054ce69a1a30f4ac009d87ff979fa89e5d Mon Sep 17 00:00:00 2001
From: amitaga
Date: Fri, 12 Jun 2015 14:10:27 -0700
Subject: [PATCH 18/21] Fixed Linux build

---
 MachineLearning/CNTK/ComputationNetwork.h         | 5 +++--
 MachineLearning/CNTK/ComputationNode.h            | 4 +++-
 MachineLearning/CNTK/LinearAlgebraNodes.h         | 2 +-
 MachineLearning/CNTK/SynchronousExecutionEngine.h | 3 ++-
 4 files changed, 9 insertions(+), 5 deletions(-)

diff --git a/MachineLearning/CNTK/ComputationNetwork.h b/MachineLearning/CNTK/ComputationNetwork.h
index 3b8c515b6..b024d20b9 100644
--- a/MachineLearning/CNTK/ComputationNetwork.h
+++ b/MachineLearning/CNTK/ComputationNetwork.h
@@ -41,6 +41,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
 {
 protected:
     typedef ComputationNode<ElemType>* ComputationNodePtr;
+    typedef const ComputationNode<ElemType>* ConstComputationNodePtr;
     typedef std::pair<ComputationNodePtr, ComputationNodePtr> ComputationArc;
     typedef struct stRecurrentInfo{
         std::vector<ComputationNodePtr> m_recurrentNodes;
@@ -548,7 +549,7 @@ public:
         }
 
         ComputationNodePtr nodePtr = GetNodeFromName(nodeName);
-        std::vector<ComputationNodePtr> childrenNodes;
+        std::vector<ConstComputationNodePtr> childrenNodes;
         childrenNodes.resize(numChildren);
         for (int j = 0; j < numChildren; j++)
             childrenNodes[j] = GetNodeFromName(childrenNames[j]);
@@ -1530,7 +1531,7 @@ public:
         return newNode;
     }
 
-    ComputationNodePtr RowStack(const std::vector<ComputationNodePtr> inputs, const std::wstring nodeName = L"")
+    ComputationNodePtr RowStack(const std::vector<ConstComputationNodePtr> inputs, const std::wstring nodeName = L"")
     {
         ComputationNodePtr newNode(new RowStackNode<ElemType>(m_deviceId, nodeName));
         newNode->AttachInputs(inputs);
diff --git a/MachineLearning/CNTK/ComputationNode.h b/MachineLearning/CNTK/ComputationNode.h
index 55471acd3..a78f76a24 100644
--- a/MachineLearning/CNTK/ComputationNode.h
+++ b/MachineLearning/CNTK/ComputationNode.h
@@ -57,6 +57,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
 protected:
     //std containers such as list and map does not support class reference so we need to use pointer
     typedef ComputationNode<ElemType>* ComputationNodePtr;
+    typedef const ComputationNode<ElemType>* ConstComputationNodePtr;
     typedef std::pair<ComputationNodePtr, ComputationNodePtr> ComputationArc;
     int m_loopId;
     size_t m_samplesInRecurrentStep;
@@ -152,7 +153,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
         throw std::logic_error("This operation does not support six inputs.");
     }
 
-    virtual void AttachInputs(const std::vector<ComputationNodePtr>& /*inputs*/)
+    virtual void AttachInputs(const std::vector<ConstComputationNodePtr>& /*inputs*/)
     {
         throw std::logic_error("This operation does not support variable-length inputs.");
     }
@@ -919,6 +920,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
     typedef ComputationNode<ElemType> B; \
 protected: \
     typedef ComputationNode<ElemType>* ComputationNodePtr; \
+    typedef const ComputationNode<ElemType>* ConstComputationNodePtr; \
 public: \
     using B::AttachInputs; using B::ChildrenNeedGradient; using B::ChildrenSize; using B::ClearGradientForChildren; \
     using B::ComputeGradientForChildren; using B::ComputeInputPartial; using B::ConstOnes; using B::CopyImageSizeFromInput; \
diff --git a/MachineLearning/CNTK/LinearAlgebraNodes.h b/MachineLearning/CNTK/LinearAlgebraNodes.h
index ffbda78ea..7961766e3 100644
--- a/MachineLearning/CNTK/LinearAlgebraNodes.h
+++ b/MachineLearning/CNTK/LinearAlgebraNodes.h
@@ -574,7 +574,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
             fprintf(stderr, "WARNING: RowStack operation cannot inherit image size information from its child. Image size info is lost.\n");
         }
 
-        virtual void AttachInputs(const std::vector<ComputationNodePtr>& inputs)
+        virtual void AttachInputs(const std::vector<ConstComputationNodePtr>& inputs)
         {
             unsigned int numInputs = inputs.size();
             m_children.resize(numInputs);
diff --git a/MachineLearning/CNTK/SynchronousExecutionEngine.h b/MachineLearning/CNTK/SynchronousExecutionEngine.h
index a7d2960ce..72491d131 100644
--- a/MachineLearning/CNTK/SynchronousExecutionEngine.h
+++ b/MachineLearning/CNTK/SynchronousExecutionEngine.h
@@ -393,7 +393,7 @@ public:
             if (cnNodeType == RowStackNode<ElemType>::TypeName()) //support variable length inputs
             {
-                std::vector<ComputationNodePtr> inputNodes;
+                std::vector<ConstComputationNodePtr> inputNodes;
                 inputNodes.resize(inputs.size());
                 for (int i = 0; i < inputs.size(); i++)
                     inputNodes[i] = ComputationNodePtr(inputs[i]);
@@ -734,6 +734,7 @@ public:
 private:
     ComputationNetwork<ElemType>& m_net;
     typedef ComputationNode<ElemType>* ComputationNodePtr;
+    typedef const ComputationNode<ElemType>* ConstComputationNodePtr;
     void operator=(const SynchronousNodeEvaluator&);
 };
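
An aside on the ConstComputationNodePtr typedef added above (and reverted in the
next patch), since it touches a classic C++ pitfall that plausibly made this fix
take several attempts: const applied to an already-typedef'd pointer binds to the
pointer itself, not the pointee, so const ComputationNodePtr means
ComputationNode<ElemType>* const rather than const ComputationNode<ElemType>*.
A separate pointer-to-const typedef is the usual way out. A minimal standalone
sketch (Node and NodePtr are illustrative stand-ins, not CNTK types):

    #include <vector>

    struct Node { int value = 0; };

    typedef Node* NodePtr;
    typedef const Node* ConstNodePtr;    // pointer-to-const, as in the patch above

    int main()
    {
        Node n;

        ConstNodePtr p = &n;             // may read, but not write, through p
        // p->value = 1;                 // error: the pointee is const

        const NodePtr q = &n;            // NOT the same thing: this is Node* const
        q->value = 1;                    // fine: the pointee is still mutable

        std::vector<ConstNodePtr> v{p};  // a vector of pointers-to-const is legal
        (void)v;
        return 0;
    }

This is also why std::vector<const ComputationNodePtr> is not a substitute: that
would be a vector of const-qualified pointer elements, which standard containers
do not permit.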
not support six inputs."); } - virtual void AttachInputs(const std::vector& /*inputs*/) + virtual void AttachInputs(const std::vector& /*inputs*/) { throw std::logic_error("This operation does not support variable-length inputs."); } @@ -920,7 +919,6 @@ namespace Microsoft { namespace MSR { namespace CNTK { typedef ComputationNode B; \ protected: \ typedef ComputationNode* ComputationNodePtr; \ - typedef const ComputationNode* ConstComputationNodePtr; \ public: \ using B::AttachInputs; using B::ChildrenNeedGradient; using B::ChildrenSize; using B::ClearGradientForChildren; \ using B::ComputeGradientForChildren; using B::ComputeInputPartial; using B::ConstOnes; using B::CopyImageSizeFromInput; \ diff --git a/MachineLearning/CNTK/LinearAlgebraNodes.h b/MachineLearning/CNTK/LinearAlgebraNodes.h index 7961766e3..ffbda78ea 100644 --- a/MachineLearning/CNTK/LinearAlgebraNodes.h +++ b/MachineLearning/CNTK/LinearAlgebraNodes.h @@ -574,7 +574,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { fprintf(stderr, "WARNING: RowStack operation cannot inherit image size information from its child. Image size info is lost.\n"); } - virtual void AttachInputs(const std::vector& inputs) + virtual void AttachInputs(const std::vector& inputs) { unsigned int numInputs = inputs.size(); m_children.resize(numInputs); diff --git a/MachineLearning/CNTK/SynchronousExecutionEngine.h b/MachineLearning/CNTK/SynchronousExecutionEngine.h index 72491d131..a7d2960ce 100644 --- a/MachineLearning/CNTK/SynchronousExecutionEngine.h +++ b/MachineLearning/CNTK/SynchronousExecutionEngine.h @@ -393,7 +393,7 @@ public: if (cnNodeType == RowStackNode::TypeName()) //support variable length inputs { - std::vector inputNodes; + std::vector inputNodes; inputNodes.resize(inputs.size()); for (int i = 0; i < inputs.size(); i++) inputNodes[i] = ComputationNodePtr(inputs[i]); @@ -734,7 +734,6 @@ public: private: ComputationNetwork& m_net; typedef ComputationNode* ComputationNodePtr; - typedef const ComputationNode* ConstComputationNodePtr; void operator=(const SynchronousNodeEvaluator&); }; From e1566298d55bec28c1fa28fdd66d1468060b6a93 Mon Sep 17 00:00:00 2001 From: Amit Agarwal Date: Fri, 12 Jun 2015 14:39:06 -0700 Subject: [PATCH 20/21] Fixed Linux build --- MachineLearning/CNTK/ComputationNetwork.h | 4 ++-- MachineLearning/CNTK/ComputationNode.h | 2 +- MachineLearning/CNTK/LinearAlgebraNodes.h | 2 +- MachineLearning/CNTK/SynchronousExecutionEngine.h | 2 +- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/MachineLearning/CNTK/ComputationNetwork.h b/MachineLearning/CNTK/ComputationNetwork.h index 3b8c515b6..2ae6a0851 100644 --- a/MachineLearning/CNTK/ComputationNetwork.h +++ b/MachineLearning/CNTK/ComputationNetwork.h @@ -548,7 +548,7 @@ public: } ComputationNodePtr nodePtr = GetNodeFromName(nodeName); - std::vector childrenNodes; + std::vector childrenNodes; childrenNodes.resize(numChildren); for (int j = 0; j < numChildren; j++) childrenNodes[j] = GetNodeFromName(childrenNames[j]); @@ -1530,7 +1530,7 @@ public: return newNode; } - ComputationNodePtr RowStack(const std::vector inputs, const std::wstring nodeName = L"") + ComputationNodePtr RowStack(const std::vector inputs, const std::wstring nodeName = L"") { ComputationNodePtr newNode(new RowStackNode(m_deviceId, nodeName)); newNode->AttachInputs(inputs); diff --git a/MachineLearning/CNTK/ComputationNode.h b/MachineLearning/CNTK/ComputationNode.h index 55471acd3..888060edd 100644 --- a/MachineLearning/CNTK/ComputationNode.h +++ b/MachineLearning/CNTK/ComputationNode.h 

From e1566298d55bec28c1fa28fdd66d1468060b6a93 Mon Sep 17 00:00:00 2001
From: Amit Agarwal
Date: Fri, 12 Jun 2015 14:39:06 -0700
Subject: [PATCH 20/21] Fixed Linux build

---
 MachineLearning/CNTK/ComputationNetwork.h         | 4 ++--
 MachineLearning/CNTK/ComputationNode.h            | 2 +-
 MachineLearning/CNTK/LinearAlgebraNodes.h         | 2 +-
 MachineLearning/CNTK/SynchronousExecutionEngine.h | 2 +-
 4 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/MachineLearning/CNTK/ComputationNetwork.h b/MachineLearning/CNTK/ComputationNetwork.h
index 3b8c515b6..2ae6a0851 100644
--- a/MachineLearning/CNTK/ComputationNetwork.h
+++ b/MachineLearning/CNTK/ComputationNetwork.h
@@ -548,7 +548,7 @@ public:
         }
 
         ComputationNodePtr nodePtr = GetNodeFromName(nodeName);
-        std::vector<ComputationNodePtr> childrenNodes;
+        std::vector<const ComputationNode<ElemType>*> childrenNodes;
         childrenNodes.resize(numChildren);
         for (int j = 0; j < numChildren; j++)
             childrenNodes[j] = GetNodeFromName(childrenNames[j]);
@@ -1530,7 +1530,7 @@ public:
         return newNode;
     }
 
-    ComputationNodePtr RowStack(const std::vector<ComputationNodePtr> inputs, const std::wstring nodeName = L"")
+    ComputationNodePtr RowStack(const std::vector<const ComputationNode<ElemType>*> inputs, const std::wstring nodeName = L"")
     {
         ComputationNodePtr newNode(new RowStackNode<ElemType>(m_deviceId, nodeName));
         newNode->AttachInputs(inputs);
diff --git a/MachineLearning/CNTK/ComputationNode.h b/MachineLearning/CNTK/ComputationNode.h
index 55471acd3..888060edd 100644
--- a/MachineLearning/CNTK/ComputationNode.h
+++ b/MachineLearning/CNTK/ComputationNode.h
@@ -152,7 +152,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
         throw std::logic_error("This operation does not support six inputs.");
     }
 
-    virtual void AttachInputs(const std::vector<ComputationNodePtr>& /*inputs*/)
+    virtual void AttachInputs(const std::vector<const ComputationNode<ElemType>*>& /*inputs*/)
     {
         throw std::logic_error("This operation does not support variable-length inputs.");
     }
diff --git a/MachineLearning/CNTK/LinearAlgebraNodes.h b/MachineLearning/CNTK/LinearAlgebraNodes.h
index ffbda78ea..d2b3d3302 100644
--- a/MachineLearning/CNTK/LinearAlgebraNodes.h
+++ b/MachineLearning/CNTK/LinearAlgebraNodes.h
@@ -574,7 +574,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
             fprintf(stderr, "WARNING: RowStack operation cannot inherit image size information from its child. Image size info is lost.\n");
         }
 
-        virtual void AttachInputs(const std::vector<ComputationNodePtr>& inputs)
+        virtual void AttachInputs(const std::vector<const ComputationNode<ElemType>*>& inputs)
        {
             unsigned int numInputs = inputs.size();
             m_children.resize(numInputs);
diff --git a/MachineLearning/CNTK/SynchronousExecutionEngine.h b/MachineLearning/CNTK/SynchronousExecutionEngine.h
index a7d2960ce..24bf24f08 100644
--- a/MachineLearning/CNTK/SynchronousExecutionEngine.h
+++ b/MachineLearning/CNTK/SynchronousExecutionEngine.h
@@ -393,7 +393,7 @@ public:
             if (cnNodeType == RowStackNode<ElemType>::TypeName()) //support variable length inputs
             {
-                std::vector<ComputationNodePtr> inputNodes;
+                std::vector<const ComputationNode<ElemType>*> inputNodes;
                 inputNodes.resize(inputs.size());
                 for (int i = 0; i < inputs.size(); i++)
                     inputNodes[i] = ComputationNodePtr(inputs[i]);

From faf6925bdc57d9c2ea44db1241a572aac1af5a96 Mon Sep 17 00:00:00 2001
From: Yu Zhang
Date: Tue, 16 Jun 2015 10:10:09 -0700
Subject: [PATCH 21/21] Fix the compile error on windows

---
 MachineLearning/CNTK/RecurrentNodes.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/MachineLearning/CNTK/RecurrentNodes.h b/MachineLearning/CNTK/RecurrentNodes.h
index abb890e78..0d382b164 100644
--- a/MachineLearning/CNTK/RecurrentNodes.h
+++ b/MachineLearning/CNTK/RecurrentNodes.h
@@ -218,10 +218,10 @@ namespace Microsoft { namespace MSR { namespace CNTK {
             {
                 if (colBegin(i,0) == SENTENCE_MIDDLE)
                 {
-                    Matrix<ElemType> to1 = inputGradientValues.ColumnSlice((timeIdxInSeq - delay)*mNbr + i, 1);
-                    Matrix<ElemType> frm1= gradientValues.ColumnSlice(timeIdxInSeq * mNbr + i, 1);
+                    Matrix<ElemType> frm = gradientValues.ColumnSlice(timeIdxInSeq * mNbr + i, 1);
+                    Matrix<ElemType> to = inputGradientValues.ColumnSlice((timeIdxInSeq - delay)*mNbr + i, 1);
 
-                    to1 += frm1;
+                    to += frm;
                 }
             }
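
Finally, a note on the indexing in the RecurrentNodes.h hunk above, since it is
terse: recurrent minibatches interleave mNbr parallel sequences column by column,
so column t*mNbr + i holds time step t of stream i, and the delay node's gradient
at time t accumulates into time t - delay of the same stream; the renamed frm/to
slices are single-column views into exactly those positions, which is why to += frm
updates inputGradientValues in place. A standalone sketch of just the index
arithmetic (the constant values are illustrative, not from the patch):

    #include <cstdio>

    int main()
    {
        const int mNbr = 4;    // parallel sequences in the minibatch (illustrative)
        const int delay = 1;   // recurrence delay (illustrative)

        const int timeIdxInSeq = 3;  // time step within the sequence
        const int i = 2;             // which parallel stream

        // Column holding the incoming gradient, and the column it flows into:
        const int frmCol = timeIdxInSeq * mNbr + i;
        const int toCol  = (timeIdxInSeq - delay) * mNbr + i;

        std::printf("gradient flows from column %d to column %d\n", frmCol, toCol);
        return 0;
    }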