Fixed a bug in GPUMatrix::RowElementMultiplyWith and RowElementDivideBy. Changed ComputationNetwork.h to support loading nodes with more than 3 children (now up to 5).

This commit is contained in:
Dong Yu 2014-10-15 19:00:30 -07:00
Родитель 0ee36af4bb
Коммит a6d0f8176d
5 изменённых файлов: 28 добавлений и 13 удалений

1
.gitignore поставляемый
Просмотреть файл

@ -5,6 +5,7 @@
*.suo *.suo
*.user *.user
*.sln.docstates *.sln.docstates
*.orig
# Build results # Build results

Просмотреть файл

@ -2135,7 +2135,6 @@ namespace Microsoft { namespace MSR { namespace CNTK {
else //all samples share the same prior else //all samples share the same prior
posterior.ColumnElementMultiplyWith(prior); posterior.ColumnElementMultiplyWith(prior);
//compute GMM log-likelihood //compute GMM log-likelihood
Matrix<ElemType>::Multiply(ConstOnes(1, numComponent, posterior.GetDeviceId()), false, posterior, false, functionValues); //functionValues <-- total likelihood Matrix<ElemType>::Multiply(ConstOnes(1, numComponent, posterior.GetDeviceId()), false, posterior, false, functionValues); //functionValues <-- total likelihood
posterior.RowElementDivideBy(functionValues); //posterior <-- per-comp likelihood / total likelihood posterior.RowElementDivideBy(functionValues); //posterior <-- per-comp likelihood / total likelihood

Просмотреть файл

@ -336,7 +336,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
} }
ComputationNodePtr nodePtr = GetNodeFromName(nodeName); ComputationNodePtr nodePtr = GetNodeFromName(nodeName);
ComputationNodePtr childNodePtr0, childNodePtr1, childNodePtr2; ComputationNodePtr childNodePtr0, childNodePtr1, childNodePtr2, childNodePtr3, childNodePtr4;
switch (numChildren) switch (numChildren)
{ {
case 1: case 1:
@ -354,6 +354,21 @@ namespace Microsoft { namespace MSR { namespace CNTK {
childNodePtr2 = GetNodeFromName(childrenNames[2]); childNodePtr2 = GetNodeFromName(childrenNames[2]);
nodePtr->AttachInputs(childNodePtr0, childNodePtr1, childNodePtr2); nodePtr->AttachInputs(childNodePtr0, childNodePtr1, childNodePtr2);
break; break;
case 4:
childNodePtr0 = GetNodeFromName(childrenNames[0]);
childNodePtr1 = GetNodeFromName(childrenNames[1]);
childNodePtr2 = GetNodeFromName(childrenNames[2]);
childNodePtr3 = GetNodeFromName(childrenNames[3]);
nodePtr->AttachInputs(childNodePtr0, childNodePtr1, childNodePtr2, childNodePtr3);
break;
case 5:
childNodePtr0 = GetNodeFromName(childrenNames[0]);
childNodePtr1 = GetNodeFromName(childrenNames[1]);
childNodePtr2 = GetNodeFromName(childrenNames[2]);
childNodePtr3 = GetNodeFromName(childrenNames[3]);
childNodePtr4 = GetNodeFromName(childrenNames[4]);
nodePtr->AttachInputs(childNodePtr0, childNodePtr1, childNodePtr2, childNodePtr3, childNodePtr4);
break;
default: default:
throw std::logic_error("Invalid number of children."); throw std::logic_error("Invalid number of children.");
} }

Просмотреть файл

@ -1371,9 +1371,9 @@ namespace Microsoft { namespace MSR { namespace CNTK {
if (!(a.GetNumRows() == 1 && a.GetNumCols() == GetNumCols())) if (!(a.GetNumRows() == 1 && a.GetNumCols() == GetNumCols()))
throw std::invalid_argument("RowElementMultiplyWith: The input matrix should be a row vector and match [this]'s columns."); throw std::invalid_argument("RowElementMultiplyWith: The input matrix should be a row vector and match [this]'s columns.");
long N=(long)a.GetNumRows(); long N = (long)this->GetNumRows();
long M=(long)this->GetNumCols(); long M = (long)a.GetNumCols();
int blocksPerGrid =(int)ceil(1.0*N/threadsPerBlock); int blocksPerGrid = (int)ceil(1.0*M / threadsPerBlock);
a.PrepareDevice(); a.PrepareDevice();
cudaEvent_t done = nullptr; cudaEvent_t done = nullptr;
if (do_sync) CUDA_CALL(cudaEventCreate(&done)); if (do_sync) CUDA_CALL(cudaEventCreate(&done));
@ -1394,9 +1394,9 @@ namespace Microsoft { namespace MSR { namespace CNTK {
if (!(a.GetNumRows() == 1 && a.GetNumCols() == GetNumCols())) if (!(a.GetNumRows() == 1 && a.GetNumCols() == GetNumCols()))
throw std::invalid_argument("RowElementDivideBy: The input matrix should be a row vector and match [this]'s columns."); throw std::invalid_argument("RowElementDivideBy: The input matrix should be a row vector and match [this]'s columns.");
long N = (long)a.GetNumRows(); long N = (long)this->GetNumRows();
long M = (long)this->GetNumCols(); long M = (long)a.GetNumCols();
int blocksPerGrid = (int)ceil(1.0*N / threadsPerBlock); int blocksPerGrid = (int)ceil(1.0*M / threadsPerBlock);
a.PrepareDevice(); a.PrepareDevice();
cudaEvent_t done = nullptr; cudaEvent_t done = nullptr;
if (do_sync) CUDA_CALL(cudaEventCreate(&done)); if (do_sync) CUDA_CALL(cudaEventCreate(&done));
@ -1417,9 +1417,9 @@ namespace Microsoft { namespace MSR { namespace CNTK {
if (!(a.GetNumRows() == GetNumRows() && a.GetNumCols() == 1)) if (!(a.GetNumRows() == GetNumRows() && a.GetNumCols() == 1))
throw std::invalid_argument("ColumnElementDivideBy: The input matrix should be a col vector and match [this]'s rows."); throw std::invalid_argument("ColumnElementDivideBy: The input matrix should be a col vector and match [this]'s rows.");
long N=(long)a.GetNumRows(); long N = (long)a.GetNumRows();
long M=(long)this->GetNumCols(); long M = (long)this->GetNumCols();
int blocksPerGrid =(int)ceil(1.0*N/threadsPerBlock); int blocksPerGrid = (int)ceil(1.0*N / threadsPerBlock);
a.PrepareDevice(); a.PrepareDevice();
cudaEvent_t done = nullptr; cudaEvent_t done = nullptr;
if (do_sync) CUDA_CALL(cudaEventCreate(&done)); if (do_sync) CUDA_CALL(cudaEventCreate(&done));

Просмотреть файл

@ -1737,7 +1737,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
if (a.IsEmpty() || IsEmpty()) if (a.IsEmpty() || IsEmpty())
throw std::logic_error("RowElementMultiplyWith: Matrix is empty."); throw std::logic_error("RowElementMultiplyWith: Matrix is empty.");
if (!(a.GetNumRows() == GetNumRows() && a.GetNumCols() == 1)) if (!(a.GetNumCols() == GetNumCols() && a.GetNumRows() == 1))
throw std::invalid_argument("RowElementMultiplyWith: The input matrix should be a row vector and match [this]'s columns."); throw std::invalid_argument("RowElementMultiplyWith: The input matrix should be a row vector and match [this]'s columns.");
//WARNING: a and this must have same type //WARNING: a and this must have same type
@ -1763,7 +1763,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
if (a.IsEmpty() || IsEmpty()) if (a.IsEmpty() || IsEmpty())
throw std::logic_error("RowElementDivideBy: Matrix is empty."); throw std::logic_error("RowElementDivideBy: Matrix is empty.");
if (!(a.GetNumRows() == GetNumRows() && a.GetNumCols() == 1)) if (!(a.GetNumCols() == GetNumCols() && a.GetNumRows() == 1))
throw std::invalid_argument("RowElementDivideBy: The input matrix should be a row vector and match [this]'s columns."); throw std::invalid_argument("RowElementDivideBy: The input matrix should be a row vector and match [this]'s columns.");
//WARNING: a and this must have same type //WARNING: a and this must have same type