Fixed a bug in GPUMatrix::RowElementMultiplyWith and GPUMatrix::RowElementDivideBy. Changed ComputationNetwork.h to support loading nodes with more than 3 children (4 or 5).
This commit is contained in:
Родитель
0ee36af4bb
Коммит
a6d0f8176d
|
@ -5,6 +5,7 @@
|
|||
*.suo
|
||||
*.user
|
||||
*.sln.docstates
|
||||
*.orig
|
||||
|
||||
# Build results
|
||||
|
||||
|
|
|
@ -2135,7 +2135,6 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
else //all samples share the same prior
|
||||
posterior.ColumnElementMultiplyWith(prior);
|
||||
|
||||
|
||||
//compute GMM log-likelihood
|
||||
Matrix<ElemType>::Multiply(ConstOnes(1, numComponent, posterior.GetDeviceId()), false, posterior, false, functionValues); //functionValues <-- total likelihood
|
||||
posterior.RowElementDivideBy(functionValues); //posterior <-- per-comp likelihood / total likelihood
|
||||
|
|
|
@ -336,7 +336,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
}
|
||||
|
||||
ComputationNodePtr nodePtr = GetNodeFromName(nodeName);
|
||||
ComputationNodePtr childNodePtr0, childNodePtr1, childNodePtr2;
|
||||
ComputationNodePtr childNodePtr0, childNodePtr1, childNodePtr2, childNodePtr3, childNodePtr4;
|
||||
switch (numChildren)
|
||||
{
|
||||
case 1:
|
||||
|
@ -354,6 +354,21 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
childNodePtr2 = GetNodeFromName(childrenNames[2]);
|
||||
nodePtr->AttachInputs(childNodePtr0, childNodePtr1, childNodePtr2);
|
||||
break;
|
||||
case 4:
|
||||
childNodePtr0 = GetNodeFromName(childrenNames[0]);
|
||||
childNodePtr1 = GetNodeFromName(childrenNames[1]);
|
||||
childNodePtr2 = GetNodeFromName(childrenNames[2]);
|
||||
childNodePtr3 = GetNodeFromName(childrenNames[3]);
|
||||
nodePtr->AttachInputs(childNodePtr0, childNodePtr1, childNodePtr2, childNodePtr3);
|
||||
break;
|
||||
case 5:
|
||||
childNodePtr0 = GetNodeFromName(childrenNames[0]);
|
||||
childNodePtr1 = GetNodeFromName(childrenNames[1]);
|
||||
childNodePtr2 = GetNodeFromName(childrenNames[2]);
|
||||
childNodePtr3 = GetNodeFromName(childrenNames[3]);
|
||||
childNodePtr4 = GetNodeFromName(childrenNames[4]);
|
||||
nodePtr->AttachInputs(childNodePtr0, childNodePtr1, childNodePtr2, childNodePtr3, childNodePtr4);
|
||||
break;
|
||||
default:
|
||||
throw std::logic_error("Invalid number of children.");
|
||||
}
|
||||
|
|
|
@ -1371,9 +1371,9 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
if (!(a.GetNumRows() == 1 && a.GetNumCols() == GetNumCols()))
|
||||
throw std::invalid_argument("RowElementMultiplyWith: The input matrix should be a row vector and match [this]'s columns.");
|
||||
|
||||
long N=(long)a.GetNumRows();
|
||||
long M=(long)this->GetNumCols();
|
||||
int blocksPerGrid =(int)ceil(1.0*N/threadsPerBlock);
|
||||
long N = (long)this->GetNumRows();
|
||||
long M = (long)a.GetNumCols();
|
||||
int blocksPerGrid = (int)ceil(1.0*M / threadsPerBlock);
|
||||
a.PrepareDevice();
|
||||
cudaEvent_t done = nullptr;
|
||||
if (do_sync) CUDA_CALL(cudaEventCreate(&done));
|
||||
|
@ -1394,9 +1394,9 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
if (!(a.GetNumRows() == 1 && a.GetNumCols() == GetNumCols()))
|
||||
throw std::invalid_argument("RowElementDivideBy: The input matrix should be a row vector and match [this]'s columns.");
|
||||
|
||||
long N = (long)a.GetNumRows();
|
||||
long M = (long)this->GetNumCols();
|
||||
int blocksPerGrid = (int)ceil(1.0*N / threadsPerBlock);
|
||||
long N = (long)this->GetNumRows();
|
||||
long M = (long)a.GetNumCols();
|
||||
int blocksPerGrid = (int)ceil(1.0*M / threadsPerBlock);
|
||||
a.PrepareDevice();
|
||||
cudaEvent_t done = nullptr;
|
||||
if (do_sync) CUDA_CALL(cudaEventCreate(&done));
|
||||
|
@ -1417,9 +1417,9 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
if (!(a.GetNumRows() == GetNumRows() && a.GetNumCols() == 1))
|
||||
throw std::invalid_argument("ColumnElementDivideBy: The input matrix should be a col vector and match [this]'s rows.");
|
||||
|
||||
long N=(long)a.GetNumRows();
|
||||
long M=(long)this->GetNumCols();
|
||||
int blocksPerGrid =(int)ceil(1.0*N/threadsPerBlock);
|
||||
long N = (long)a.GetNumRows();
|
||||
long M = (long)this->GetNumCols();
|
||||
int blocksPerGrid = (int)ceil(1.0*N / threadsPerBlock);
|
||||
a.PrepareDevice();
|
||||
cudaEvent_t done = nullptr;
|
||||
if (do_sync) CUDA_CALL(cudaEventCreate(&done));
|
||||
|
|
|
@ -1737,7 +1737,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
if (a.IsEmpty() || IsEmpty())
|
||||
throw std::logic_error("RowElementMultiplyWith: Matrix is empty.");
|
||||
|
||||
if (!(a.GetNumRows() == GetNumRows() && a.GetNumCols() == 1))
|
||||
if (!(a.GetNumCols() == GetNumCols() && a.GetNumRows() == 1))
|
||||
throw std::invalid_argument("RowElementMultiplyWith: The input matrix should be a row vector and match [this]'s columns.");
|
||||
|
||||
//WARNING: a and this must have same type
|
||||
|
@ -1763,7 +1763,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
if (a.IsEmpty() || IsEmpty())
|
||||
throw std::logic_error("RowElementDivideBy: Matrix is empty.");
|
||||
|
||||
if (!(a.GetNumRows() == GetNumRows() && a.GetNumCols() == 1))
|
||||
if (!(a.GetNumCols() == GetNumCols() && a.GetNumRows() == 1))
|
||||
throw std::invalid_argument("RowElementDivideBy: The input matrix should be a row vector and match [this]'s columns.");
|
||||
|
||||
//WARNING: a and this must have same type
|
||||
|
|
Загрузка…
Ссылка в новой задаче