This commit is contained in:
Rui Zhao (SPEECH) 2019-05-22 10:52:47 -07:00
Родитель 35935a189c
Коммит d0675c2731
2 изменённых файлов: 27 добавлений и 8 удалений

Просмотреть файл

@@ -4702,6 +4702,12 @@ GPUMatrix<ElemType>& GPUMatrix<ElemType>::AssignRNNTScore(const GPUMatrix<ElemTy
// Max number of phones in utterances in this minibatch
//size_t maxPhoneNum = phoneSeq.GetNumRows();
cudaEvent_t start, stop;
cudaEventCreate(&start);
cudaEventCreate(&stop);
size_t* gpuFrameNum;
CUDA_CALL(cudaMalloc((void**) &gpuFrameNum, uttNum * sizeof(size_t)));
CUDA_CALL(cudaMemcpy(gpuFrameNum, uttFrameNum.data(), uttNum * sizeof(size_t), cudaMemcpyHostToDevice));
@@ -4738,7 +4744,7 @@ GPUMatrix<ElemType>& GPUMatrix<ElemType>::AssignRNNTScore(const GPUMatrix<ElemTy
int blocksPerGrid = (int) ceil(1.0 * uttNum / GridDim::maxThreadsPerBlock);
//_AssignSequenceError<<<blocksPerGrid, GridDim::maxThreadsPerBlock, 0, t_stream>>>(hsmoothingWeight, Data(), label.Data(), dnnoutput.Data(), gamma.Data(), alpha, N);
cudaEventRecord(start);
for (size_t t = 0; t < maxFrameNum; t++)
{
for (size_t u = 0; u < maxPhoneNum; u++)
@@ -4754,15 +4760,21 @@ GPUMatrix<ElemType>& GPUMatrix<ElemType>::AssignRNNTScore(const GPUMatrix<ElemTy
gpuFrameNum, gpuPhoneNum, gpuBeginFrame, gpuFrameToChanInd, gpuUttBeginForMergedinput, numParallelSequences, t, u,
maxPhoneNum, totalPhoneNum, blankTokenId, uttNum);
}
cudaEventRecord(stop);
cudaEventSynchronize(stop);
float milliseconds = 0;
cudaEventElapsedTime(&milliseconds, start, stop);
//beta.Print("beta");
//alpha.Print("alpha");
ElemType zerVar = 0.0;
printf("time for fb:%f\n", milliseconds);
//beta.Print("beta");
//alpha.Print("alpha");
ElemType zerVar = 0.0;
totalScore.SetColumn(&zerVar, 0);
_assignRNNTTotalScore<<<blocksPerGrid, GridDim::maxThreadsPerBlock, 0, t_stream>>>(alpha.Data(), beta.Data(), totalScore.Data(), uttNum, gpuFrameNum, gpuFrameToChanInd, gpuBeginFrame, numParallelSequences, maxPhoneNum);
this->SetValue(0.0);
cudaEventRecord(start);
// x dimension is for each phone
// y dimension is for each time
// Ensure that we allocate correct number of blocks for given number of utterances and max number of phones in those utterances
@@ -4785,7 +4797,11 @@ GPUMatrix<ElemType>& GPUMatrix<ElemType>::AssignRNNTScore(const GPUMatrix<ElemTy
_assignRNNTScoreS3<<<block_tail, thread_tail, 0, t_stream>>>(Data(), alpha.Data(), beta.Data(), phoneSeq.Data(), uttFrameNum[s], uttPhoneNum[s], uttFrameBeginIdx[s], uttFrameToChanInd[s],
uttBeginForOutputditribution[s], numParallelSequences, maxPhoneNum, totalPhoneNum, blankTokenId, s);
}
cudaEventRecord(stop);
cudaEventSynchronize(stop);
cudaEventElapsedTime(&milliseconds, start, stop);
printf("time for error cal:%f\n", milliseconds);
CUDA_CALL(cudaFree(gpuFrameNum));
CUDA_CALL(cudaFree(gpuPhoneNum));
CUDA_CALL(cudaFree(gpuBeginFrame));

Просмотреть файл

@@ -539,18 +539,21 @@ public:
// totalcol, numParallelSequences, numPhoneParallelSequences);
//matrixOutputDistribution.Print("h");
//log softmax of f+g
mergedinput.InplaceLogSoftmax(true);
//mergedinput.InplaceLogSoftmax(true);
Microsoft::MSR::CNTK::Matrix<ElemType> logsoftmax(m_deviceid_gpu);
logsoftmax.SetValue(mergedinput);
logsoftmax.InplaceLogSoftmax(true);
//matrixOutputDistribution.Print("prob");
// forward backward to compute alpha, beta derivatives
Microsoft::MSR::CNTK::Matrix<ElemType> alpha(m_deviceid_gpu);
Microsoft::MSR::CNTK::Matrix<ElemType> beta(m_deviceid_gpu);
m_derivative.TransferToDeviceIfNotThere(m_deviceid_gpu);
m_derivative.AssignRNNTScore(mergedinput, alpha, beta, matrixPhoneSeqs, matrixPhoneSeqs, uttFrameToChanInd, uttFrameBeginIdx, uttBeginForOutputditribution, uttPhoneToChanInd, uttPhoneBeginIdx,
m_derivative.AssignRNNTScore(logsoftmax, alpha, beta, matrixPhoneSeqs, matrixPhoneSeqs, uttFrameToChanInd, uttFrameBeginIdx, uttBeginForOutputditribution, uttPhoneToChanInd, uttPhoneBeginIdx,
uttFrameNum, uttPhoneNum, numParallelSequences, numPhoneParallelSequences, maxPhoneNum, maxFrameNum, totalScore, blankTokenId, -1,true);
mergedinput.InplaceExp();
m_derivative.AssignElementProductOf(m_derivative, mergedinput);
logsoftmax.InplaceExp();
m_derivative.AssignElementProductOf(m_derivative, logsoftmax);
ElemType finalscore = 0;
//m_derivative.Print("RNNT");
finalscore = totalScore.Get00Element();