CuDNN RNN: move to cuDNN 5 release version

cudnn-7.5-windows10-x64-v5.0-ga (cuDNN 5.0 GA for CUDA 7.5, Windows 10 x64)

This required the following changes:
- CuDnnRNN::SetLength is no longer necessary; the cuDNN 5 RNN descriptor is length-agnostic.
- The sequence length is now a parameter of the forward and backward cuDNN calls, so it is stored as a member variable of the executor object. It is reset at each call of the forward pass.
- The dims[] and strides[] arguments to cudnnSetTensorNdDescriptor have been rearranged, but the data layout stays the same (see the sketch after this list).
- Other API calls received minor signature changes, e.g. cudnnGetRNNParamsSize now takes a single tensor descriptor and an explicit data type.
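To make the dims/strides point concrete, here is a minimal sketch (not part of the commit; dim and batch stand for the feature dimension and the number of active sequences in a frame) showing that the cuDNN 4 and cuDNN 5 descriptions address the same fully packed buffer:

// Sketch only: one per-frame tensor descriptor, v4 vs. v5 conventions.
cudnnTensorDescriptor_t desc;
CUDNN_CALL(cudnnCreateTensorDescriptor(&desc));

// cuDNN 4 convention: dims = { features, sequences, 1 }.
int dimsV4[3]    = { (int)dim, (int)batch, 1 };
int stridesV4[3] = { 1, (int)dim, (int)(dim * batch) };
// Element (c, n) lives at offset c*1 + n*dim.

// cuDNN 5 convention: dims = { sequences, features, 1 }.
int dimsV5[3]    = { (int)batch, (int)dim, 1 };
int stridesV5[3] = { (int)dim, 1, 1 };
// Element (n, c) lives at offset n*dim + c*1 -- the same address as
// before, so the buffer itself needs no rearranging, only a new description.
CUDNN_CALL(cudnnSetTensorNdDescriptor(desc, CUDNN_DATA_FLOAT, 3, dimsV5, stridesV5));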
This commit is contained in:
Jasha Droppo 2016-05-19 14:18:06 -07:00
Parent 8dcfa4337c
Commit 1360f0fb15
4 changed files with 27 additions and 45 deletions

View file

@@ -43,24 +43,6 @@ public:
DISABLE_COPY_AND_MOVE(CuDnnTensorDescriptor);
};
template<class ElemType>
void CuDnnRNN<ElemType>::SetLength(size_t len)
{
if (m_seqLength != len)
{
m_seqLength = len;
CUDNN_CALL(cudnnSetRNNDescriptor(m_rnnDesc,
(int)m_rnnParameters.m_hiddenSize,
(int)m_seqLength,
(int)m_rnnParameters.m_numLayers,
m_dropout,
CUDNN_LINEAR_INPUT, // We can also skip the input matrix transformation
m_rnnParameters.m_bidirectional ? CUDNN_BIDIRECTIONAL : CUDNN_UNIDIRECTIONAL,
GetMode(),
m_dataType));
}
}
template <class ElemType>
void CuDnnRNNExecutor<ElemType>::SetDescriptors(size_t dim, const vector<size_t>& numSequencesForFrame, vector<cudnnTensorDescriptor_t>& descriptors)
{
@@ -71,8 +53,8 @@ void CuDnnRNNExecutor<ElemType>::SetDescriptors(size_t dim, const vector<size_t>
descriptors.push_back(cudnnTensorDescriptor_t());
CUDNN_CALL(cudnnCreateTensorDescriptor(&descriptors[i]));
}
int dims[3] = { (int)dim, (int)numSequencesForFrame[i], 1 };
int strides[3] = { 1, dims[0], dims[0] * dims[1] };
int dims[3] = { (int)numSequencesForFrame[i], (int)dim, 1 };
int strides[3] = { dims[2] * dims[1], dims[2], 1 };
CUDNN_CALL(cudnnSetTensorNdDescriptor(descriptors[i], CUDNN_DATA_FLOAT, 3, dims, strides));
}
}
@@ -93,20 +75,19 @@ void CuDnnRNNExecutor<ElemType>::ForwardCore(
if (m_yDim != (m_rnnT->isBidirectional() ? 2 : 1) * m_rnnT->GetNumHidden())
InvalidArgument("CuDnn ForwardCore: Output leading dimension must be twice hidden size for bidirectional networks");
m_rnnT->SetLength(numSequencesForFrame.size());
// set up the input and output descriptors
SetDescriptors(m_xDim, numSequencesForFrame, xDesc);
SetDescriptors(m_yDim, numSequencesForFrame, yDesc);
// ensure workspace and reserve are large enough
m_seqLength = numSequencesForFrame.size();
size_t workSize;
size_t reserveSize;
// Need for every pass
CUDNN_CALL(cudnnGetRNNWorkspaceSize(*m_cudnn, *m_rnnT, xDesc.data(), &workSize));
CUDNN_CALL(cudnnGetRNNWorkspaceSize(*m_cudnn, *m_rnnT, (int)m_seqLength, xDesc.data(), &workSize));
// Only needed in training, can't be touched between passes.
CUDNN_CALL(cudnnGetRNNTrainingReserveSize(*m_cudnn, *m_rnnT, xDesc.data(), &reserveSize));
CUDNN_CALL(cudnnGetRNNTrainingReserveSize(*m_cudnn, *m_rnnT, (int)m_seqLength, xDesc.data(), &reserveSize));
// convert from bytes to ElemType
workSize = (workSize + sizeof(ElemType) - 1) / (sizeof(ElemType));
@@ -115,12 +96,13 @@ void CuDnnRNNExecutor<ElemType>::ForwardCore(
reserve.Resize(reserveSize, 1);
workspace.Resize(workSize, 1);
wDesc = make_unique<CuDnnFilter<ElemType>>(*m_rnnT, xDesc.data());
wDesc = make_unique<CuDnnFilter<ElemType>>(*m_rnnT, xDesc[0]);
if (wDesc->GetSize() != weightsW.GetNumElements())
InvalidArgument("RNN needs %ld parameters, but %ld were allocated", wDesc->GetSize(), weightsW.GetNumRows());
CUDNN_CALL(cudnnRNNForwardTraining(
*m_cudnn, *m_rnnT,
(int)m_seqLength,
xDesc.data(), inputX.Data(),
0, 0,
0, 0,
@@ -148,6 +130,7 @@ void CuDnnRNNExecutor<ElemType>::BackwardDataCore(
{
CUDNN_CALL(cudnnRNNBackwardData(
*m_cudnn, *m_rnnT,
(int)m_seqLength,
yDesc.data(), outputY.Data(),
yDesc.data(), outputDY.Data(),
0, 0,
@@ -177,6 +160,7 @@ void CuDnnRNNExecutor<ElemType>::BackwardWeightsCore(const GPUMatrix<ElemType>&
LogicError("out of order calling you have been very bad");
CUDNN_CALL(cudnnRNNBackwardWeights(
*m_cudnn, *m_rnnT,
(int)m_seqLength,
xDesc.data(), inputX.Data(),
0, 0,
yDesc.data(), outputY.Data(),
@@ -185,9 +169,6 @@ void CuDnnRNNExecutor<ElemType>::BackwardWeightsCore(const GPUMatrix<ElemType>&
reserve.Data(), reserve.GetNumElements()*sizeof(ElemType)));
}
template class CuDnnRNN<double>;
template class CuDnnRNN<float>;
template class CuDnnRNNExecutor<double>;
template class CuDnnRNNExecutor<float>;
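Taken together, the changes in this file follow the cuDNN 5 pattern: configure the RNN descriptor once without a length, then pass seqLength to every size query and execution call. A condensed sketch of that sequence, assuming handle, dropoutDesc, the xDesc/yDesc descriptor arrays, and the device pointers already exist, and eliding error handling:

cudnnRNNDescriptor_t rnnDesc;
CUDNN_CALL(cudnnCreateRNNDescriptor(&rnnDesc));
// No sequence length in the descriptor anymore.
CUDNN_CALL(cudnnSetRNNDescriptor(rnnDesc, hiddenSize, numLayers, dropoutDesc,
                                 CUDNN_LINEAR_INPUT, CUDNN_UNIDIRECTIONAL,
                                 CUDNN_LSTM, CUDNN_DATA_FLOAT));

// Parameter size takes just the first frame's descriptor plus a data type.
size_t paramBytes;
CUDNN_CALL(cudnnGetRNNParamsSize(handle, rnnDesc, xDesc[0], &paramBytes, CUDNN_DATA_FLOAT));

// Workspace/reserve queries and training calls take seqLength directly.
size_t workBytes, reserveBytes;
CUDNN_CALL(cudnnGetRNNWorkspaceSize(handle, rnnDesc, seqLength, xDesc, &workBytes));
CUDNN_CALL(cudnnGetRNNTrainingReserveSize(handle, rnnDesc, seqLength, xDesc, &reserveBytes));
CUDNN_CALL(cudnnRNNForwardTraining(handle, rnnDesc, seqLength,
                                   xDesc, x,
                                   nullptr, nullptr, nullptr, nullptr, // hx, cx unused
                                   wDesc, w,
                                   yDesc, y,
                                   nullptr, nullptr, nullptr, nullptr, // hy, cy unused
                                   workspace, workBytes, reserve, reserveBytes));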

View file

@@ -60,7 +60,6 @@ private:
cudnnDataType_t m_dataType;
cudnnRNNDescriptor_t m_rnnDesc;
CuDnnDropout m_dropout;
size_t m_seqLength;
RnnParameters m_rnnParameters;
cudnnRNNMode_t GetMode()
@@ -77,12 +76,19 @@ private:
}
public:
CuDnnRNN(const RnnParameters& rnnParameters, const size_t seqLength)
: m_rnnDesc(nullptr), m_dropout(0.0f), m_rnnParameters(rnnParameters), m_seqLength(0),
CuDnnRNN(const RnnParameters& rnnParameters)
: m_rnnDesc(nullptr), m_dropout(0.0f), m_rnnParameters(rnnParameters),
m_dataType(CuDnnTensor::GetDataType<ElemType>())
{
CUDNN_CALL(cudnnCreateRNNDescriptor(&m_rnnDesc));
SetLength(seqLength);
CUDNN_CALL(cudnnSetRNNDescriptor(m_rnnDesc,
(int)m_rnnParameters.m_hiddenSize,
(int)m_rnnParameters.m_numLayers,
m_dropout,
CUDNN_LINEAR_INPUT, // We can also skip the input matrix transformation
m_rnnParameters.m_bidirectional ? CUDNN_BIDIRECTIONAL : CUDNN_UNIDIRECTIONAL,
GetMode(),
m_dataType));
}
~CuDnnRNN()
@@ -99,8 +105,6 @@ public:
return this->m_rnnParameters == rnnParameters;
}
void SetLength(size_t len);
size_t GetLength()
{
return m_seqLength;
@@ -126,14 +130,14 @@ class CuDnnFilter
CuDnn::ptr_t m_cudnn;
size_t m_filterSize;
public:
CuDnnFilter(const CuDnnRNN<ElemType>& rnn, const cudnnTensorDescriptor_t *xDesc) :
CuDnnFilter(const CuDnnRNN<ElemType>& rnn, const cudnnTensorDescriptor_t& xDesc) :
m_cudnn(CuDnn::Instance()), m_dataType(CuDnnTensor::GetDataType<ElemType>())
{
CUDNN_CALL(cudnnCreateFilterDescriptor(&m_filterDesc));
try
{
size_t filterSize;
CUDNN_CALL(cudnnGetRNNParamsSize(*m_cudnn, rnn, xDesc, &filterSize));
CUDNN_CALL(cudnnGetRNNParamsSize(*m_cudnn, rnn, xDesc, &filterSize, m_dataType));
size_t dataSize = 2; // CUDNN_DATA_HALF
@@ -178,24 +182,20 @@ class CuDnnRNNExecutor
cudnnDataType_t m_dataType;
size_t m_xDim, m_yDim;
public:
CuDnnRNNExecutor(size_t xDim, size_t yDim, size_t seqLength, const RnnParameters& rnnParameters ) :
CuDnnRNNExecutor(size_t xDim, size_t yDim, const RnnParameters& rnnParameters ) :
m_cudnn(CuDnn::Instance()),
m_xDim(xDim), m_yDim(yDim),
m_seqLength(0),
m_dataType(CuDnnTensor::GetDataType<ElemType>()),
m_BackwardDataCalledYet(false)
{
m_rnnT = std::make_unique<CuDnnRNN<ElemType>>(rnnParameters, seqLength);
m_rnnT = std::make_unique<CuDnnRNN<ElemType>>(rnnParameters);
}
void ForwardCore(const GPUMatrix<ElemType>& weightsW, const GPUMatrix<ElemType>& inputX, GPUMatrix<ElemType>& outputY, const vector<size_t>& numSequencesForFrame, const RnnParameters& rnnParameters, GPUMatrix<ElemType>& reserve, GPUMatrix<ElemType>& workspace);
void BackwardWeightsCore(const GPUMatrix<ElemType>& inputX, const GPUMatrix<ElemType>& outputY, GPUMatrix<ElemType>& dw, const RnnParameters& rnnParameters, GPUMatrix<ElemType>& reserve, GPUMatrix<ElemType>& workspace);
void BackwardDataCore(const GPUMatrix<ElemType>& outputY, const GPUMatrix<ElemType>& outputDY, const GPUMatrix<ElemType>& w, GPUMatrix<ElemType>& dx, const RnnParameters& rnnParameters, GPUMatrix<ElemType>& reserve, GPUMatrix<ElemType>& workspace);
void SetLength(int len)
{
m_rnnT->SetLength(len);
}
protected:
std::unique_ptr<CuDnnFilter<ElemType>> wDesc;
vector<cudnnTensorDescriptor_t> xDesc;
@@ -216,6 +216,7 @@ private:
private:
std::unique_ptr<CuDnnRNN<ElemType>> m_rnnT;
bool m_BackwardDataCalledYet;
size_t m_seqLength;
};
} } }
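Because the length now travels with each call, the executor declared here can be constructed once and reused across minibatches of different lengths. A hypothetical usage sketch (the matrix and parameter objects are assumed to be allocated elsewhere with matching shapes):

// Under cuDNN 4 this would have required resetting the descriptor via
// SetLength between minibatches; under cuDNN 5 nothing is reset.
CuDnnRNNExecutor<float> exec(xDim, yDim, rnnParameters);

std::vector<size_t> framesA(20, batchSize); // 20 frames in this minibatch
exec.ForwardCore(weightsW, inputA, outputA, framesA, rnnParameters, reserve, workspace);

std::vector<size_t> framesB(35, batchSize); // 35 frames in the next one
exec.ForwardCore(weightsW, inputB, outputB, framesB, rnnParameters, reserve, workspace);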

View file

@@ -3154,7 +3154,7 @@ void GPUMatrix<ElemType>::RNNForward(const GPUMatrix<ElemType> &inputX, const GP
if (!m_RNNWrapper)
m_RNNWrapper = std::make_unique<RNNWrapper>();
if (!m_RNNWrapper->m_rnnExecutor)
m_RNNWrapper->m_rnnExecutor = std::make_unique<CuDnnRNNExecutor<ElemType>>(xDim, yDim, numSequencesForFrame.size(), rnnParameters);
m_RNNWrapper->m_rnnExecutor = std::make_unique<CuDnnRNNExecutor<ElemType>>(xDim, yDim, rnnParameters);
m_RNNWrapper->m_rnnExecutor->ForwardCore(paramW, inputX, *this, numSequencesForFrame, rnnParameters, reserve, workspace);
}

View file

@@ -30,7 +30,7 @@
<CuDnnIncPath>$(CUDNN_PATH)\include</CuDnnIncPath>
<CuDnnLibPath>$(CUDNN_PATH)\lib\x64</CuDnnLibPath>
<CuDnnLib>cudnn.lib</CuDnnLib>
<CuDnnDll>$(CUDNN_PATH)\bin\cudnn64_4.dll</CuDnnDll>
<CuDnnDll>$(CUDNN_PATH)\bin\cudnn64_5.dll</CuDnnDll>
</PropertyGroup>
</When>
<Otherwise>