CuDNN RNN move to CUDNN5 Release Version
cudnn-7.5-windows10-x64-v5.0-ga. This required the following changes: - CuDnnRNN::SetLength is no longer necessary; the RNN descriptor is length-agnostic. - The sequence length is now a parameter of the forward and backward cuDNN calls, so it is stored as a member variable in the executor object. It is reset on each call of the forward pass. - dims[] and strides[] in cudnnSetTensorNdDescriptor have been re-arranged, but the data layout should be the same. - Other API calls experienced minor changes.
This commit is contained in:
Родитель
8dcfa4337c
Коммит
1360f0fb15
|
@ -43,24 +43,6 @@ public:
|
|||
DISABLE_COPY_AND_MOVE(CuDnnTensorDescriptor);
|
||||
};
|
||||
|
||||
// SetLength: (re)configures the cuDNN RNN descriptor for a new sequence length.
// NOTE(review): per the commit message, this function is removed by this commit —
// with cuDNN v5 GA the RNN descriptor is length-agnostic, and the sequence
// length is instead passed to each forward/backward cuDNN call.
template<class ElemType>
|
||||
void CuDnnRNN<ElemType>::SetLength(size_t len)
|
||||
{
|
||||
// Guard avoids a redundant cudnnSetRNNDescriptor call when the cached
// length is already up to date.
if (m_seqLength != len)
|
||||
{
|
||||
m_seqLength = len;
|
||||
// Rebuild the descriptor: sequence length comes from `len`; all other
// settings (hidden size, layer count, dropout, input mode, direction,
// cell mode, data type) come from the stored members.
CUDNN_CALL(cudnnSetRNNDescriptor(m_rnnDesc,
|
||||
(int)m_rnnParameters.m_hiddenSize,
|
||||
(int)m_seqLength,
|
||||
(int)m_rnnParameters.m_numLayers,
|
||||
m_dropout,
|
||||
CUDNN_LINEAR_INPUT, // We can also skip the input matrix transformation
|
||||
m_rnnParameters.m_bidirectional ? CUDNN_BIDIRECTIONAL : CUDNN_UNIDIRECTIONAL,
|
||||
GetMode(),
|
||||
m_dataType));
|
||||
}
|
||||
}
|
||||
|
||||
template <class ElemType>
|
||||
void CuDnnRNNExecutor<ElemType>::SetDescriptors(size_t dim, const vector<size_t>& numSequencesForFrame, vector<cudnnTensorDescriptor_t>& descriptors)
|
||||
{
|
||||
|
@ -71,8 +53,8 @@ void CuDnnRNNExecutor<ElemType>::SetDescriptors(size_t dim, const vector<size_t>
|
|||
descriptors.push_back(cudnnTensorDescriptor_t());
|
||||
CUDNN_CALL(cudnnCreateTensorDescriptor(&descriptors[i]));
|
||||
}
|
||||
int dims[3] = { (int)dim, (int)numSequencesForFrame[i], 1 };
|
||||
int strides[3] = { 1, dims[0], dims[0] * dims[1] };
|
||||
int dims[3] = { (int)numSequencesForFrame[i], (int)dim, 1 };
|
||||
int strides[3] = { dims[2] * dims[1], dims[2], 1 };
|
||||
CUDNN_CALL(cudnnSetTensorNdDescriptor(descriptors[i], CUDNN_DATA_FLOAT, 3, dims, strides));
|
||||
}
|
||||
}
|
||||
|
@ -93,20 +75,19 @@ void CuDnnRNNExecutor<ElemType>::ForwardCore(
|
|||
if (m_yDim != (m_rnnT->isBidirectional() ? 2 : 1) * m_rnnT->GetNumHidden())
|
||||
InvalidArgument("CuDnn ForwardCore: Output leading dimension must be twice hidden size for bidirectional networks");
|
||||
|
||||
m_rnnT->SetLength(numSequencesForFrame.size());
|
||||
|
||||
// set up the input and output descriptors
|
||||
SetDescriptors(m_xDim, numSequencesForFrame, xDesc);
|
||||
SetDescriptors(m_yDim, numSequencesForFrame, yDesc);
|
||||
|
||||
// ensure workspace and reserve are large enough
|
||||
m_seqLength = numSequencesForFrame.size();
|
||||
size_t workSize;
|
||||
size_t reserveSize;
|
||||
|
||||
// Need for every pass
|
||||
CUDNN_CALL(cudnnGetRNNWorkspaceSize(*m_cudnn, *m_rnnT, xDesc.data(), &workSize));
|
||||
CUDNN_CALL(cudnnGetRNNWorkspaceSize(*m_cudnn, *m_rnnT, (int)m_seqLength, xDesc.data(), &workSize));
|
||||
// Only needed in training, can't be touched between passes.
|
||||
CUDNN_CALL(cudnnGetRNNTrainingReserveSize(*m_cudnn, *m_rnnT, xDesc.data(), &reserveSize));
|
||||
CUDNN_CALL(cudnnGetRNNTrainingReserveSize(*m_cudnn, *m_rnnT, (int)m_seqLength, xDesc.data(), &reserveSize));
|
||||
|
||||
// convert from bytes to ElemType
|
||||
workSize = (workSize + sizeof(ElemType) - 1) / (sizeof(ElemType));
|
||||
|
@ -115,12 +96,13 @@ void CuDnnRNNExecutor<ElemType>::ForwardCore(
|
|||
reserve.Resize(reserveSize, 1);
|
||||
workspace.Resize(workSize, 1);
|
||||
|
||||
wDesc = make_unique<CuDnnFilter<ElemType>>(*m_rnnT, xDesc.data());
|
||||
wDesc = make_unique<CuDnnFilter<ElemType>>(*m_rnnT, xDesc[0]);
|
||||
if (wDesc->GetSize() != weightsW.GetNumElements())
|
||||
InvalidArgument("RNN needs %ld parameters, but %ld were allocated", wDesc->GetSize(), weightsW.GetNumRows());
|
||||
|
||||
CUDNN_CALL(cudnnRNNForwardTraining(
|
||||
*m_cudnn, *m_rnnT,
|
||||
(int)m_seqLength,
|
||||
xDesc.data(), inputX.Data(),
|
||||
0, 0,
|
||||
0, 0,
|
||||
|
@ -148,6 +130,7 @@ void CuDnnRNNExecutor<ElemType>::BackwardDataCore(
|
|||
{
|
||||
CUDNN_CALL(cudnnRNNBackwardData(
|
||||
*m_cudnn, *m_rnnT,
|
||||
(int)m_seqLength,
|
||||
yDesc.data(), outputY.Data(),
|
||||
yDesc.data(), outputDY.Data(),
|
||||
0, 0,
|
||||
|
@ -177,6 +160,7 @@ void CuDnnRNNExecutor<ElemType>::BackwardWeightsCore(const GPUMatrix<ElemType>&
|
|||
LogicError("out of order calling you have been very bad");
|
||||
CUDNN_CALL(cudnnRNNBackwardWeights(
|
||||
*m_cudnn, *m_rnnT,
|
||||
(int)m_seqLength,
|
||||
xDesc.data(), inputX.Data(),
|
||||
0, 0,
|
||||
yDesc.data(), outputY.Data(),
|
||||
|
@ -185,9 +169,6 @@ void CuDnnRNNExecutor<ElemType>::BackwardWeightsCore(const GPUMatrix<ElemType>&
|
|||
reserve.Data(), reserve.GetNumElements()*sizeof(ElemType)));
|
||||
}
|
||||
|
||||
template class CuDnnRNN<double>;
|
||||
template class CuDnnRNN<float>;
|
||||
|
||||
template class CuDnnRNNExecutor<double>;
|
||||
template class CuDnnRNNExecutor<float>;
|
||||
|
||||
|
|
|
@ -60,7 +60,6 @@ private:
|
|||
cudnnDataType_t m_dataType;
|
||||
cudnnRNNDescriptor_t m_rnnDesc;
|
||||
CuDnnDropout m_dropout;
|
||||
size_t m_seqLength;
|
||||
RnnParameters m_rnnParameters;
|
||||
|
||||
cudnnRNNMode_t GetMode()
|
||||
|
@ -77,12 +76,19 @@ private:
|
|||
}
|
||||
|
||||
public:
|
||||
CuDnnRNN(const RnnParameters& rnnParameters, const size_t seqLength)
|
||||
: m_rnnDesc(nullptr), m_dropout(0.0f), m_rnnParameters(rnnParameters), m_seqLength(0),
|
||||
CuDnnRNN(const RnnParameters& rnnParameters)
|
||||
: m_rnnDesc(nullptr), m_dropout(0.0f), m_rnnParameters(rnnParameters),
|
||||
m_dataType(CuDnnTensor::GetDataType<ElemType>())
|
||||
{
|
||||
CUDNN_CALL(cudnnCreateRNNDescriptor(&m_rnnDesc));
|
||||
SetLength(seqLength);
|
||||
CUDNN_CALL(cudnnSetRNNDescriptor(m_rnnDesc,
|
||||
(int)m_rnnParameters.m_hiddenSize,
|
||||
(int)m_rnnParameters.m_numLayers,
|
||||
m_dropout,
|
||||
CUDNN_LINEAR_INPUT, // We can also skip the input matrix transformation
|
||||
m_rnnParameters.m_bidirectional ? CUDNN_BIDIRECTIONAL : CUDNN_UNIDIRECTIONAL,
|
||||
GetMode(),
|
||||
m_dataType));
|
||||
}
|
||||
|
||||
~CuDnnRNN()
|
||||
|
@ -99,8 +105,6 @@ public:
|
|||
return this->m_rnnParameters == rnnParameters;
|
||||
}
|
||||
|
||||
void SetLength(size_t len);
|
||||
|
||||
size_t GetLength()
|
||||
{
|
||||
return m_seqLength;
|
||||
|
@ -126,14 +130,14 @@ class CuDnnFilter
|
|||
CuDnn::ptr_t m_cudnn;
|
||||
size_t m_filterSize;
|
||||
public:
|
||||
CuDnnFilter(const CuDnnRNN<ElemType>& rnn, const cudnnTensorDescriptor_t *xDesc) :
|
||||
CuDnnFilter(const CuDnnRNN<ElemType>& rnn, const cudnnTensorDescriptor_t& xDesc) :
|
||||
m_cudnn(CuDnn::Instance()), m_dataType(CuDnnTensor::GetDataType<ElemType>())
|
||||
{
|
||||
CUDNN_CALL(cudnnCreateFilterDescriptor(&m_filterDesc));
|
||||
try
|
||||
{
|
||||
size_t filterSize;
|
||||
CUDNN_CALL(cudnnGetRNNParamsSize(*m_cudnn, rnn, xDesc, &filterSize));
|
||||
CUDNN_CALL(cudnnGetRNNParamsSize(*m_cudnn, rnn, xDesc, &filterSize, m_dataType));
|
||||
|
||||
size_t dataSize = 2; // CUDNN_DATA_HALF
|
||||
|
||||
|
@ -178,24 +182,20 @@ class CuDnnRNNExecutor
|
|||
cudnnDataType_t m_dataType;
|
||||
size_t m_xDim, m_yDim;
|
||||
public:
|
||||
CuDnnRNNExecutor(size_t xDim, size_t yDim, size_t seqLength, const RnnParameters& rnnParameters ) :
|
||||
CuDnnRNNExecutor(size_t xDim, size_t yDim, const RnnParameters& rnnParameters ) :
|
||||
m_cudnn(CuDnn::Instance()),
|
||||
m_xDim(xDim), m_yDim(yDim),
|
||||
m_seqLength(0),
|
||||
m_dataType(CuDnnTensor::GetDataType<ElemType>()),
|
||||
m_BackwardDataCalledYet(false)
|
||||
{
|
||||
m_rnnT = std::make_unique<CuDnnRNN<ElemType>>(rnnParameters, seqLength);
|
||||
m_rnnT = std::make_unique<CuDnnRNN<ElemType>>(rnnParameters);
|
||||
}
|
||||
|
||||
void ForwardCore(const GPUMatrix<ElemType>& weightsW, const GPUMatrix<ElemType>& inputX, GPUMatrix<ElemType>& outputY, const vector<size_t>& numSequencesForFrame, const RnnParameters& rnnParameters, GPUMatrix<ElemType>& reserve, GPUMatrix<ElemType>& workspace);
|
||||
void BackwardWeightsCore(const GPUMatrix<ElemType>& inputX, const GPUMatrix<ElemType>& outputY, GPUMatrix<ElemType>& dw, const RnnParameters& rnnParameters, GPUMatrix<ElemType>& reserve, GPUMatrix<ElemType>& workspace);
|
||||
void BackwardDataCore(const GPUMatrix<ElemType>& outputY, const GPUMatrix<ElemType>& outputDY, const GPUMatrix<ElemType>& w, GPUMatrix<ElemType>& dx, const RnnParameters& rnnParameters, GPUMatrix<ElemType>& reserve, GPUMatrix<ElemType>& workspace);
|
||||
|
||||
void SetLength(int len)
|
||||
{
|
||||
m_rnnT->SetLength(len);
|
||||
}
|
||||
|
||||
protected:
|
||||
std::unique_ptr<CuDnnFilter<ElemType>> wDesc;
|
||||
vector<cudnnTensorDescriptor_t> xDesc;
|
||||
|
@ -216,6 +216,7 @@ private:
|
|||
private:
|
||||
std::unique_ptr<CuDnnRNN<ElemType>> m_rnnT;
|
||||
bool m_BackwardDataCalledYet;
|
||||
size_t m_seqLength;
|
||||
};
|
||||
|
||||
} } }
|
|
@ -3154,7 +3154,7 @@ void GPUMatrix<ElemType>::RNNForward(const GPUMatrix<ElemType> &inputX, const GP
|
|||
if (!m_RNNWrapper)
|
||||
m_RNNWrapper = std::make_unique<RNNWrapper>();
|
||||
if (!m_RNNWrapper->m_rnnExecutor)
|
||||
m_RNNWrapper->m_rnnExecutor = std::make_unique<CuDnnRNNExecutor<ElemType>>(xDim, yDim, numSequencesForFrame.size(), rnnParameters);
|
||||
m_RNNWrapper->m_rnnExecutor = std::make_unique<CuDnnRNNExecutor<ElemType>>(xDim, yDim, rnnParameters);
|
||||
m_RNNWrapper->m_rnnExecutor->ForwardCore(paramW, inputX, *this, numSequencesForFrame, rnnParameters, reserve, workspace);
|
||||
}
|
||||
|
||||
|
|
|
@ -30,7 +30,7 @@
|
|||
<CuDnnIncPath>$(CUDNN_PATH)\include</CuDnnIncPath>
|
||||
<CuDnnLibPath>$(CUDNN_PATH)\lib\x64</CuDnnLibPath>
|
||||
<CuDnnLib>cudnn.lib</CuDnnLib>
|
||||
<CuDnnDll>$(CUDNN_PATH)\bin\cudnn64_4.dll</CuDnnDll>
|
||||
<CuDnnDll>$(CUDNN_PATH)\bin\cudnn64_5.dll</CuDnnDll>
|
||||
</PropertyGroup>
|
||||
</When>
|
||||
<Otherwise>
|
||||
|
|
Загрузка…
Ссылка в новой задаче