This commit is contained in:
Veason-silverbullet 2019-05-20 14:26:59 +08:00
Родитель 273dc3501b
Коммит 453ff07438
11 изменённых файлов: 89 добавлений и 23 удалений

Просмотреть файл

@ -1234,9 +1234,7 @@ void* DistributedGatheredLabels<ElemType>::initializeNodePtr(NULL);
template <class ElemType>
shared_ptr<Matrix<ElemType>> DistributedGatheredLabels<ElemType>::m_gatheredLabels;
template <class ElemType>
shared_ptr<Matrix<ElemType>> DistributedGatheredLabels<ElemType>::m_labelsIndex;
template <class ElemType>
shared_ptr<Matrix<ElemType>> DistributedGatheredLabels<ElemType>::m_labelsValue;
shared_ptr<Matrix<ElemType>> DistributedGatheredLabels<ElemType>::m_labels;
template <class ElemType>
size_t DistributedGatheredLabels<ElemType>::m_minibatchSize(0);

Просмотреть файл

@ -2259,10 +2259,10 @@ template <class ElemType>
class DistributedGatheredLabels
{
public:
static void gatherDistributedLabels(const Matrix<ElemType>& labels)
static void gatherDistributedLabels(const Matrix<ElemType>& oneHotLabels)
{
labels.VectorMax(*m_labelsIndex, *m_labelsValue, true);
m_distGradAggPtr->DistributedAllGather(*m_labelsIndex, *m_gatheredLabels, m_minibatchSize);
Matrix<ElemType>::GetDenseLabelsFromOneHot(oneHotLabels, *m_labels);
m_distGradAggPtr->DistributedAllGather(*m_labels, *m_gatheredLabels, m_minibatchSize);
}
static void setMinibatchSize(size_t minibatchSize)
@ -2273,8 +2273,7 @@ public:
m_distGradAggPtr = (IDistGradAggregator<ElemType>*) Globals::GetDistGradAggPtr();
}
m_gatheredLabels->Resize(1, m_minibatchSize * Globals::GetProcessNum());
m_labelsIndex->Resize(1, m_minibatchSize);
m_labelsValue->Resize(1, m_minibatchSize);
m_labels->Resize(1, m_minibatchSize);
}
static void setInitializeNode(void* nodePtr)
@ -2291,8 +2290,7 @@ public:
static IDistGradAggregator<ElemType>* m_distGradAggPtr;
static void* initializeNodePtr;
static shared_ptr<Matrix<ElemType>> m_gatheredLabels;
static shared_ptr<Matrix<ElemType>> m_labelsIndex;
static shared_ptr<Matrix<ElemType>> m_labelsValue;
static shared_ptr<Matrix<ElemType>> m_labels;
static size_t m_minibatchSize;
};

Просмотреть файл

@ -231,8 +231,7 @@ public:
if (DistributedGatheredLabels<ElemType>::isInitializeNode(this))
{
RequestMatrixFromPool(DistributedGatheredLabels<ElemType>::m_gatheredLabels, matrixPool);
RequestMatrixFromPool(DistributedGatheredLabels<ElemType>::m_labelsIndex, matrixPool, 1, true);
RequestMatrixFromPool(DistributedGatheredLabels<ElemType>::m_labelsValue, matrixPool, 1, true);
RequestMatrixFromPool(DistributedGatheredLabels<ElemType>::m_labels, matrixPool, 1, true);
}
}
@ -244,8 +243,7 @@ public:
if (DistributedGatheredLabels<ElemType>::isInitializeNode(this))
{
ReleaseMatrixToPool(DistributedGatheredLabels<ElemType>::m_gatheredLabels, matrixPool);
ReleaseMatrixToPool(DistributedGatheredLabels<ElemType>::m_labelsIndex, matrixPool);
ReleaseMatrixToPool(DistributedGatheredLabels<ElemType>::m_labelsValue, matrixPool);
ReleaseMatrixToPool(DistributedGatheredLabels<ElemType>::m_labels, matrixPool);
}
}

Просмотреть файл

@ -495,8 +495,7 @@ public:
if (DistributedGatheredLabels<ElemType>::isInitializeNode(this))
{
RequestMatrixFromPool(DistributedGatheredLabels<ElemType>::m_gatheredLabels, matrixPool);
RequestMatrixFromPool(DistributedGatheredLabels<ElemType>::m_labelsIndex, matrixPool, 1, true);
RequestMatrixFromPool(DistributedGatheredLabels<ElemType>::m_labelsValue, matrixPool, 1, true);
RequestMatrixFromPool(DistributedGatheredLabels<ElemType>::m_labels, matrixPool, 1, true);
}
}
@ -511,8 +510,7 @@ public:
if (DistributedGatheredLabels<ElemType>::isInitializeNode(this))
{
ReleaseMatrixToPool(DistributedGatheredLabels<ElemType>::m_gatheredLabels, matrixPool);
ReleaseMatrixToPool(DistributedGatheredLabels<ElemType>::m_labelsIndex, matrixPool);
ReleaseMatrixToPool(DistributedGatheredLabels<ElemType>::m_labelsValue, matrixPool);
ReleaseMatrixToPool(DistributedGatheredLabels<ElemType>::m_labels, matrixPool);
}
}
@ -686,8 +684,7 @@ public:
if (DistributedGatheredLabels<ElemType>::isInitializeNode(this))
{
RequestMatrixFromPool(DistributedGatheredLabels<ElemType>::m_gatheredLabels, matrixPool);
RequestMatrixFromPool(DistributedGatheredLabels<ElemType>::m_labelsIndex, matrixPool, 1, true);
RequestMatrixFromPool(DistributedGatheredLabels<ElemType>::m_labelsValue, matrixPool, 1, true);
RequestMatrixFromPool(DistributedGatheredLabels<ElemType>::m_labels, matrixPool, 1, true);
}
}
@ -701,8 +698,7 @@ public:
if (DistributedGatheredLabels<ElemType>::isInitializeNode(this))
{
ReleaseMatrixToPool(DistributedGatheredLabels<ElemType>::m_gatheredLabels, matrixPool);
ReleaseMatrixToPool(DistributedGatheredLabels<ElemType>::m_labelsIndex, matrixPool);
ReleaseMatrixToPool(DistributedGatheredLabels<ElemType>::m_labelsValue, matrixPool);
ReleaseMatrixToPool(DistributedGatheredLabels<ElemType>::m_labels, matrixPool);
}
}

Просмотреть файл

@ -487,6 +487,8 @@ public:
#pragma region DistributedFC
// Converts one-hot labels to dense labels: for every column j of oneHotLabels,
// the row index of the hot entry is stored in labels(0, j).
// Note: labels is the output even though it is taken by const reference.
static void GetDenseLabelsFromOneHot(const CPUMatrix<ElemType>& oneHotLabels, const CPUMatrix<ElemType>& labels);
static void Scatter(const CPUMatrix<ElemType>& src, const CPUMatrix<ElemType>& dst, size_t minibatchSize, size_t rank, size_t processNum);
static void AddColumnVector(const CPUMatrix<ElemType>& src, const CPUMatrix<ElemType>& columnVector, const CPUMatrix<ElemType>& dst);

Просмотреть файл

@ -5457,7 +5457,7 @@ void CPUMatrix<ElemType>::LabelSmoothing(const CPUMatrix<ElemType>& label, ElemT
// handle remaining stuffs
for (long i = numElements & ~3; i < numElements; i++)
{
if (labelPtr[i] == (ElemType)0)
if (labelPtr[i] == (ElemType)0.5)
labelPtr[i] = smoothValue;
else
labelPtr[i] = labelPtr[i] * keepRate + smoothValue;
@ -5468,6 +5468,36 @@ void CPUMatrix<ElemType>::LabelSmoothing(const CPUMatrix<ElemType>& label, ElemT
#pragma region DistributedFC
// Converts one-hot encoded labels into dense class indices (CPU implementation).
//
//   oneHotLabels - input matrix; every column is scanned for entries greater than 0.5.
//   labels       - output; element (0, j) receives the row index found in column j.
//                  It is written through a const reference, as the declared interface requires.
//
// Behavior notes:
//   - The 0.5 threshold is the same one the GPU kernel (_getDenseLabelsFromOneHot) uses,
//     so both back ends agree on which entry counts as "hot".
//   - If several entries of a column exceed the threshold, the largest row index wins
//     (rows are scanned in ascending order and later hits overwrite earlier ones).
//   - If no entry of a column exceeds the threshold, labels(0, j) is left untouched.
template <class ElemType>
void CPUMatrix<ElemType>::GetDenseLabelsFromOneHot(const CPUMatrix<ElemType>& oneHotLabels, const CPUMatrix<ElemType>& labels)
{
    long rows = (long)oneHotLabels.GetNumRows();
    long cols = (long)oneHotLabels.GetNumCols();

    // Each iteration writes only labels(0, j), so columns can be processed independently.
#pragma omp parallel for
    for (long j = 0; j < cols; ++j)
    {
        for (long i = 0; i < rows; ++i)
        {
            if (oneHotLabels(i, j) > (ElemType)0.5)
                labels(0, j) = (ElemType)i;
        }
    }
}
template <class ElemType>
void CPUMatrix<ElemType>::Scatter(const CPUMatrix<ElemType>& src, const CPUMatrix<ElemType>& dst, size_t minibatchSize, size_t rank, size_t processNum)
{

Просмотреть файл

@ -3852,6 +3852,30 @@ void GPUMatrix<ElemType>::LabelSmoothing(const GPUMatrix<ElemType>& label, ElemT
#pragma region DistributedFC
// CUDA kernel: converts one-hot labels to dense labels.
// Each thread inspects the single element of oneHotLabels whose linear index equals
// its linear thread id; threads whose id is >= numElements do nothing.
// When the inspected element is greater than 0.5, the thread stores (index % rows)
// into labels[index / rows].
// NOTE(review): this treats index / rows as the column and index % rows as the row,
// i.e. it presumably relies on column-major storage - confirm against the matrix layout.
template <class ElemType>
__global__ void _getDenseLabelsFromOneHot(ElemType* oneHotLabels, ElemType* labels, CUDA_LONG rows, CUDA_LONG numElements)
{
    const CUDA_LONG idx = GridDim::GetLinearThreadId();
    if (idx >= numElements)
        return;

    if (oneHotLabels[idx] > (ElemType)0.5)
    {
        const CUDA_LONG col = idx / rows;
        const CUDA_LONG row = idx % rows;
        labels[col] = row;
    }
}
// Host-side launcher for the _getDenseLabelsFromOneHot kernel.
//
//   oneHotLabels - input matrix; its element count and row count are passed to the kernel.
//   labels       - output buffer the kernel writes the dense labels into
//                  (written through a const reference, as the declared interface requires).
//
// The grid is sized so that there is at least one thread per element of oneHotLabels,
// using GridDim::maxThreadsPerBlock threads per block on stream t_stream.
template <class ElemType>
void GPUMatrix<ElemType>::GetDenseLabelsFromOneHot(const GPUMatrix<ElemType>& oneHotLabels, const GPUMatrix<ElemType>& labels)
{
    CUDA_LONG numElements = (CUDA_LONG)oneHotLabels.GetNumElements();
    CUDA_LONG rows = (CUDA_LONG)oneHotLabels.GetNumRows();
    oneHotLabels.PrepareDevice();

    // An empty input would produce a zero-block grid, which is not a valid launch
    // configuration; there is nothing to convert, so return before launching.
    if (numElements == 0)
        return;

    int blocksPerGrid = (int)ceil(1.0 * numElements / GridDim::maxThreadsPerBlock);
    SyncGuard syncGuard;
    _getDenseLabelsFromOneHot<ElemType> << <blocksPerGrid, GridDim::maxThreadsPerBlock, 0, t_stream >> > (oneHotLabels.Data(), labels.Data(), rows, numElements);
}
template <class ElemType>
__global__ void _scatter(ElemType* src, ElemType* dst, CUDA_LONG outputDim, CUDA_LONG minioutputDim, CUDA_LONG blockSize, CUDA_LONG blockOffset, CUDA_LONG numElements)
{

Просмотреть файл

@ -603,6 +603,8 @@ public:
#pragma region DistributedFC
// Converts one-hot labels to dense labels on the GPU: for each hot entry of
// oneHotLabels, its row index is stored in labels at the position of its column.
// Note: labels is the output even though it is taken by const reference.
static void GetDenseLabelsFromOneHot(const GPUMatrix<ElemType>& oneHotLabels, const GPUMatrix<ElemType>& labels);
static void Scatter(const GPUMatrix<ElemType>& src, const GPUMatrix<ElemType>& dst, size_t minibatchSize, size_t rank, size_t processNum);
static void AddColumnVector(const GPUMatrix<ElemType>& src, const GPUMatrix<ElemType>& columnVector, const GPUMatrix<ElemType>& dst);

Просмотреть файл

@ -5206,6 +5206,17 @@ void Matrix<ElemType>::BatchNormalizationBackward(const Matrix<ElemType>& in, Ma
#pragma region DistributedFC
// Converts one-hot labels to dense labels by forwarding to the CPUMatrix or GPUMatrix
// implementation of GetDenseLabelsFromOneHot.
//   oneHotLabels - input matrix of one-hot encoded labels.
//   labels       - output matrix; also the matrix the dispatch macro is keyed on.
// The two remaining dispatch cases are NOT_IMPLEMENTED.
// NOTE(review): those two cases are presumably the sparse CPU / sparse GPU paths - confirm
// against the definition of DISPATCH_MATRIX_ON_FLAG.
// NOTE(review): only labels is passed to the dispatch macro; oneHotLabels is assumed to have
// the matching underlying matrix - verify at call sites.
template <class ElemType>
/*static*/ void Matrix<ElemType>::GetDenseLabelsFromOneHot(const Matrix<ElemType>& oneHotLabels, const Matrix<ElemType>& labels)
{
DISPATCH_MATRIX_ON_FLAG(&labels,
&labels,
CPUMatrix<ElemType>::GetDenseLabelsFromOneHot(*(oneHotLabels.m_CPUMatrix), *(labels.m_CPUMatrix)),
GPUMatrix<ElemType>::GetDenseLabelsFromOneHot(*(oneHotLabels.m_GPUMatrix), *(labels.m_GPUMatrix)),
NOT_IMPLEMENTED,
NOT_IMPLEMENTED);
}
template <class ElemType>
/*static*/ void Matrix<ElemType>::Scatter(const Matrix<ElemType>& src, const Matrix<ElemType>& dst, size_t minibatchSize, size_t rank, size_t processNum)
{

Просмотреть файл

@ -646,6 +646,8 @@ public:
#pragma region DistributedFC
// Converts one-hot labels to dense labels, dispatching to the CPUMatrix or GPUMatrix
// implementation. Note: labels is the output even though it is taken by const reference.
static void GetDenseLabelsFromOneHot(const Matrix<ElemType>& oneHotLabels, const Matrix<ElemType>& labels);
static void Scatter(const Matrix<ElemType>& src, const Matrix<ElemType>& dst, size_t minibatchSize, size_t rank, size_t processNum);
static void AddColumnVector(const Matrix<ElemType>& src, const Matrix<ElemType>& columnVector, const Matrix<ElemType>& dst);

Просмотреть файл

@ -2175,6 +2175,11 @@ void GPUMatrix<ElemType>::LabelSmoothing(const GPUMatrix<ElemType>& label, ElemT
#pragma region DistributedFC
// Empty definition of GPUMatrix::GetDenseLabelsFromOneHot: it performs no work and
// leaves labels unmodified.
// NOTE(review): presumably the placeholder used when building without CUDA support - confirm.
template <class ElemType>
void GPUMatrix<ElemType>::GetDenseLabelsFromOneHot(const GPUMatrix<ElemType>& oneHotLabels, const GPUMatrix<ElemType>& labels)
{
}
template <class ElemType>
void GPUMatrix<ElemType>::Scatter(const GPUMatrix<ElemType>& src, const GPUMatrix<ElemType>& dst, size_t minibatchSize, size_t rank, size_t processNum)
{