small change for more reuse of code

This commit is contained in:
Bowen Bao 2018-07-05 16:42:51 -07:00
Родитель 0df5c39fbb
Коммит 6cc772f693
8 изменённых файлов: 66 добавлений и 81 удалений

Просмотреть файл

@ -2110,7 +2110,7 @@ public:
row_elements *= dims[i];
}
sourceGradient.ScatterToIndicesWithMask(outputGradient, indices, indicesMask, row_elements);
sourceGradient.ScatterToIndices(outputGradient, indices, row_elements, &indicesMask);
}
else
{

Просмотреть файл

@ -303,8 +303,7 @@ public:
CPUMatrix<ElemType>& AssignOneHot(const CPUMatrix<ElemType>& a, vector<size_t>& shape, size_t axis);
CPUMatrix<ElemType>& GatherFromTarget(const CPUMatrix<ElemType>& indices, const CPUMatrix<ElemType>& target, size_t row_elements);
CPUMatrix<ElemType>& ScatterToIndices(const CPUMatrix<ElemType>& values, const CPUMatrix<ElemType>& indices, size_t row_elements);
CPUMatrix<ElemType>& ScatterToIndices(const CPUMatrix<ElemType>& values, const CPUMatrix<ElemType>& indices, const CPUMatrix<char>& mask, size_t row_elements);
CPUMatrix<ElemType>& ScatterToIndices(const CPUMatrix<ElemType>& values, const CPUMatrix<ElemType>& indices, size_t row_elements, const CPUMatrix<char>* mask = nullptr);
bool IsEqualTo(const CPUMatrix<ElemType>& a, const ElemType threshold = 1e-8) const;
@ -593,7 +592,8 @@ protected:
private:
void Clear();
void ScatterValues(ElemType* indices, ElemType* value, ElemType* data, ElemType alpha, size_t num_indices, size_t rows, size_t cols, size_t indices_step = 1, char* mask = nullptr);
void ScatterValues(ElemType* indices, ElemType* value, ElemType* data, ElemType alpha, size_t num_indices, size_t rows, size_t cols, size_t indices_step = 1);
void ScatterValues(ElemType* indices, ElemType* value, ElemType* data, char* mask, ElemType alpha, size_t num_indices, size_t rows, size_t cols, size_t indices_step = 1);
private:
static int m_optimizationFlags;

Просмотреть файл

@ -3338,35 +3338,25 @@ CPUMatrix<ElemType>& CPUMatrix<ElemType>::GatherFromTarget(const CPUMatrix<ElemT
}
template <class ElemType>
CPUMatrix<ElemType>& CPUMatrix<ElemType>::ScatterToIndices(const CPUMatrix<ElemType>& values, const CPUMatrix<ElemType>& indices, size_t row_elements)
CPUMatrix<ElemType>& CPUMatrix<ElemType>::ScatterToIndices(const CPUMatrix<ElemType>& values, const CPUMatrix<ElemType>& indices, size_t row_elements,
const CPUMatrix<char>* mask/*= nullptr*/)
{
if (indices.IsEmpty() || values.IsEmpty())
LogicError("ScatterToIndices: input matrix is empty.");
if (mask)
{
if (mask->IsEmpty())
LogicError("ScatterToIndices: input matrix is empty.");
if (indices.GetNumRows() != mask->GetNumRows() || indices.GetNumCols() != mask->GetNumCols())
LogicError("ScatterToIndices: indices matrix must have same shape with mask matrix.");
}
ElemType* indicesBufPtr = indices.Data();
ElemType* valueBufPtr = values.Data();
char* maskBufPtr = mask ? mask->Data() : nullptr;
ElemType* buffer = Data();
ScatterValues(indicesBufPtr, valueBufPtr, buffer, (ElemType)1, indices.GetNumElements(), row_elements, this->GetNumCols());
return *this;
}
template <class ElemType>
CPUMatrix<ElemType>& CPUMatrix<ElemType>::ScatterToIndices(const CPUMatrix<ElemType>& values, const CPUMatrix<ElemType>& indices,
const CPUMatrix<char>& mask, size_t row_elements)
{
if (indices.IsEmpty() || values.IsEmpty() || mask.IsEmpty())
LogicError("ScatterToIndices: input matrix is empty.");
if (indices.GetNumRows() != mask.GetNumRows() || indices.GetNumCols() != mask.GetNumCols())
LogicError("ScatterToIndices: indices matrix must have same shape with mask matrix.");
ElemType* indicesBufPtr = indices.Data();
ElemType* valueBufPtr = values.Data();
char* maskBufPtr = mask.Data();
ElemType* buffer = Data();
ScatterValues(indicesBufPtr, valueBufPtr, buffer, (ElemType)1, indices.GetNumElements(), row_elements, this->GetNumCols(), /*indices_step=*/1, maskBufPtr);
ScatterValues(indicesBufPtr, valueBufPtr, buffer, maskBufPtr, (ElemType)1, indices.GetNumElements(), row_elements, this->GetNumCols());
return *this;
}
@ -7293,7 +7283,13 @@ void CPUMatrix<ElemType>::TensorArgOp(const CPUMatrix<ElemType>& a, ElementWiseO
}
template <class ElemType>
void CPUMatrix<ElemType>::ScatterValues(ElemType* indices, ElemType* value, ElemType* data, ElemType alpha, size_t num_indices, size_t rows, size_t cols, size_t indices_step/*=1*/, char* mask/*=nullptr*/)
void CPUMatrix<ElemType>::ScatterValues(ElemType* indices, ElemType* value, ElemType* data, ElemType alpha, size_t num_indices, size_t rows, size_t cols, size_t indices_step/*=1*/)
{
// Mask-less convenience overload: forwards every argument unchanged to the
// overload that takes a mask pointer, passing nullptr in the mask position.
ScatterValues(indices, value, data, nullptr, alpha, num_indices, rows, cols, indices_step);
}
template <class ElemType>
void CPUMatrix<ElemType>::ScatterValues(ElemType* indices, ElemType* value, ElemType* data, char* mask, ElemType alpha, size_t num_indices, size_t rows, size_t cols, size_t indices_step/*=1*/)
{
if (!indices || !value || !data)
LogicError("ScatterValues: input data is null.");

Просмотреть файл

@ -4436,38 +4436,20 @@ GPUMatrix<ElemType>& GPUMatrix<ElemType>::GatherFromTarget(const GPUMatrix<ElemT
}
template <class ElemType>
GPUMatrix<ElemType>& GPUMatrix<ElemType>::ScatterToIndices(const GPUMatrix<ElemType>& values, const GPUMatrix<ElemType>& indices, size_t row_elements)
GPUMatrix<ElemType>& GPUMatrix<ElemType>::ScatterToIndices(const GPUMatrix<ElemType>& values, const GPUMatrix<ElemType>& indices, size_t row_elements, const GPUMatrix<char>* mask/*= nullptr*/)
{
if (indices.IsEmpty() || values.IsEmpty())
if (indices.IsEmpty() || values.IsEmpty() || (mask && mask->IsEmpty()))
LogicError("ScatterToIndices: input matrix is empty.");
ElemType* indicesBufPtr = indices.Data();
ElemType* valueBufPtr = values.Data();
char* maskBufPtr = mask ? mask->Data() : nullptr;
ElemType* buffer = Data();
size_t num_indices = indices.GetNumElements();
CUDA_LONG N = (CUDA_LONG)num_indices * row_elements;
int blocksPerGrid = (int)ceil(((double)N) / GridDim::maxThreadsPerBlock);
_scatterToIndices<ElemType> << <blocksPerGrid, GridDim::maxThreadsPerBlock >> > (indicesBufPtr, valueBufPtr, buffer, row_elements, num_indices, N);
return *this;
}
template <class ElemType>
GPUMatrix<ElemType>& GPUMatrix<ElemType>::ScatterToIndices(const GPUMatrix<ElemType>& values, const GPUMatrix<ElemType>& indices, const GPUMatrix<char>& mask, size_t row_elements)
{
if (indices.IsEmpty() || values.IsEmpty() || mask.IsEmpty())
LogicError("ScatterToIndices: input matrix is empty.");
ElemType* indicesBufPtr = indices.Data();
ElemType* valueBufPtr = values.Data();
char* maskBufPtr = mask.Data();
ElemType* buffer = Data();
size_t num_indices = indices.GetNumElements();
CUDA_LONG N = (CUDA_LONG)num_indices * row_elements;
int blocksPerGrid = (int)ceil(((double)N) / GridDim::maxThreadsPerBlock);
_scatterToIndices<ElemType> << <blocksPerGrid, GridDim::maxThreadsPerBlock >> > (indicesBufPtr, valueBufPtr, buffer, row_elements, num_indices, N, maskBufPtr);
_scatterToIndices<ElemType> << <blocksPerGrid, GridDim::maxThreadsPerBlock >> > (indicesBufPtr, valueBufPtr, buffer, maskBufPtr, row_elements, num_indices, N);
return *this;
}

Просмотреть файл

@ -292,8 +292,7 @@ public:
GPUMatrix<ElemType>& AssignOneHot(const GPUMatrix<ElemType>& a, vector<size_t>& shape, size_t axis);
GPUMatrix<ElemType>& GatherFromTarget(const GPUMatrix<ElemType>& indices, const GPUMatrix<ElemType>& target, size_t row_elements);
GPUMatrix<ElemType>& ScatterToIndices(const GPUMatrix<ElemType>& values, const GPUMatrix<ElemType>& indices, size_t row_elements);
GPUMatrix<ElemType>& ScatterToIndices(const GPUMatrix<ElemType>& values, const GPUMatrix<ElemType>& indices, const GPUMatrix<char>& mask, size_t row_elements);
GPUMatrix<ElemType>& ScatterToIndices(const GPUMatrix<ElemType>& values, const GPUMatrix<ElemType>& indices, size_t row_elements, const GPUMatrix<char>* mask = nullptr);
GPUMatrix<ElemType> Transpose() const;
GPUMatrix<ElemType>& AssignTransposeOf(const GPUMatrix<ElemType>& a);

Просмотреть файл

@ -5818,25 +5818,51 @@ __global__ void _gatherFromTarget(ElemType *indices,
}
}
// Per-element worker shared by the scatter kernels: accumulates value[index]
// into the buffer row selected by the corresponding entry of 'indices'.
//   indices          - row selectors, stored as ElemType; entry (index / num_row_elements) is used
//   value            - source elements, addressed directly by 'index'
//   buffer           - destination that is accumulated into
//   mask             - optional; when non-null, an entry equal to 0 makes this element a no-op
//   index            - flat element index handled by the caller
//   num_row_elements - number of elements per row
template<class ElemType>
inline __device__ void _scatterToIndices4Index(ElemType *indices,
                                               ElemType *value,
                                               ElemType *buffer,
                                               char *mask,
                                               CUDA_LONG index,
                                               size_t num_row_elements)
{
    // Which entry of 'indices' (and of 'mask') governs this element.
    const size_t row = index / num_row_elements;

    // Skip missing values
    if (mask != nullptr && mask[row] == 0)
        return;

    // Position of this element inside its row, and its flat destination in 'buffer'.
    const size_t column = index % num_row_elements;
    const size_t destination = (size_t)(unsigned long long int)indices[row] * num_row_elements + column;

    // We resort to nondeterministic behavior (floating point addition is not associative).
    // Note that the CPU parallel algorithm will have poor performance on the GPU because of thread divergence
    atomicAdd(&buffer[destination], value[index]);
}
template<class ElemType>
__global__ void _scatterToIndices(ElemType *indices,
ElemType *value,
ElemType *buffer,
size_t num_row_elements,
size_t num_indices,
CUDA_LONG num_elements,
char *mask = nullptr)
CUDA_LONG num_elements)
{
const CUDA_LONG index = blockIdx.x * blockDim.x + threadIdx.x;
if (index < num_elements)
{
size_t indices_index = index / num_row_elements;
size_t offset = index % num_row_elements;
//Skip missing values
if (mask && mask[indices_index] == 0) return;
//We resort to nondeterministic behavior (floating point addition is not associative).
//Note that the CPU parallel algorithm will have poor performance on the GPU because of thread divergence
atomicAdd(&buffer[(size_t)(unsigned long long int)indices[indices_index] * num_row_elements + offset], value[index]);
_scatterToIndices4Index(indices, value, buffer, /*mask*/nullptr, index, num_row_elements);
}
}
// Masked scatter kernel: each thread derives one flat element index from its
// block/thread coordinates and, when that index is below num_elements, hands
// it to _scatterToIndices4Index together with the mask pointer.
// 'num_indices' is part of the signature but is not referenced in the body.
template<class ElemType>
__global__ void _scatterToIndices(ElemType *indices,
                                  ElemType *value,
                                  ElemType *buffer,
                                  char *mask,
                                  size_t num_row_elements,
                                  size_t num_indices,
                                  CUDA_LONG num_elements)
{
    const CUDA_LONG elementId = blockIdx.x * blockDim.x + threadIdx.x;

    // Threads whose index is at or past num_elements have nothing to do.
    if (!(elementId < num_elements))
        return;

    _scatterToIndices4Index(indices, value, buffer, mask, elementId, num_row_elements);
}

Просмотреть файл

@ -3692,32 +3692,15 @@ Matrix<ElemType>& Matrix<ElemType>::GatherFromTarget(const Matrix<ElemType>& ind
return *this;
}
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::ScatterToIndices(const Matrix<ElemType>& values, const Matrix<ElemType>& indices, size_t row_elements)
Matrix<ElemType>& Matrix<ElemType>::ScatterToIndices(const Matrix<ElemType>& values, const Matrix<ElemType>& indices, size_t row_elements, const Matrix<char>* mask/* = nullptr*/)
{
if (indices.IsEmpty() || values.IsEmpty())
LogicError("ScatterAccordingIndices: input matrix is empty.");
DISPATCH_MATRIX_ON_FLAG(&values,
this,
m_CPUMatrix->ScatterToIndices(*values.m_CPUMatrix, *indices.m_CPUMatrix, row_elements),
m_GPUMatrix->ScatterToIndices(*values.m_GPUMatrix, *indices.m_GPUMatrix, row_elements),
NOT_IMPLEMENTED,
NOT_IMPLEMENTED);
return *this;
}
template <class ElemType>
Matrix<ElemType>& Matrix<ElemType>::ScatterToIndicesWithMask(const Matrix<ElemType>& values, const Matrix<ElemType>& indices, const Matrix<char>& mask, size_t row_elements)
{
if (indices.IsEmpty() || values.IsEmpty() || mask.IsEmpty())
LogicError("ScatterAccordingIndices: input matrix is empty.");
if (indices.GetNumRows() != mask.GetNumRows() || indices.GetNumCols() != mask.GetNumCols())
LogicError("ScatterAccordingIndices: indices matrix must have same shape with mask matrix.");
DISPATCH_MATRIX_ON_FLAG(&values,
this,
m_CPUMatrix->ScatterToIndices(*values.m_CPUMatrix, *indices.m_CPUMatrix, *mask.m_CPUMatrix, row_elements),
m_GPUMatrix->ScatterToIndices(*values.m_GPUMatrix, *indices.m_GPUMatrix, *mask.m_GPUMatrix, row_elements),
m_CPUMatrix->ScatterToIndices(*values.m_CPUMatrix, *indices.m_CPUMatrix, row_elements, mask ? mask->m_CPUMatrix.get() : nullptr),
m_GPUMatrix->ScatterToIndices(*values.m_GPUMatrix, *indices.m_GPUMatrix, row_elements, mask ? mask->m_GPUMatrix.get() : nullptr),
NOT_IMPLEMENTED,
NOT_IMPLEMENTED);

Просмотреть файл

@ -320,8 +320,7 @@ public:
Matrix<ElemType>& AssignOneHot(const Matrix<ElemType>& a, vector<size_t>& shape, size_t axis, bool is_sparse);
Matrix<ElemType>& GatherFromTarget(const Matrix<ElemType>& indices, const Matrix<ElemType>& target, size_t row_elements);
Matrix<ElemType>& ScatterToIndices(const Matrix<ElemType>& values, const Matrix<ElemType>& indices, size_t row_elements);
Matrix<ElemType>& ScatterToIndicesWithMask(const Matrix<ElemType>& values, const Matrix<ElemType>& indices, const Matrix<char>& mask, size_t row_elements);
Matrix<ElemType>& ScatterToIndices(const Matrix<ElemType>& values, const Matrix<ElemType>& indices, size_t row_elements, const Matrix<char>* mask = nullptr);
Matrix<ElemType> Transpose(); // This method doesn't change state of Matrix. It should be a const function
Matrix<ElemType>& AssignTransposeOf(const Matrix<ElemType>& a);