U-FAREAST\fseide 2016-04-29 22:08:16 -07:00
Parents 013733a41d 73a8cd8eed
Commit 2d15aa116f
21 changed files with 273 additions and 92 deletions

View file

@ -524,7 +524,7 @@ Parameters =
RNNs =
[
# LSTMP -- LSTM function with projection and self-stabilization
# Projection it enabled by passing different values for outputDim and cellDim.
# Projection is enabled by passing different values for outputDim and cellDim.
# This is the stateless version that takes the previous state as an input.
# It returns a dictionary with three members: h, c, and dim=h.dim (for convenience). prevState must have h and c.
LSTMP (outputDim, cellDim=outputDim, x, inputDim=x.dim, prevState, enableSelfStabilization=false) =

View file

@ -938,7 +938,7 @@ public:
if (m_value)
{
node->CreateValueMatrixIfNull();
node->m_value->SetValue(*m_value);
node->m_value->SetValue(*m_value);
}
else
node->m_value = nullptr;
@ -1549,6 +1549,7 @@ public:
void Trace()
{
//DebugLogMinibatch();
#if 0
static const std::set<std::wstring> toLog{
L"labelSentenceStartEmbedded",

View file

@ -320,6 +320,7 @@ public:
{
// currently we only support one combination when the input is sparse
// If input data is sparse, then gradient is block sparse.
// BUGBUG: This does not accumulate into the Input(0)->Gradient, which might cause problems elsewhere.
if (Input(1)->Value().GetMatrixType() == SPARSE && Input(0)->Gradient().GetMatrixType() == DENSE && Gradient().GetMatrixType() == DENSE)
Input(0)->Gradient().SwitchToMatrixType(SPARSE, MatrixFormat::matrixFormatSparseBlockCol, false);
auto input0Gradient = OneSampleTensorFor(0, /*gradient=*/true, fr.AllowBroadcast());
@ -433,9 +434,11 @@ public:
std::swap(dimsA[0], dimsA[1]);
// update if LearnableParameter
Input(0)->ValidateInferInputDimsFrom(TensorShape(dimsA));
#if 0 // Removed this, because the check is just wrong.
// and verify once again
if (isFinalValidationPass && Input(0)->GetSampleLayout().GetDims() != dimsA)
InvalidArgument("%ls %ls operation: Left [%s] and right [%s] operands' shapes are not compatible.", NodeName().c_str(), OperationName().c_str(), dimsAstring.c_str(), dimsBstring.c_str());
#endif
}
}
@ -575,7 +578,7 @@ public:
Matrix<ElemType> sliceInput1Value = Input(1)->ValueFor(fr);
Matrix<ElemType> sliceOutputValue = ValueFor(fr);
sliceOutputValue.SetValue(sliceInput1Value);
sliceOutputValue.AssignValuesOf(sliceInput1Value);
sliceOutputValue.ColumnElementMultiplyWith(Input(0)->ValueAsMatrix());
}

View file

@ -321,7 +321,7 @@ public:
inp = Input(0)->ValueFor(frDelayed.Sequence(id));
// inp = Input(0)->ValueFor(FrameRange(m_pMBLayout, t_delayed).Sequence(id));
out.SetValue(inp);
out.AssignValuesOf(inp);
}
}
}
@ -358,7 +358,7 @@ public:
inp = Input(0)->ValueFor(frDelayed);
// inp = Input(0)->ValueFor(FrameRange(m_pMBLayout, t_delayed));
out.SetValue(inp);
out.AssignValuesOf(inp);
}
}

View file

@ -150,12 +150,12 @@ public:
virtual void /*ComputationNode::*/ ForwardProp(const FrameRange& fr) override
{
ValueFor(fr).SetValue(Input(0)->ValueFor(fr));
ValueFor(fr).AssignValuesOf(Input(0)->ValueFor(fr));
}
virtual void /*ComputationNode::*/ BackpropTo(const size_t inputIndex, const FrameRange& fr) override
{
Input(inputIndex)->GradientFor(fr).SetValue(GradientFor(fr));
Input(inputIndex)->GradientFor(fr).AssignValuesOf(GradientFor(fr));
}
virtual bool OutputUsedInComputingInputNodesGradients() const override { return false; }
@ -255,7 +255,7 @@ public:
Input(1)->NodeName().c_str(), Input(1)->OperationName().c_str());
// copy the data from 'dataInput'
ValueFor(fr).SetValue(Input(0)->ValueFor(fr.WithLayout(Input(0)->GetMBLayout()))); // just propagate through
ValueFor(fr).AssignValuesOf(Input(0)->ValueFor(fr.WithLayout(Input(0)->GetMBLayout()))); // just propagate through
// TODO: Once we do in-place, the above must include a copy-to-self check (either here or inside the matrix lib).
}
@ -1080,7 +1080,7 @@ public:
// (We still need to copy the values since there is currently no way to point to an input function value while reshaping at the same time.)
if (!m_pMBLayout || factor() == 1)
{
Value().Reshaped(newCols * m_numTargetRows, 1).SetValue(Input(0)->Value().Reshaped(cols * rows, 1)); // copy the values as one long vector
Value().Reshaped(newCols * m_numTargetRows, 1).AssignValuesOf(Input(0)->Value().Reshaped(cols * rows, 1)); // copy the values as one long vector
}
// layout case: reshape semantics happen across parallel sequences, i.e. they require data shuffling
else

View file

@ -177,6 +177,7 @@ public:
// first compute the softmax (column-wise)
// Note that we need both log and non-log for gradient computation.
m_logSoftmaxOfRight->AssignLogSoftmaxOf(Input(1)->ValueFor(fr), true);
// BUGBUG: No need to compute m_softmaxOfRight in ForwardProp, should be moved to BackpropTo().
m_softmaxOfRight->SetValue(*m_logSoftmaxOfRight);
m_softmaxOfRight->InplaceExp();
// flatten all gaps to zero, such that gaps will contribute zero to the sum
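The pattern above computes a numerically stable log-softmax once and then derives the softmax by exponentiating it. A self-contained toy sketch of that derivation (single column, hypothetical values; not CNTK code):

#include <cmath>
#include <cstdio>

// Toy single-column version of the AssignLogSoftmaxOf + InplaceExp pattern:
// compute log-softmax with the max-shift trick, then exponentiate in place.
int main()
{
    double v[3] = { 1.0, 2.0, 3.0 };
    double mx = v[0];
    for (double x : v)
        mx = x > mx ? x : mx;               // shift by the max for stability
    double sum = 0.0;
    for (double x : v)
        sum += std::exp(x - mx);
    double logZ = mx + std::log(sum);       // log of the partition function
    for (double& x : v)
        x = std::exp(x - logZ);             // softmax = exp(log-softmax)
    printf("%f %f %f\n", v[0], v[1], v[2]); // 0.090031 0.244728 0.665241
    return 0;
}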
@ -780,7 +781,7 @@ private:
case 3:
{
Matrix<ElemType> grd_t = Input(CLASSPROBINDATA)->GradientFor(fr);
grd_t.SetValue(Input(CLASSPROBINDATA)->DataFor(m_clsSoftmax, fr));
grd_t.AssignValuesOf(Input(CLASSPROBINDATA)->DataFor(m_clsSoftmax, fr));
ComputeCEPartialToSoftmaxInputs(grd_t, Gradient(), c_t);
break;
}
@ -811,7 +812,7 @@ private:
size_t idx_in_class = y_t - lft_bnd;
ComputeCEPartialToSoftmaxInputs(softMax, Gradient(), idx_in_class);
m_grdToSoftMaxInput.ColumnSlice(sz, nbr_wrd).SetValue(softMax);
m_grdToSoftMaxInput.ColumnSlice(sz, nbr_wrd).AssignValuesOf(softMax);
});
m_needRecomputeGradientToSoftmaxInput = false;
@ -1768,7 +1769,7 @@ public:
sliceOutputValue, m_epsilon, *m_saveMean, *m_saveInvStdDev);
m_mbCount++;
}
}
void Validate(bool isFinalValidationPass) override
{

View file

@ -852,6 +852,24 @@ void CPUMatrix<ElemType>::SetValue(const CPUMatrix<ElemType>& deepCopyFrom)
SetValue(deepCopyFrom.GetNumRows(), deepCopyFrom.GetNumCols(), deepCopyFrom.Data(), 0);
}
template <class ElemType>
void CPUMatrix<ElemType>::SetValue(const GPUMatrix<ElemType>& /*deepCopyFrom*/)
{
NOT_IMPLEMENTED;
}
template <class ElemType>
void CPUMatrix<ElemType>::SetValue(const CPUSparseMatrix<ElemType>& deepCopyFrom)
{
deepCopyFrom.AssignColumnSliceToDense(*this, 0, deepCopyFrom.GetNumCols());
}
template <class ElemType>
void CPUMatrix<ElemType>::SetValue(const GPUSparseMatrix<ElemType>& /*deepCopyFrom*/)
{
NOT_IMPLEMENTED;
}
template <class ElemType>
void CPUMatrix<ElemType>::SetValue(const size_t numRows, const size_t numCols, ElemType* pArray, const size_t matrixFlags)
{
@ -6290,6 +6308,9 @@ template CPUMatrix<char>& CPUMatrix<char>::operator=(CPUMatrix<char>&&);
template void CPUMatrix<char>::SetValue(const char);
template void CPUMatrix<char>::SetValue(const size_t numRows, const size_t numCols, char* pArray, size_t matrixFlags);
template void CPUMatrix<char>::SetValue(CPUMatrix<char> const&);
template void CPUMatrix<char>::SetValue(GPUMatrix<char> const&);
template void CPUMatrix<char>::SetValue(CPUSparseMatrix<char> const&);
template void CPUMatrix<char>::SetValue(GPUSparseMatrix<char> const&);
template void CPUMatrix<char>::RequireSize(const size_t numRows, const size_t numCols, bool growOnly);
template void CPUMatrix<char>::Resize(const size_t numRows, const size_t numCols, bool growOnly);

View file

@ -13,6 +13,10 @@
#include <ctime>
#include <limits.h>
#include "GPUMatrix.h"
#include "CPUSparseMatrix.h"
#include "GPUSparseMatrix.h"
// NOTE NOTE NOTE:
// use CPUSingleMatrix and CPUDoubleMatrix instead of using the template directly
///////////////////////////////////////////////
@ -127,6 +131,9 @@ public:
void SetValue(const ElemType v);
void SetValue(const CPUMatrix<ElemType>& deepCopyFrom);
void SetValue(const GPUMatrix<ElemType>& deepCopyFrom);
void SetValue(const CPUSparseMatrix<ElemType>& deepCopyFrom);
void SetValue(const GPUSparseMatrix<ElemType>& deepCopyFrom);
void SetValue(const size_t numRows, const size_t numCols, ElemType* pArray, size_t matrixFlags = matrixFlagNormal);
void MaskColumnsValue(const CPUMatrix<char>& columnsMask, ElemType val);

View file

@ -264,6 +264,32 @@ void CPUSparseMatrix<ElemType>::SetValue(const CPUSparseMatrix<ElemType>& v)
memcpy(RowLocation(), v.RowLocation(), v.RowSize());
memcpy(ColLocation(), v.ColLocation(), v.ColSize());
}
if (v.m_sliceViewOffset > 0)
{
CPUSPARSE_INDEX_TYPE* loc = (GetFormat() == matrixFormatSparseCSC) ? ColLocation() : RowLocation();
size_t len = (GetFormat() == matrixFormatSparseCSC) ? ColSize() : RowSize();
CPUSPARSE_INDEX_TYPE offset = loc[0];
for (size_t c = 0; c < len; c++)
loc[c] -= offset;
}
}
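The loop above rebases the slice view's compressed index array: when the source is a slice, its column (or row) pointers still hold absolute offsets into the parent matrix's nonzero buffer, so subtracting the first entry renormalizes them to start at zero. A standalone toy sketch of just that step (hypothetical CSC column pointers; not CNTK code):

#include <cstdio>

int main()
{
    // Column pointers of a 3-column CSC slice, still carrying absolute
    // offsets into the parent's nonzero buffer.
    int colPtr[] = { 5, 7, 10, 12 };
    const size_t len = sizeof(colPtr) / sizeof(colPtr[0]);
    int offset = colPtr[0];          // first nonzero of the slice
    for (size_t c = 0; c < len; c++)
        colPtr[c] -= offset;         // rebase so the slice starts at 0
    for (size_t c = 0; c < len; c++)
        printf("%d ", colPtr[c]);    // prints: 0 2 5 7
    return 0;
}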
template <class ElemType>
void CPUSparseMatrix<ElemType>::SetValue(const CPUMatrix<ElemType>& /*v*/)
{
NOT_IMPLEMENTED;
}
template <class ElemType>
void CPUSparseMatrix<ElemType>::SetValue(const GPUMatrix<ElemType>& /*v*/)
{
NOT_IMPLEMENTED;
}
template <class ElemType>
void CPUSparseMatrix<ElemType>::SetValue(const GPUSparseMatrix<ElemType>& /*v*/)
{
NOT_IMPLEMENTED;
}
template <class ElemType>
@ -403,6 +429,7 @@ void CPUSparseMatrix<ElemType>::AssignColumnSliceToDense(CPUMatrix<ElemType>& sl
// We can either error out or RequireSize. Because RequireSize will error out if it's not allowed, I think this makes more sense.
slice.RequireSize(m_numRows, numCols);
memset(slice.Data(), 0, sizeof(ElemType) * slice.GetNumElements());
#pragma omp parallel for
for (long j = 0; j < numCols; j++)
{
@ -1348,7 +1375,10 @@ template CPUSparseMatrix<char>::CPUSparseMatrix(CPUSparseMatrix<char> const&);
template CPUSparseMatrix<char>::CPUSparseMatrix(CPUSparseMatrix<char>&&);
template CPUSparseMatrix<char>& CPUSparseMatrix<char>::operator=(CPUSparseMatrix<char>&& moveFrom);
template void CPUSparseMatrix<char>::SetValue(size_t, size_t, char);
template void CPUSparseMatrix<char>::SetValue(CPUMatrix<char> const&);
template void CPUSparseMatrix<char>::SetValue(GPUMatrix<char> const&);
template void CPUSparseMatrix<char>::SetValue(CPUSparseMatrix<char> const&);
template void CPUSparseMatrix<char>::SetValue(GPUSparseMatrix<char> const&);
template char* CPUSparseMatrix<char>::Data() const;
template char* CPUSparseMatrix<char>::Data();
template void CPUSparseMatrix<char>::Reset(void);

View file

@ -6,6 +6,8 @@
#include <stdio.h>
#include "CPUMatrix.h"
#include "GPUMatrix.h"
#include "GPUSparseMatrix.h"
#include <map>
#include <unordered_map>
@ -82,7 +84,11 @@ public:
public:
void SetValue(const size_t row, const size_t col, ElemType val);
void SetValue(const CPUMatrix<ElemType>& /*val*/);
void SetValue(const GPUMatrix<ElemType>& /*val*/);
void SetValue(const CPUSparseMatrix<ElemType>& /*val*/);
void SetValue(const GPUSparseMatrix<ElemType>& /*val*/);
void MaskColumnsValue(const CPUMatrix<char>& columnsMask, ElemType val);
size_t BufferSize() const

View file

@ -312,7 +312,7 @@ protected:
if (in.GetMatrixType() == MatrixType::DENSE || m_gpuSparse1D)
inputSubBatch = in.ColumnSlice(startSampleId, smallBatchSize);
else
inputSubBatch.SetValue(in.ColumnSlice(startSampleId, smallBatchSize), in.GetFormat());
inputSubBatch.SetValue(in.ColumnSlice(startSampleId, smallBatchSize));
if (m_gpuSparseOpt)
{

View file

@ -1093,6 +1093,25 @@ void GPUMatrix<ElemType>::SetValue(const GPUMatrix<ElemType>& deepCopyFrom)
SetValue(deepCopyFrom.GetNumRows(), deepCopyFrom.GetNumCols(), deepCopyFrom.GetComputeDeviceId(), deepCopyFrom.Data(), matrixFlagSetValueOnDevice);
}
template <class ElemType>
void GPUMatrix<ElemType>::SetValue(const CPUMatrix<ElemType>& /*deepCopyFrom*/)
{
NOT_IMPLEMENTED;
}
template <class ElemType>
void GPUMatrix<ElemType>::SetValue(const CPUSparseMatrix<ElemType>& /*deepCopyFrom*/)
{
NOT_IMPLEMENTED;
}
template <class ElemType>
void GPUMatrix<ElemType>::SetValue(const GPUSparseMatrix<ElemType>& deepCopyFrom)
{
deepCopyFrom.CopyToDenseMatrix(*this);
}
template <class ElemType>
void GPUMatrix<ElemType>::SetValue(const size_t numRows, const size_t numCols, int deviceId, ElemType* pArray, size_t matrixFlags)
{
@ -4358,7 +4377,10 @@ template GPUMatrix<char>& GPUMatrix<char>::operator=(GPUMatrix<char>&&);
template GPUMatrix<char>::GPUMatrix(int);
template void GPUMatrix<char>::SetValue(const char);
template void GPUMatrix<char>::SetValue(const size_t numRows, const size_t numCols, int deviceId, char* pArray, size_t matrixFlags);
template void GPUMatrix<char>::SetValue(CPUMatrix<char> const&);
template void GPUMatrix<char>::SetValue(GPUMatrix<char> const&);
template void GPUMatrix<char>::SetValue(CPUSparseMatrix<char> const&);
template void GPUMatrix<char>::SetValue(GPUSparseMatrix<char> const&);
template GPUMatrix<int>::GPUMatrix(const size_t, const size_t, int, int*, const size_t);
template GPUMatrix<int>::~GPUMatrix();

View file

@ -19,6 +19,10 @@
#include <memory> // for unique_ptr
#include <limits.h> // for ULONG_MAX
#include "CPUMatrix.h"
#include "CPUSparseMatrix.h"
#include "GPUSparseMatrix.h"
#ifndef _WIN32
#include <unistd.h>
#endif
@ -223,7 +227,10 @@ public:
void MaskColumnsValue(const GPUMatrix<char>& columnsMask, ElemType val);
void SetValue(const CPUMatrix<ElemType>& deepCopyFrom);
void SetValue(const GPUMatrix<ElemType>& deepCopyFrom);
void SetValue(const CPUSparseMatrix<ElemType>& deepCopyFrom);
void SetValue(const GPUSparseMatrix<ElemType>& deepCopyFrom);
void SetValue(const size_t numRows, const size_t numCols, int deviceId, ElemType* pArray, size_t matrixFlags = matrixFlagNormal);
void SetDiagonalValue(const ElemType v);

View file

@ -260,22 +260,22 @@ void GPUSparseMatrix<ElemType>::CopyToDenseMatrix(GPUMatrix<ElemType>& denseMatr
{
if (sizeof(ElemType) == sizeof(float))
{
CUSPARSE_CALL(cusparseScsr2dense(cusparseHandle, int(GetNumRows()), int(GetNumCols()), descr, (float*) Data(), RowLocation(), ColLocation(), (float*) denseMatrix.Data(), int(GetNumRows())));
CUSPARSE_CALL(cusparseScsr2dense(cusparseHandle, int(GetNumRows()), int(GetNumCols()), descr, (float*) Buffer(), RowLocation(), ColLocation(), (float*) denseMatrix.Data(), int(GetNumRows())));
}
else
{
CUSPARSE_CALL(cusparseDcsr2dense(cusparseHandle, int(GetNumRows()), int(GetNumCols()), descr, (double*) Data(), RowLocation(), ColLocation(), (double*) denseMatrix.Data(), int(GetNumRows())));
CUSPARSE_CALL(cusparseDcsr2dense(cusparseHandle, int(GetNumRows()), int(GetNumCols()), descr, (double*) Buffer(), RowLocation(), ColLocation(), (double*) denseMatrix.Data(), int(GetNumRows())));
}
}
else if (GetFormat() == MatrixFormat::matrixFormatSparseCSC)
{
if (sizeof(ElemType) == sizeof(float))
{
CUSPARSE_CALL(cusparseScsc2dense(cusparseHandle, int(GetNumRows()), int(GetNumCols()), descr, (float*) Data(), RowLocation(), ColLocation(), (float*) denseMatrix.Data(), int(GetNumRows())));
CUSPARSE_CALL(cusparseScsc2dense(cusparseHandle, int(GetNumRows()), int(GetNumCols()), descr, (float*) Buffer(), RowLocation(), ColLocation(), (float*) denseMatrix.Data(), int(GetNumRows())));
}
else
{
CUSPARSE_CALL(cusparseDcsc2dense(cusparseHandle, int(GetNumRows()), int(GetNumCols()), descr, (double*) Data(), RowLocation(), ColLocation(), (double*) denseMatrix.Data(), int(GetNumRows())));
CUSPARSE_CALL(cusparseDcsc2dense(cusparseHandle, int(GetNumRows()), int(GetNumCols()), descr, (double*) Buffer(), RowLocation(), ColLocation(), (double*) denseMatrix.Data(), int(GetNumRows())));
}
}
else
@ -414,6 +414,12 @@ void GPUSparseMatrix<ElemType>::ChangeDeviceTo(DEVICEID_TYPE to_id)
SetComputeDeviceId(PrepareDevice(to_id));
}
template <class ElemType>
void GPUSparseMatrix<ElemType>::SetValue(const CPUMatrix<ElemType>& /*denseMatrix*/)
{
NOT_IMPLEMENTED;
}
template <class ElemType>
void GPUSparseMatrix<ElemType>::SetValue(const GPUMatrix<ElemType>& denseMatrix)
{
@ -2656,6 +2662,7 @@ template GPUSparseMatrix<char>::GPUSparseMatrix(GPUSparseMatrix<char>&&);
template void GPUSparseMatrix<char>::SetValue(CPUSparseMatrix<char> const&);
template void GPUSparseMatrix<char>::SetValue(GPUSparseMatrix<char> const&);
template void GPUSparseMatrix<char>::SetValue(GPUMatrix<char> const&);
template void GPUSparseMatrix<char>::SetValue(CPUMatrix<char> const&);
template void GPUSparseMatrix<char>::CopyToDenseMatrix(GPUMatrix<char>&) const;
template void GPUSparseMatrix<char>::CopyToCPUSparseMatrix(CPUSparseMatrix<char>&) const;
template void GPUSparseMatrix<char>::ChangeDeviceTo(int);

View file

@ -5,6 +5,7 @@
#pragma once
#include "CPUMatrix.h"
#include "GPUMatrix.h"
#include "CPUSparseMatrix.h"
#include <functional>
@ -281,10 +282,11 @@ public:
return SecondaryIndexLocation();
}
void SetValue(const GPUSparseMatrix<ElemType>& deepCopyFrom);
void SetValue(const CPUSparseMatrix<ElemType>& deepCopyFrom);
void SetValue(const GPUMatrix<ElemType>& denseMatrix, const MatrixFormat matrixFormat);
void SetValue(const CPUMatrix<ElemType>& denseMatrix);
void SetValue(const GPUMatrix<ElemType>& denseMatrix);
void SetValue(const CPUSparseMatrix<ElemType>& deepCopyFrom);
void SetValue(const GPUSparseMatrix<ElemType>& deepCopyFrom);
void SetValue(const GPUMatrix<ElemType>& denseMatrix, const MatrixFormat matrixFormat);
GPUSPARSE_INDEX_TYPE* GetCondensedVector() const;
void MaskColumnsValue(const GPUMatrix<char>& columnsMask, ElemType val);

View file

@ -1198,14 +1198,14 @@ void Matrix<ElemType>::SetColumn(const Matrix<ElemType>& colMat, size_t colInd)
}
template <class ElemType>
void Matrix<ElemType>::SetValue(const Matrix<ElemType>& deepCopyFrom, const MatrixFormat format /*= matrixFormatSparseCSR*/)
void Matrix<ElemType>::SetValue(const Matrix<ElemType>& deepCopyFrom)
{
if (this == &deepCopyFrom)
return;
m_preferredDeviceId = deepCopyFrom.m_preferredDeviceId;
DecideAndMoveToRightDevice(deepCopyFrom, *this);
SwitchToMatrixType(deepCopyFrom.GetMatrixType(), format, false);
SwitchToMatrixType(deepCopyFrom.GetMatrixType(), deepCopyFrom.GetFormat(), false);
DISPATCH_MATRIX_ON_FLAG(&deepCopyFrom,
this,
@ -1215,6 +1215,48 @@ void Matrix<ElemType>::SetValue(const Matrix<ElemType>& deepCopyFrom, const Matr
m_GPUSparseMatrix->SetValue(*deepCopyFrom.m_GPUSparseMatrix));
}
template <class ElemType>
void Matrix<ElemType>::AssignValuesOf(const Matrix<ElemType>& deepCopyFrom)
{
if (this == &deepCopyFrom)
return;
DISPATCH_MATRIX_ON_FLAG(this, this,
{
// Set CPUMatrix from:
DISPATCH_MATRIX_ON_FLAG(&deepCopyFrom, &deepCopyFrom,
{ m_CPUMatrix->SetValue(*deepCopyFrom.m_CPUMatrix); },
{ m_CPUMatrix->SetValue(*deepCopyFrom.m_GPUMatrix); },
{ m_CPUMatrix->SetValue(*deepCopyFrom.m_CPUSparseMatrix); },
{ m_CPUMatrix->SetValue(*deepCopyFrom.m_GPUSparseMatrix); });
},
{
// Set GPUMatrix from:
DISPATCH_MATRIX_ON_FLAG(&deepCopyFrom, &deepCopyFrom,
{ m_GPUMatrix->SetValue(*deepCopyFrom.m_CPUMatrix); },
{ m_GPUMatrix->SetValue(*deepCopyFrom.m_GPUMatrix); },
{ m_GPUMatrix->SetValue(*deepCopyFrom.m_CPUSparseMatrix); },
{ m_GPUMatrix->SetValue(*deepCopyFrom.m_GPUSparseMatrix); });
},
{
// Set CPUSparseMatrix from:
DISPATCH_MATRIX_ON_FLAG(&deepCopyFrom, &deepCopyFrom,
{ m_CPUSparseMatrix->SetValue(*deepCopyFrom.m_CPUMatrix); },
{ m_CPUSparseMatrix->SetValue(*deepCopyFrom.m_GPUMatrix); },
{ m_CPUSparseMatrix->SetValue(*deepCopyFrom.m_CPUSparseMatrix); },
{ m_CPUSparseMatrix->SetValue(*deepCopyFrom.m_GPUSparseMatrix); });
},
{
// Set GPUSparseMatrix from:
DISPATCH_MATRIX_ON_FLAG(&deepCopyFrom, &deepCopyFrom,
{ m_GPUSparseMatrix->SetValue(*deepCopyFrom.m_CPUMatrix); },
{ m_GPUSparseMatrix->SetValue(*deepCopyFrom.m_GPUMatrix); },
{ m_GPUSparseMatrix->SetValue(*deepCopyFrom.m_CPUSparseMatrix); },
{ m_GPUSparseMatrix->SetValue(*deepCopyFrom.m_GPUSparseMatrix); });
});
}
template <class ElemType>
void Matrix<ElemType>::SetValue(const size_t numRows, const size_t numCols, int deviceId, ElemType* pArray, const size_t matrixFlags)
{
@ -1248,18 +1290,18 @@ void Matrix<ElemType>::SetMatrixFromCSCFormat(const CPUSPARSE_INDEX_TYPE* h_CSCC
// Note: The current implementation uses the xPUSparseMatrix as temporary space, which allows memory to be shared between calls. If
// xPUSparseMatrix is a view, this code will cause a runtime error stating that the view is neither writable nor resizable.
DISPATCH_MATRIX_ON_FLAG(this, this,
{
if (!m_CPUSparseMatrix) m_CPUSparseMatrix = make_shared<CPUSparseMatrix<ElemType>>(matrixFormatSparseCSC, numRows, numCols, nz);
m_CPUSparseMatrix->SetMatrixFromCSCFormat(h_CSCCol, h_Row, h_Val, nz, numRows, numCols);
m_CPUSparseMatrix->AssignColumnSliceToDense(*m_CPUMatrix, 0, numCols);
},
{
if (!m_GPUSparseMatrix) m_GPUSparseMatrix = make_shared<GPUSparseMatrix<ElemType>>(numRows, numCols, nz, GetDeviceId(), matrixFormatSparseCSC);
m_GPUSparseMatrix->SetMatrixFromCSCFormat(h_CSCCol, h_Row, h_Val, nz, numRows, numCols);
m_GPUSparseMatrix->AssignColumnSliceToDense(*m_GPUMatrix, 0, numCols);
},
{ m_CPUSparseMatrix->SetMatrixFromCSCFormat(h_CSCCol, h_Row, h_Val, nz, numRows, numCols); },
{ m_GPUSparseMatrix->SetMatrixFromCSCFormat(h_CSCCol, h_Row, h_Val, nz, numRows, numCols); });
{
if (!m_CPUSparseMatrix) m_CPUSparseMatrix = make_shared<CPUSparseMatrix<ElemType>>(matrixFormatSparseCSC, numRows, numCols, nz);
m_CPUSparseMatrix->SetMatrixFromCSCFormat(h_CSCCol, h_Row, h_Val, nz, numRows, numCols);
m_CPUSparseMatrix->AssignColumnSliceToDense(*m_CPUMatrix, 0, numCols);
},
{
if (!m_GPUSparseMatrix) m_GPUSparseMatrix = make_shared<GPUSparseMatrix<ElemType>>(numRows, numCols, nz, GetDeviceId(), matrixFormatSparseCSC);
m_GPUSparseMatrix->SetMatrixFromCSCFormat(h_CSCCol, h_Row, h_Val, nz, numRows, numCols);
m_GPUSparseMatrix->AssignColumnSliceToDense(*m_GPUMatrix, 0, numCols);
},
{ m_CPUSparseMatrix->SetMatrixFromCSCFormat(h_CSCCol, h_Row, h_Val, nz, numRows, numCols); },
{ m_GPUSparseMatrix->SetMatrixFromCSCFormat(h_CSCCol, h_Row, h_Val, nz, numRows, numCols); });
}
template <class ElemType>
@ -1397,52 +1439,58 @@ void Matrix<ElemType>::NormalGrad(Matrix<ElemType>& gradients,
if (!useNesterovMomentum)
{
DISPATCH_MATRIX_ON_FLAG(&gradients,
nullptr,
ScaleAndAdd((1 - momentum) * learnRatePerSample, gradients, momentum, *this);
functionValues -= *this,
ScaleAndAdd((1 - momentum) * learnRatePerSample, gradients, momentum, *this);
functionValues -= *this,
if (momentum != 0) gradients.m_CPUSparseMatrix->NormalGrad(*m_CPUMatrix, momentum);
ScaleAndAdd(-learnRatePerSample, gradients, functionValues),
if (momentum != 0) gradients.m_GPUSparseMatrix->NormalGrad(*m_GPUMatrix, momentum);
ScaleAndAdd(-learnRatePerSample, gradients, functionValues));
DISPATCH_MATRIX_ON_FLAG(&gradients, nullptr,
{
ScaleAndAdd((1 - momentum) * learnRatePerSample, gradients, momentum, *this);
functionValues -= *this;
},
{
ScaleAndAdd((1 - momentum) * learnRatePerSample, gradients, momentum, *this);
functionValues -= *this;
},
{
if (momentum != 0) gradients.m_CPUSparseMatrix->NormalGrad(*m_CPUMatrix, momentum);
ScaleAndAdd(-learnRatePerSample, gradients, functionValues);
},
{
if (momentum != 0) gradients.m_GPUSparseMatrix->NormalGrad(*m_GPUMatrix, momentum);
ScaleAndAdd(-learnRatePerSample, gradients, functionValues);
});
}
else
{
DISPATCH_MATRIX_ON_FLAG(&gradients,
nullptr,
{ /* CPU dense */
ScaleAndAdd((1 - momentum) * learnRatePerSample, gradients, momentum, *this);
ScaleAndAdd(-momentum, *this, functionValues);
ScaleAndAdd(-(1 - momentum) * learnRatePerSample, gradients, functionValues);
// w_t = w_{t-1} - momentum * v_t - (1 - momentum) * learnRatePerSample * gradient
},
{ /* GPU dense */
ScaleAndAdd((1 - momentum) * learnRatePerSample, gradients, momentum, *this);
ScaleAndAdd(-momentum, *this, functionValues);
ScaleAndAdd(-(1 - momentum) * learnRatePerSample, gradients, functionValues);
},
{ /* CPU sparse */
if (momentum != 0)
{
Matrix<ElemType> gradientCache(gradients.GetDeviceId());
gradientCache.SetValue(gradients);
gradients.m_CPUSparseMatrix->NormalGrad(*m_CPUMatrix, momentum);
ScaleAndAdd(-momentum, *this, functionValues);
ScaleAndAdd(-(1 - momentum) * learnRatePerSample, gradientCache, functionValues);
}
},
{ /* GPU sparse */
if (momentum != 0)
{
Matrix<ElemType> gradientCache(gradients.GetDeviceId());
gradientCache.SetValue(gradients);
gradients.m_GPUSparseMatrix->NormalGrad(*m_GPUMatrix, momentum);
ScaleAndAdd(-momentum, *this, functionValues);
ScaleAndAdd(-(1 - momentum) * learnRatePerSample, gradientCache, functionValues);
}
});
DISPATCH_MATRIX_ON_FLAG(&gradients, nullptr,
{ /* CPU dense */
ScaleAndAdd((1 - momentum) * learnRatePerSample, gradients, momentum, *this);
ScaleAndAdd(-momentum, *this, functionValues);
ScaleAndAdd(-(1 - momentum) * learnRatePerSample, gradients, functionValues);
// w_t = w_{t-1} - momentum * v_t - (1 - momentum) * learnRatePerSample * gradient
},
{ /* GPU dense */
ScaleAndAdd((1 - momentum) * learnRatePerSample, gradients, momentum, *this);
ScaleAndAdd(-momentum, *this, functionValues);
ScaleAndAdd(-(1 - momentum) * learnRatePerSample, gradients, functionValues);
},
{ /* CPU sparse */
if (momentum != 0)
{
Matrix<ElemType> gradientCache(gradients.GetDeviceId());
gradientCache.AssignValuesOf(gradients);
gradients.m_CPUSparseMatrix->NormalGrad(*m_CPUMatrix, momentum);
ScaleAndAdd(-momentum, *this, functionValues);
ScaleAndAdd(-(1 - momentum) * learnRatePerSample, gradientCache, functionValues);
}
},
{ /* GPU sparse */
if (momentum != 0)
{
Matrix<ElemType> gradientCache(gradients.GetDeviceId());
gradientCache.AssignValuesOf(gradients);
gradients.m_GPUSparseMatrix->NormalGrad(*m_GPUMatrix, momentum);
ScaleAndAdd(-momentum, *this, functionValues);
ScaleAndAdd(-(1 - momentum) * learnRatePerSample, gradientCache, functionValues);
}
});
}
}
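For intuition, a minimal scalar sketch of the two dense update paths dispatched above (plain vs. Nesterov momentum; names and values are hypothetical, with v standing in for the smoothed gradient held in *this):

#include <cstdio>

int main()
{
    double w = 1.0;                   // weight (functionValues)
    double v = 0.0;                   // smoothed gradient (*this)
    const double g = 0.5;             // current gradient
    const double lr = 0.1, momentum = 0.9;
    const bool useNesterovMomentum = true;

    // First ScaleAndAdd: v_t = momentum * v_{t-1} + (1 - momentum) * lr * g
    v = momentum * v + (1.0 - momentum) * lr * g;
    if (!useNesterovMomentum)
        w -= v;                                        // plain momentum step
    else
        w -= momentum * v + (1.0 - momentum) * lr * g; // Nesterov lookahead step
    printf("w = %f\n", w);
    return 0;
}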
@ -1543,7 +1591,7 @@ Matrix<ElemType> Matrix<ElemType>::RepMat(const Matrix<ElemType>& frmMat, const
Matrix<ElemType> c(nRows, newCols, frmMat.GetDeviceId());
for (size_t i = 0; i < colRatio; i++)
{
c.ColumnSlice(i * nCols, nCols).SetValue(frmMat);
c.ColumnSlice(i * nCols, nCols).AssignValuesOf(frmMat);
}
return c;
@ -5007,9 +5055,9 @@ Matrix<ElemType>& Matrix<ElemType>::Shift(const Matrix<ElemType>& a, int shift)
long n = (long) GetNumCols();
if (shift >= 0 && shift < n)
us.ColumnSlice(shift, n - shift).SetValue(a.ColumnSlice(0, n - shift));
us.ColumnSlice(shift, n - shift).AssignValuesOf(a.ColumnSlice(0, n - shift));
if (shift < 0 && shift > -n)
us.ColumnSlice(0, n + shift).SetValue(a.ColumnSlice(-shift, n + shift));
us.ColumnSlice(0, n + shift).AssignValuesOf(a.ColumnSlice(-shift, n + shift));
return *this;
}
@ -5328,7 +5376,9 @@ template size_t Matrix<char>::GetNumRows() const;
template size_t Matrix<char>::GetNumCols() const;
template void Matrix<char>::SetValue(const char);
template void Matrix<char>::SetValue(size_t numRows, const size_t numCols, int deviceId, char* pArray, size_t matrixFlags);
template void Matrix<char>::SetValue(const Matrix<char>&, MatrixFormat);
//template void Matrix<char>::SetValue(const Matrix<char>&, MatrixFormat);
template void Matrix<char>::SetValue(const Matrix<char>&);
template void Matrix<char>::AssignValuesOf (const Matrix<char>&);
template bool Matrix<char>::IsEmpty() const;
template void Matrix<char>::Resize(const size_t numRows, const size_t numCols, const size_t numNZElemToReserve, bool growOnly);

View file

@ -236,7 +236,11 @@ public:
void SetValue(const ElemType v);
void SetValue(const DeviceBoundNumber<ElemType>& db_number);
void SetValue(const Matrix<ElemType>& deepCopyFrom, const MatrixFormat format = matrixFormatSparseCSR); // BUGBUG: default for 'format' is unexpected
//void SetValue (const Matrix<ElemType>& deepCopyFrom, const MatrixFormat format = matrixFormatSparseCSR); // BUGBUG: default for 'format' is unexpected
// SetValue respects the source matrix's information. It moves the target's location (if necessary), and then copies the source's values.
void SetValue (const Matrix<ElemType>& deepCopyFrom);
// AssignValuesOf respects the target matrix's information. It copies the values from the source into the memory of the target.
void AssignValuesOf(const Matrix<ElemType>& deepCopyFrom);
void SetValue(const size_t numRows, const size_t numCols, int deviceId, ElemType* pArray, const size_t matrixFlags = matrixFlagNormal);
void SetValue(const size_t rIdx, const size_t cIdx, ElemType val); // set matrix sparsely
void SetValue(const size_t numRows, const size_t numCols, std::initializer_list<ElemType> l) // SetValue(2,3, {1,2,3, 4,5,6});
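A minimal usage sketch of the distinction drawn in the two comments above (hypothetical helper; assumes the Matrix<ElemType> API declared in this header):

#include "Matrix.h"
using namespace Microsoft::MSR::CNTK;

// Suppose 'dst' is a dense CPU matrix and 'src' a sparse GPU matrix.
void Illustrate(Matrix<float>& dst, const Matrix<float>& src)
{
    // SetValue follows the source: 'dst' is moved to src's device and
    // switched to src's matrix type and format before the values are copied.
    dst.SetValue(src);

    // AssignValuesOf keeps the target's device and type: src's values are
    // converted and copied into dst's existing storage.
    dst.AssignValuesOf(src);
}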

View file

@ -335,6 +335,8 @@ void SparseBinaryInput<ElemType>::Init(std::map<std::wstring, std::wstring> rena
m_inFile.seekg(0, ios::end);
m_fileSize = (size_t) m_inFile.tellg();
m_maxMBSize = 0;
}
template <class ElemType>
@ -436,19 +438,28 @@ void SparseBinaryInput<ElemType>::StartDistributedMinibatchLoop(size_t mbSize, s
ReadOffsets(startMB, m_windowSize);
m_maxMBSize = 0;
size_t maxMBSize = 0;
for (size_t c = 0; c < m_windowSize; c++)
{
m_maxMBSize = max(m_maxMBSize, (size_t)(m_offsets[c + 1] - m_offsets[c]));
maxMBSize = max(maxMBSize, (size_t)(m_offsets[c + 1] - m_offsets[c]));
// fprintf(stderr, "m_offsets[%lu] = %lu\n", c, m_offsets[c]);
}
// fprintf(stderr, "max mb size: %ld\n", m_maxMBSize);
size_t maxMem = 1024 * 1024 * 1024; // 1GB
size_t maxPointers = maxMem / m_maxMBSize;
for (size_t c = 0; c < maxPointers; c++)
if (maxMBSize > m_maxMBSize)
{
void* dataBuffer = malloc(m_maxMBSize);
m_dataToProduce.push(dataBuffer);
m_maxMBSize = maxMBSize;
while (m_dataToProduce.size() > 0)
{
free(m_dataToProduce.pop());
}
// fprintf(stderr, "max mb size: %ld\n", m_maxMBSize);
size_t maxMem = 1024 * 1024 * 1024; // 1GB
size_t maxPointers = maxMem / m_maxMBSize;
for (size_t c = 0; c < maxPointers; c++)
{
void* dataBuffer = malloc(m_maxMBSize);
m_dataToProduce.push(dataBuffer);
}
}
std::thread readData([this]
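The rewritten loop above makes the producer-side buffer pool grow-only: the pool is flushed and reallocated only when a larger minibatch appears, rather than on every call to StartDistributedMinibatchLoop. A standalone sketch of that pattern (hypothetical class; same 1GB cap as the code):

#include <cstdlib>
#include <queue>

// Grow-only buffer pool: reallocate only when a larger block size is needed.
class BufferPool
{
    std::queue<void*> m_free;
    size_t m_blockSize = 0;

public:
    void EnsureBlockSize(size_t blockSize, size_t maxMem = 1024 * 1024 * 1024)
    {
        if (blockSize <= m_blockSize)
            return;                  // existing buffers are already big enough
        m_blockSize = blockSize;
        while (!m_free.empty())      // drop the now-too-small buffers
        {
            free(m_free.front());
            m_free.pop();
        }
        for (size_t c = 0; c < maxMem / m_blockSize; c++)
            m_free.push(malloc(m_blockSize));
    }
    ~BufferPool()
    {
        while (!m_free.empty())
        {
            free(m_free.front());
            m_free.pop();
        }
    }
};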

View file

@ -34,6 +34,15 @@ private:
std::deque<T> d_queue;
public:
void release()
{
while (!d_queue.empty())
{
    free(d_queue.front()); // std::deque::pop_front() returns void, so fetch the element before popping
    d_queue.pop_front();
}
}
size_t size()
{
return d_queue.size();
}
void push(T const& value)
{
{

View file

@ -120,7 +120,7 @@ void oldRNNForwardPropSRP(const size_t timeIdxInSeq, const int delay, const bool
inp = pastActivity.ColumnSlice(d + indexInBatch, 1);
else
inp = inputFunctionValues.ColumnSlice(d + indexInBatch, 1);
out.SetValue(inp);
out.AssignValuesOf(inp);
}
}

View file

@ -884,7 +884,7 @@ BOOST_FIXTURE_TEST_CASE(MatrixColumnSlice, RandomSeedFixture)
cg.SetUniformRandomValue(-1, 1, IncrementCounter());
Matrix<float> dg(k, m, c_deviceIdZero);
dg.SetValue(cg);
dg.AssignValuesOf(cg);
Matrix<float>::MultiplyAndAdd(ag, false, bg, false, dg);