merged from thhoens/bs
This commit is contained in:
Коммит
2d15aa116f
|
@ -524,7 +524,7 @@ Parameters =
|
|||
RNNs =
|
||||
[
|
||||
# LSTMP -- LSTM function with projection and self-stabilization
|
||||
# Projection it enabled by passing different values for outputDim and cellDim.
|
||||
# Projection is enabled by passing different values for outputDim and cellDim.
|
||||
# This is the stateless version that takes the previous state as an input.
|
||||
# It returns a dictionary with three members: h and c, and dim=h.dim for convenience. prevState must have h and c.
|
||||
LSTMP (outputDim, cellDim=outputDim, x, inputDim=x.dim, prevState, enableSelfStabilization=false) =
|
||||
|
|
|
@ -938,7 +938,7 @@ public:
|
|||
if (m_value)
|
||||
{
|
||||
node->CreateValueMatrixIfNull();
|
||||
node->m_value->SetValue(*m_value);
|
||||
node->m_value->SetValue(*m_value);
|
||||
}
|
||||
else
|
||||
node->m_value = nullptr;
|
||||
|
@ -1549,6 +1549,7 @@ public:
|
|||
|
||||
void Trace()
|
||||
{
|
||||
//DebugLogMinibatch();
|
||||
#if 0
|
||||
static const std::set<std::wstring> toLog{
|
||||
L"labelSentenceStartEmbedded",
|
||||
|
|
|
@ -320,6 +320,7 @@ public:
|
|||
{
|
||||
// currently we only support one combination when the input is sparse
|
||||
// If input data is sparse, then gradient is block sparse.
|
||||
// BUGBUG: This does not accumulate into the Input(0)->Gradient, which might cause problems elsewhere.
|
||||
if (Input(1)->Value().GetMatrixType() == SPARSE && Input(0)->Gradient().GetMatrixType() == DENSE && Gradient().GetMatrixType() == DENSE)
|
||||
Input(0)->Gradient().SwitchToMatrixType(SPARSE, MatrixFormat::matrixFormatSparseBlockCol, false);
|
||||
auto input0Gradient = OneSampleTensorFor(0, /*gradient=*/true, fr.AllowBroadcast());
|
||||
|
@ -433,9 +434,11 @@ public:
|
|||
std::swap(dimsA[0], dimsA[1]);
|
||||
// update if LearnableParameter
|
||||
Input(0)->ValidateInferInputDimsFrom(TensorShape(dimsA));
|
||||
#if 0 // Removed this, because the check is just wrong.
|
||||
// and verify once again
|
||||
if (isFinalValidationPass && Input(0)->GetSampleLayout().GetDims() != dimsA)
|
||||
InvalidArgument("%ls %ls operation: Left [%s] and right [%s] operands' shapes are not compatible.", NodeName().c_str(), OperationName().c_str(), dimsAstring.c_str(), dimsBstring.c_str());
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -575,7 +578,7 @@ public:
|
|||
Matrix<ElemType> sliceInput1Value = Input(1)->ValueFor(fr);
|
||||
Matrix<ElemType> sliceOutputValue = ValueFor(fr);
|
||||
|
||||
sliceOutputValue.SetValue(sliceInput1Value);
|
||||
sliceOutputValue.AssignValuesOf(sliceInput1Value);
|
||||
sliceOutputValue.ColumnElementMultiplyWith(Input(0)->ValueAsMatrix());
|
||||
}
|
||||
|
||||
|
|
|
@ -321,7 +321,7 @@ public:
|
|||
inp = Input(0)->ValueFor(frDelayed.Sequence(id));
|
||||
// inp = Input(0)->ValueFor(FrameRange(m_pMBLayout, t_delayed).Sequence(id));
|
||||
|
||||
out.SetValue(inp);
|
||||
out.AssignValuesOf(inp);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -358,7 +358,7 @@ public:
|
|||
inp = Input(0)->ValueFor(frDelayed);
|
||||
// inp = Input(0)->ValueFor(FrameRange(m_pMBLayout, t_delayed));
|
||||
|
||||
out.SetValue(inp);
|
||||
out.AssignValuesOf(inp);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -150,12 +150,12 @@ public:
|
|||
|
||||
virtual void /*ComputationNode::*/ ForwardProp(const FrameRange& fr) override
|
||||
{
|
||||
ValueFor(fr).SetValue(Input(0)->ValueFor(fr));
|
||||
ValueFor(fr).AssignValuesOf(Input(0)->ValueFor(fr));
|
||||
}
|
||||
|
||||
virtual void /*ComputationNode::*/ BackpropTo(const size_t inputIndex, const FrameRange& fr) override
|
||||
{
|
||||
Input(inputIndex)->GradientFor(fr).SetValue(GradientFor(fr));
|
||||
Input(inputIndex)->GradientFor(fr).AssignValuesOf(GradientFor(fr));
|
||||
}
|
||||
|
||||
virtual bool OutputUsedInComputingInputNodesGradients() const override { return false; }
|
||||
|
@ -255,7 +255,7 @@ public:
|
|||
Input(1)->NodeName().c_str(), Input(1)->OperationName().c_str());
|
||||
|
||||
// copy the data from 'dataInput'
|
||||
ValueFor(fr).SetValue(Input(0)->ValueFor(fr.WithLayout(Input(0)->GetMBLayout()))); // just propagate through
|
||||
ValueFor(fr).AssignValuesOf(Input(0)->ValueFor(fr.WithLayout(Input(0)->GetMBLayout()))); // just propagate through
|
||||
// TODO: Once we do in-place, the above must include a copy-to-self check (either here or inside the matrix lib).
|
||||
}
|
||||
|
||||
|
@ -1080,7 +1080,7 @@ public:
|
|||
// (We still need to copy the values since there is currently no way to point to an input function value while reshaping at the same time.)
|
||||
if (!m_pMBLayout || factor() == 1)
|
||||
{
|
||||
Value().Reshaped(newCols * m_numTargetRows, 1).SetValue(Input(0)->Value().Reshaped(cols * rows, 1)); // copy the values as one long vector
|
||||
Value().Reshaped(newCols * m_numTargetRows, 1).AssignValuesOf(Input(0)->Value().Reshaped(cols * rows, 1)); // copy the values as one long vector
|
||||
}
|
||||
// layout case: reshape semantics happens across parallel sequences, i.e. requiring data shuffling
|
||||
else
|
||||
|
|
|
@ -177,6 +177,7 @@ public:
|
|||
// first compute the softmax (column-wise)
|
||||
// Note that we need both log and non-log for gradient computation.
|
||||
m_logSoftmaxOfRight->AssignLogSoftmaxOf(Input(1)->ValueFor(fr), true);
|
||||
// BUGBUG: No need to compute m_softmaxOfRight in ForwardProp, should be moved to BackpropTo().
|
||||
m_softmaxOfRight->SetValue(*m_logSoftmaxOfRight);
|
||||
m_softmaxOfRight->InplaceExp();
|
||||
// flatten all gaps to zero, such that gaps will contribute zero to the sum
|
||||
|
@ -780,7 +781,7 @@ private:
|
|||
case 3:
|
||||
{
|
||||
Matrix<ElemType> grd_t = Input(CLASSPROBINDATA)->GradientFor(fr);
|
||||
grd_t.SetValue(Input(CLASSPROBINDATA)->DataFor(m_clsSoftmax, fr));
|
||||
grd_t.AssignValuesOf(Input(CLASSPROBINDATA)->DataFor(m_clsSoftmax, fr));
|
||||
ComputeCEPartialToSoftmaxInputs(grd_t, Gradient(), c_t);
|
||||
break;
|
||||
}
|
||||
|
@ -811,7 +812,7 @@ private:
|
|||
size_t idx_in_class = y_t - lft_bnd;
|
||||
ComputeCEPartialToSoftmaxInputs(softMax, Gradient(), idx_in_class);
|
||||
|
||||
m_grdToSoftMaxInput.ColumnSlice(sz, nbr_wrd).SetValue(softMax);
|
||||
m_grdToSoftMaxInput.ColumnSlice(sz, nbr_wrd).AssignValuesOf(softMax);
|
||||
});
|
||||
|
||||
m_needRecomputeGradientToSoftmaxInput = false;
|
||||
|
@ -1768,7 +1769,7 @@ public:
|
|||
sliceOutputValue, m_epsilon, *m_saveMean, *m_saveInvStdDev);
|
||||
|
||||
m_mbCount++;
|
||||
}
|
||||
}
|
||||
|
||||
void Validate(bool isFinalValidationPass) override
|
||||
{
|
||||
|
|
|
@ -852,6 +852,24 @@ void CPUMatrix<ElemType>::SetValue(const CPUMatrix<ElemType>& deepCopyFrom)
|
|||
SetValue(deepCopyFrom.GetNumRows(), deepCopyFrom.GetNumCols(), deepCopyFrom.Data(), 0);
|
||||
}
|
||||
|
||||
// Overload for copying a GPU dense matrix into this CPU dense matrix.
// Not supported: the body only invokes the NOT_IMPLEMENTED macro, and the parameter is left unnamed because it is unused.
// Matrix<ElemType>::AssignValuesOf dispatches to this overload for the (CPU dense target, GPU dense source) combination.
template <class ElemType>
|
||||
void CPUMatrix<ElemType>::SetValue(const GPUMatrix<ElemType>& /*deepCopyFrom*/)
|
||||
{
|
||||
NOT_IMPLEMENTED;
|
||||
}
|
||||
|
||||
// Fills this CPU dense matrix from a CPU sparse matrix.
// Delegates to CPUSparseMatrix::AssignColumnSliceToDense with this matrix as the destination and the column range
// starting at 0 and spanning deepCopyFrom.GetNumCols() columns, i.e. the whole source.
template <class ElemType>
|
||||
void CPUMatrix<ElemType>::SetValue(const CPUSparseMatrix<ElemType>& deepCopyFrom)
|
||||
{
|
||||
deepCopyFrom.AssignColumnSliceToDense(*this, 0, deepCopyFrom.GetNumCols());
|
||||
}
|
||||
|
||||
// Overload for copying a GPU sparse matrix into this CPU dense matrix.
// Not supported: the body only invokes the NOT_IMPLEMENTED macro, and the parameter is left unnamed because it is unused.
// Matrix<ElemType>::AssignValuesOf dispatches to this overload for the (CPU dense target, GPU sparse source) combination.
template <class ElemType>
|
||||
void CPUMatrix<ElemType>::SetValue(const GPUSparseMatrix<ElemType>& /*deepCopyFrom*/)
|
||||
{
|
||||
NOT_IMPLEMENTED;
|
||||
}
|
||||
|
||||
template <class ElemType>
|
||||
void CPUMatrix<ElemType>::SetValue(const size_t numRows, const size_t numCols, ElemType* pArray, const size_t matrixFlags)
|
||||
{
|
||||
|
@ -6290,6 +6308,9 @@ template CPUMatrix<char>& CPUMatrix<char>::operator=(CPUMatrix<char>&&);
|
|||
template void CPUMatrix<char>::SetValue(const char);
|
||||
template void CPUMatrix<char>::SetValue(const size_t numRows, const size_t numCols, char* pArray, size_t matrixFlags);
|
||||
template void CPUMatrix<char>::SetValue(CPUMatrix<char> const&);
|
||||
template void CPUMatrix<char>::SetValue(GPUMatrix<char> const&);
|
||||
template void CPUMatrix<char>::SetValue(CPUSparseMatrix<char> const&);
|
||||
template void CPUMatrix<char>::SetValue(GPUSparseMatrix<char> const&);
|
||||
template void CPUMatrix<char>::RequireSize(const size_t numRows, const size_t numCols, bool growOnly);
|
||||
template void CPUMatrix<char>::Resize(const size_t numRows, const size_t numCols, bool growOnly);
|
||||
|
||||
|
|
|
@ -13,6 +13,10 @@
|
|||
#include <ctime>
|
||||
#include <limits.h>
|
||||
|
||||
#include "GPUMatrix.h"
|
||||
#include "CPUSparseMatrix.h"
|
||||
#include "GPUSparseMatrix.h"
|
||||
|
||||
// NOTE NOTE NOTE:
|
||||
// use CPUSingleMatrix and CPUDoubleMatrix instead of using the template directly
|
||||
///////////////////////////////////////////////
|
||||
|
@ -127,6 +131,9 @@ public:
|
|||
|
||||
void SetValue(const ElemType v);
|
||||
void SetValue(const CPUMatrix<ElemType>& deepCopyFrom);
|
||||
void SetValue(const GPUMatrix<ElemType>& deepCopyFrom);
|
||||
void SetValue(const CPUSparseMatrix<ElemType>& deepCopyFrom);
|
||||
void SetValue(const GPUSparseMatrix<ElemType>& deepCopyFrom);
|
||||
void SetValue(const size_t numRows, const size_t numCols, ElemType* pArray, size_t matrixFlags = matrixFlagNormal);
|
||||
|
||||
void MaskColumnsValue(const CPUMatrix<char>& columnsMask, ElemType val);
|
||||
|
|
|
@ -264,6 +264,32 @@ void CPUSparseMatrix<ElemType>::SetValue(const CPUSparseMatrix<ElemType>& v)
|
|||
memcpy(RowLocation(), v.RowLocation(), v.RowSize());
|
||||
memcpy(ColLocation(), v.ColLocation(), v.ColSize());
|
||||
}
|
||||
if (v.m_sliceViewOffset > 0)
|
||||
{
|
||||
CPUSPARSE_INDEX_TYPE* loc = (GetFormat() == matrixFormatSparseCSC) ? ColLocation() : RowLocation();
|
||||
size_t len = (GetFormat() == matrixFormatSparseCSC) ? ColSize() : RowSize();
|
||||
CPUSPARSE_INDEX_TYPE offset = loc[0];
|
||||
for (size_t c = 0; c < len; c++)
|
||||
loc[c] -= offset;
|
||||
}
|
||||
}
|
||||
|
||||
// Overload for copying a CPU dense matrix into this CPU sparse matrix.
// Not supported: the body only invokes the NOT_IMPLEMENTED macro, and the parameter is left unnamed because it is unused.
// Matrix<ElemType>::AssignValuesOf dispatches to this overload for the (CPU sparse target, CPU dense source) combination.
template <class ElemType>
|
||||
void CPUSparseMatrix<ElemType>::SetValue(const CPUMatrix<ElemType>& /*v*/)
|
||||
{
|
||||
NOT_IMPLEMENTED;
|
||||
}
|
||||
|
||||
// Overload for copying a GPU dense matrix into this CPU sparse matrix.
// Not supported: the body only invokes the NOT_IMPLEMENTED macro, and the parameter is left unnamed because it is unused.
// Matrix<ElemType>::AssignValuesOf dispatches to this overload for the (CPU sparse target, GPU dense source) combination.
template <class ElemType>
|
||||
void CPUSparseMatrix<ElemType>::SetValue(const GPUMatrix<ElemType>& /*v*/)
|
||||
{
|
||||
NOT_IMPLEMENTED;
|
||||
}
|
||||
|
||||
// Overload for copying a GPU sparse matrix into this CPU sparse matrix.
// Not supported: the body only invokes the NOT_IMPLEMENTED macro, and the parameter is left unnamed because it is unused.
// Matrix<ElemType>::AssignValuesOf dispatches to this overload for the (CPU sparse target, GPU sparse source) combination.
template <class ElemType>
|
||||
void CPUSparseMatrix<ElemType>::SetValue(const GPUSparseMatrix<ElemType>& /*v*/)
|
||||
{
|
||||
NOT_IMPLEMENTED;
|
||||
}
|
||||
|
||||
template <class ElemType>
|
||||
|
@ -403,6 +429,7 @@ void CPUSparseMatrix<ElemType>::AssignColumnSliceToDense(CPUMatrix<ElemType>& sl
|
|||
// We can either error out or RequireSize. Because RequireSize will error out if it's not allowed, I think this makes more sense.
|
||||
slice.RequireSize(m_numRows, numCols);
|
||||
|
||||
memset(slice.Data(), 0, sizeof(ElemType) * slice.GetNumElements());
|
||||
#pragma omp parallel for
|
||||
for (long j = 0; j < numCols; j++)
|
||||
{
|
||||
|
@ -1348,7 +1375,10 @@ template CPUSparseMatrix<char>::CPUSparseMatrix(CPUSparseMatrix<char> const&);
|
|||
template CPUSparseMatrix<char>::CPUSparseMatrix(CPUSparseMatrix<char>&&);
|
||||
template CPUSparseMatrix<char>& CPUSparseMatrix<char>::operator=(CPUSparseMatrix<char>&& moveFrom);
|
||||
template void CPUSparseMatrix<char>::SetValue(size_t, size_t, char);
|
||||
template void CPUSparseMatrix<char>::SetValue(CPUMatrix<char> const&);
|
||||
template void CPUSparseMatrix<char>::SetValue(GPUMatrix<char> const&);
|
||||
template void CPUSparseMatrix<char>::SetValue(CPUSparseMatrix<char> const&);
|
||||
template void CPUSparseMatrix<char>::SetValue(GPUSparseMatrix<char> const&);
|
||||
template char* CPUSparseMatrix<char>::Data() const;
|
||||
template char* CPUSparseMatrix<char>::Data();
|
||||
template void CPUSparseMatrix<char>::Reset(void);
|
||||
|
|
|
@ -6,6 +6,8 @@
|
|||
|
||||
#include <stdio.h>
|
||||
#include "CPUMatrix.h"
|
||||
#include "GPUMatrix.h"
|
||||
#include "GPUSparseMatrix.h"
|
||||
#include <map>
|
||||
#include <unordered_map>
|
||||
|
||||
|
@ -82,7 +84,11 @@ public:
|
|||
public:
|
||||
|
||||
void SetValue(const size_t row, const size_t col, ElemType val);
|
||||
void SetValue(const CPUMatrix<ElemType>& /*val*/);
|
||||
void SetValue(const GPUMatrix<ElemType>& /*val*/);
|
||||
void SetValue(const CPUSparseMatrix<ElemType>& /*val*/);
|
||||
void SetValue(const GPUSparseMatrix<ElemType>& /*val*/);
|
||||
|
||||
void MaskColumnsValue(const CPUMatrix<char>& columnsMask, ElemType val);
|
||||
|
||||
size_t BufferSize() const
|
||||
|
|
|
@ -312,7 +312,7 @@ protected:
|
|||
if (in.GetMatrixType() == MatrixType::DENSE || m_gpuSparse1D)
|
||||
inputSubBatch = in.ColumnSlice(startSampleId, smallBatchSize);
|
||||
else
|
||||
inputSubBatch.SetValue(in.ColumnSlice(startSampleId, smallBatchSize), in.GetFormat());
|
||||
inputSubBatch.SetValue(in.ColumnSlice(startSampleId, smallBatchSize));
|
||||
|
||||
if (m_gpuSparseOpt)
|
||||
{
|
||||
|
|
|
@ -1093,6 +1093,25 @@ void GPUMatrix<ElemType>::SetValue(const GPUMatrix<ElemType>& deepCopyFrom)
|
|||
SetValue(deepCopyFrom.GetNumRows(), deepCopyFrom.GetNumCols(), deepCopyFrom.GetComputeDeviceId(), deepCopyFrom.Data(), matrixFlagSetValueOnDevice);
|
||||
}
|
||||
|
||||
// Overload for copying a CPU dense matrix into this GPU dense matrix.
// Not supported: the body only invokes the NOT_IMPLEMENTED macro, and the parameter is left unnamed because it is unused.
// Matrix<ElemType>::AssignValuesOf dispatches to this overload for the (GPU dense target, CPU dense source) combination.
template <class ElemType>
|
||||
void GPUMatrix<ElemType>::SetValue(const CPUMatrix<ElemType>& /*deepCopyFrom*/)
|
||||
{
|
||||
NOT_IMPLEMENTED;
|
||||
}
|
||||
|
||||
// Overload for copying a CPU sparse matrix into this GPU dense matrix.
// Not supported: the body only invokes the NOT_IMPLEMENTED macro, and the parameter is left unnamed because it is unused.
// Matrix<ElemType>::AssignValuesOf dispatches to this overload for the (GPU dense target, CPU sparse source) combination.
template <class ElemType>
|
||||
void GPUMatrix<ElemType>::SetValue(const CPUSparseMatrix<ElemType>& /*deepCopyFrom*/)
|
||||
{
|
||||
NOT_IMPLEMENTED;
|
||||
}
|
||||
|
||||
// Fills this GPU dense matrix from a GPU sparse matrix.
// Delegates entirely to GPUSparseMatrix::CopyToDenseMatrix, passing this matrix as the destination.
// NOTE(review): any resizing of the destination is left to CopyToDenseMatrix — confirm it sizes *this before writing.
template <class ElemType>
|
||||
void GPUMatrix<ElemType>::SetValue(const GPUSparseMatrix<ElemType>& deepCopyFrom)
|
||||
{
|
||||
deepCopyFrom.CopyToDenseMatrix(*this);
|
||||
}
|
||||
|
||||
|
||||
template <class ElemType>
|
||||
void GPUMatrix<ElemType>::SetValue(const size_t numRows, const size_t numCols, int deviceId, ElemType* pArray, size_t matrixFlags)
|
||||
{
|
||||
|
@ -4358,7 +4377,10 @@ template GPUMatrix<char>& GPUMatrix<char>::operator=(GPUMatrix<char>&&);
|
|||
template GPUMatrix<char>::GPUMatrix(int);
|
||||
template void GPUMatrix<char>::SetValue(const char);
|
||||
template void GPUMatrix<char>::SetValue(const size_t numRows, const size_t numCols, int deviceId, char* pArray, size_t matrixFlags);
|
||||
template void GPUMatrix<char>::SetValue(CPUMatrix<char> const&);
|
||||
template void GPUMatrix<char>::SetValue(GPUMatrix<char> const&);
|
||||
template void GPUMatrix<char>::SetValue(CPUSparseMatrix<char> const&);
|
||||
template void GPUMatrix<char>::SetValue(GPUSparseMatrix<char> const&);
|
||||
|
||||
template GPUMatrix<int>::GPUMatrix(const size_t, const size_t, int, int*, const size_t);
|
||||
template GPUMatrix<int>::~GPUMatrix();
|
||||
|
|
|
@ -19,6 +19,10 @@
|
|||
#include <memory> // for unique_ptr
|
||||
#include <limits.h> // for ULONG_MAX
|
||||
|
||||
#include "CPUMatrix.h"
|
||||
#include "CPUSparseMatrix.h"
|
||||
#include "GPUSparseMatrix.h"
|
||||
|
||||
#ifndef _WIN32
|
||||
#include <unistd.h>
|
||||
#endif
|
||||
|
@ -223,7 +227,10 @@ public:
|
|||
|
||||
void MaskColumnsValue(const GPUMatrix<char>& columnsMask, ElemType val);
|
||||
|
||||
void SetValue(const CPUMatrix<ElemType>& deepCopyFrom);
|
||||
void SetValue(const GPUMatrix<ElemType>& deepCopyFrom);
|
||||
void SetValue(const CPUSparseMatrix<ElemType>& deepCopyFrom);
|
||||
void SetValue(const GPUSparseMatrix<ElemType>& deepCopyFrom);
|
||||
void SetValue(const size_t numRows, const size_t numCols, int deviceId, ElemType* pArray, size_t matrixFlags = matrixFlagNormal);
|
||||
|
||||
void SetDiagonalValue(const ElemType v);
|
||||
|
|
|
@ -260,22 +260,22 @@ void GPUSparseMatrix<ElemType>::CopyToDenseMatrix(GPUMatrix<ElemType>& denseMatr
|
|||
{
|
||||
if (sizeof(ElemType) == sizeof(float))
|
||||
{
|
||||
CUSPARSE_CALL(cusparseScsr2dense(cusparseHandle, int(GetNumRows()), int(GetNumCols()), descr, (float*) Data(), RowLocation(), ColLocation(), (float*) denseMatrix.Data(), int(GetNumRows())));
|
||||
CUSPARSE_CALL(cusparseScsr2dense(cusparseHandle, int(GetNumRows()), int(GetNumCols()), descr, (float*) Buffer(), RowLocation(), ColLocation(), (float*) denseMatrix.Data(), int(GetNumRows())));
|
||||
}
|
||||
else
|
||||
{
|
||||
CUSPARSE_CALL(cusparseDcsr2dense(cusparseHandle, int(GetNumRows()), int(GetNumCols()), descr, (double*) Data(), RowLocation(), ColLocation(), (double*) denseMatrix.Data(), int(GetNumRows())));
|
||||
CUSPARSE_CALL(cusparseDcsr2dense(cusparseHandle, int(GetNumRows()), int(GetNumCols()), descr, (double*) Buffer(), RowLocation(), ColLocation(), (double*) denseMatrix.Data(), int(GetNumRows())));
|
||||
}
|
||||
}
|
||||
else if (GetFormat() == MatrixFormat::matrixFormatSparseCSC)
|
||||
{
|
||||
if (sizeof(ElemType) == sizeof(float))
|
||||
{
|
||||
CUSPARSE_CALL(cusparseScsc2dense(cusparseHandle, int(GetNumRows()), int(GetNumCols()), descr, (float*) Data(), RowLocation(), ColLocation(), (float*) denseMatrix.Data(), int(GetNumRows())));
|
||||
CUSPARSE_CALL(cusparseScsc2dense(cusparseHandle, int(GetNumRows()), int(GetNumCols()), descr, (float*) Buffer(), RowLocation(), ColLocation(), (float*) denseMatrix.Data(), int(GetNumRows())));
|
||||
}
|
||||
else
|
||||
{
|
||||
CUSPARSE_CALL(cusparseDcsc2dense(cusparseHandle, int(GetNumRows()), int(GetNumCols()), descr, (double*) Data(), RowLocation(), ColLocation(), (double*) denseMatrix.Data(), int(GetNumRows())));
|
||||
CUSPARSE_CALL(cusparseDcsc2dense(cusparseHandle, int(GetNumRows()), int(GetNumCols()), descr, (double*) Buffer(), RowLocation(), ColLocation(), (double*) denseMatrix.Data(), int(GetNumRows())));
|
||||
}
|
||||
}
|
||||
else
|
||||
|
@ -414,6 +414,12 @@ void GPUSparseMatrix<ElemType>::ChangeDeviceTo(DEVICEID_TYPE to_id)
|
|||
SetComputeDeviceId(PrepareDevice(to_id));
|
||||
}
|
||||
|
||||
// Overload for copying a CPU dense matrix into this GPU sparse matrix.
// Not supported: the body only invokes the NOT_IMPLEMENTED macro, and the parameter is left unnamed because it is unused.
// Matrix<ElemType>::AssignValuesOf dispatches to this overload for the (GPU sparse target, CPU dense source) combination.
template <class ElemType>
|
||||
void GPUSparseMatrix<ElemType>::SetValue(const CPUMatrix<ElemType>& /*denseMatrix*/)
|
||||
{
|
||||
NOT_IMPLEMENTED;
|
||||
}
|
||||
|
||||
template <class ElemType>
|
||||
void GPUSparseMatrix<ElemType>::SetValue(const GPUMatrix<ElemType>& denseMatrix)
|
||||
{
|
||||
|
@ -2656,6 +2662,7 @@ template GPUSparseMatrix<char>::GPUSparseMatrix(GPUSparseMatrix<char>&&);
|
|||
template void GPUSparseMatrix<char>::SetValue(CPUSparseMatrix<char> const&);
|
||||
template void GPUSparseMatrix<char>::SetValue(GPUSparseMatrix<char> const&);
|
||||
template void GPUSparseMatrix<char>::SetValue(GPUMatrix<char> const&);
|
||||
template void GPUSparseMatrix<char>::SetValue(CPUMatrix<char> const&);
|
||||
template void GPUSparseMatrix<char>::CopyToDenseMatrix(GPUMatrix<char>&) const;
|
||||
template void GPUSparseMatrix<char>::CopyToCPUSparseMatrix(CPUSparseMatrix<char>&) const;
|
||||
template void GPUSparseMatrix<char>::ChangeDeviceTo(int);
|
||||
|
|
|
@ -5,6 +5,7 @@
|
|||
|
||||
#pragma once
|
||||
|
||||
#include "CPUMatrix.h"
|
||||
#include "GPUMatrix.h"
|
||||
#include "CPUSparseMatrix.h"
|
||||
#include <functional>
|
||||
|
@ -281,10 +282,11 @@ public:
|
|||
return SecondaryIndexLocation();
|
||||
}
|
||||
|
||||
void SetValue(const GPUSparseMatrix<ElemType>& deepCopyFrom);
|
||||
void SetValue(const CPUSparseMatrix<ElemType>& deepCopyFrom);
|
||||
void SetValue(const GPUMatrix<ElemType>& denseMatrix, const MatrixFormat matrixFormat);
|
||||
void SetValue(const CPUMatrix<ElemType>& denseMatrix);
|
||||
void SetValue(const GPUMatrix<ElemType>& denseMatrix);
|
||||
void SetValue(const CPUSparseMatrix<ElemType>& deepCopyFrom);
|
||||
void SetValue(const GPUSparseMatrix<ElemType>& deepCopyFrom);
|
||||
void SetValue(const GPUMatrix<ElemType>& denseMatrix, const MatrixFormat matrixFormat);
|
||||
|
||||
GPUSPARSE_INDEX_TYPE* GetCondensedVector() const;
|
||||
void MaskColumnsValue(const GPUMatrix<char>& columnsMask, ElemType val);
|
||||
|
|
|
@ -1198,14 +1198,14 @@ void Matrix<ElemType>::SetColumn(const Matrix<ElemType>& colMat, size_t colInd)
|
|||
}
|
||||
|
||||
template <class ElemType>
|
||||
void Matrix<ElemType>::SetValue(const Matrix<ElemType>& deepCopyFrom, const MatrixFormat format /*= matrixFormatSparseCSR*/)
|
||||
void Matrix<ElemType>::SetValue(const Matrix<ElemType>& deepCopyFrom)
|
||||
{
|
||||
if (this == &deepCopyFrom)
|
||||
return;
|
||||
|
||||
m_preferredDeviceId = deepCopyFrom.m_preferredDeviceId;
|
||||
DecideAndMoveToRightDevice(deepCopyFrom, *this);
|
||||
SwitchToMatrixType(deepCopyFrom.GetMatrixType(), format, false);
|
||||
SwitchToMatrixType(deepCopyFrom.GetMatrixType(), deepCopyFrom.GetFormat(), false);
|
||||
|
||||
DISPATCH_MATRIX_ON_FLAG(&deepCopyFrom,
|
||||
this,
|
||||
|
@ -1215,6 +1215,48 @@ void Matrix<ElemType>::SetValue(const Matrix<ElemType>& deepCopyFrom, const Matr
|
|||
m_GPUSparseMatrix->SetValue(*deepCopyFrom.m_GPUSparseMatrix));
|
||||
}
|
||||
|
||||
// Copies the values of 'deepCopyFrom' into this matrix while keeping this matrix's own storage kind.
// The outer DISPATCH_MATRIX_ON_FLAG selects on *this* (CPU dense, GPU dense, CPU sparse, GPU sparse, in that order);
// the inner one selects on the source. Each of the 16 combinations forwards to the matching
// xPUMatrix::SetValue / xPUSparseMatrix::SetValue overload.
// Unlike Matrix::SetValue(const Matrix&), this function makes no DecideAndMoveToRightDevice or SwitchToMatrixType call.
// Assigning a matrix to itself is a no-op.
// NOTE(review): several of the target overloads reached from here are NOT_IMPLEMENTED stubs
// (e.g. CPUMatrix::SetValue(const GPUMatrix&)), so not every target/source combination is usable.
template <class ElemType>
|
||||
void Matrix<ElemType>::AssignValuesOf(const Matrix<ElemType>& deepCopyFrom)
|
||||
{
|
||||
if (this == &deepCopyFrom)
|
||||
return;
|
||||

||||
DISPATCH_MATRIX_ON_FLAG(this, this,
|
||||
{
|
||||
// Set CPUMatrix from:
|
||||
DISPATCH_MATRIX_ON_FLAG(&deepCopyFrom, &deepCopyFrom,
|
||||
{ m_CPUMatrix->SetValue(*deepCopyFrom.m_CPUMatrix); },
|
||||
{ m_CPUMatrix->SetValue(*deepCopyFrom.m_GPUMatrix); },
|
||||
{ m_CPUMatrix->SetValue(*deepCopyFrom.m_CPUSparseMatrix); },
|
||||
{ m_CPUMatrix->SetValue(*deepCopyFrom.m_GPUSparseMatrix); });
|
||||
},
|
||||
{
|
||||
// Set GPUMatrix from:
|
||||
DISPATCH_MATRIX_ON_FLAG(&deepCopyFrom, &deepCopyFrom,
|
||||
{ m_GPUMatrix->SetValue(*deepCopyFrom.m_CPUMatrix); },
|
||||
{ m_GPUMatrix->SetValue(*deepCopyFrom.m_GPUMatrix); },
|
||||
{ m_GPUMatrix->SetValue(*deepCopyFrom.m_CPUSparseMatrix); },
|
||||
{ m_GPUMatrix->SetValue(*deepCopyFrom.m_GPUSparseMatrix); });
|
||||
},
|
||||
{
|
||||
// Set CPUSparseMatrix from:
|
||||
DISPATCH_MATRIX_ON_FLAG(&deepCopyFrom, &deepCopyFrom,
|
||||
{ m_CPUSparseMatrix->SetValue(*deepCopyFrom.m_CPUMatrix); },
|
||||
{ m_CPUSparseMatrix->SetValue(*deepCopyFrom.m_GPUMatrix); },
|
||||
{ m_CPUSparseMatrix->SetValue(*deepCopyFrom.m_CPUSparseMatrix); },
|
||||
{ m_CPUSparseMatrix->SetValue(*deepCopyFrom.m_GPUSparseMatrix); });
|
||||
},
|
||||
{
|
||||
// Set GPUSparseMatrix from:
|
||||
DISPATCH_MATRIX_ON_FLAG(&deepCopyFrom, &deepCopyFrom,
|
||||
{ m_GPUSparseMatrix->SetValue(*deepCopyFrom.m_CPUMatrix); },
|
||||
{ m_GPUSparseMatrix->SetValue(*deepCopyFrom.m_GPUMatrix); },
|
||||
{ m_GPUSparseMatrix->SetValue(*deepCopyFrom.m_CPUSparseMatrix); },
|
||||
{ m_GPUSparseMatrix->SetValue(*deepCopyFrom.m_GPUSparseMatrix); });
|
||||
});
|
||||

||||
}
|
||||
|
||||
template <class ElemType>
|
||||
void Matrix<ElemType>::SetValue(const size_t numRows, const size_t numCols, int deviceId, ElemType* pArray, const size_t matrixFlags)
|
||||
{
|
||||
|
@ -1248,18 +1290,18 @@ void Matrix<ElemType>::SetMatrixFromCSCFormat(const CPUSPARSE_INDEX_TYPE* h_CSCC
|
|||
// Note: The current implementation uses the xPUSparseMatrix as temporary space. This allows for memory sharing between calls. If
|
||||
// xPUSparseMatrix is a view, this code will cause an error during runtime stating that the view is not writable nor resizable.
|
||||
DISPATCH_MATRIX_ON_FLAG(this, this,
|
||||
{
|
||||
if (!m_CPUSparseMatrix) m_CPUSparseMatrix = make_shared<CPUSparseMatrix<ElemType>>(matrixFormatSparseCSC, numRows, numCols, nz);
|
||||
m_CPUSparseMatrix->SetMatrixFromCSCFormat(h_CSCCol, h_Row, h_Val, nz, numRows, numCols);
|
||||
m_CPUSparseMatrix->AssignColumnSliceToDense(*m_CPUMatrix, 0, numCols);
|
||||
},
|
||||
{
|
||||
if (!m_GPUSparseMatrix) m_GPUSparseMatrix = make_shared<GPUSparseMatrix<ElemType>>(numRows, numCols, nz, GetDeviceId(), matrixFormatSparseCSC);
|
||||
m_GPUSparseMatrix->SetMatrixFromCSCFormat(h_CSCCol, h_Row, h_Val, nz, numRows, numCols);
|
||||
m_GPUSparseMatrix->AssignColumnSliceToDense(*m_GPUMatrix, 0, numCols);
|
||||
},
|
||||
{ m_CPUSparseMatrix->SetMatrixFromCSCFormat(h_CSCCol, h_Row, h_Val, nz, numRows, numCols); },
|
||||
{ m_GPUSparseMatrix->SetMatrixFromCSCFormat(h_CSCCol, h_Row, h_Val, nz, numRows, numCols); });
|
||||
{
|
||||
if (!m_CPUSparseMatrix) m_CPUSparseMatrix = make_shared<CPUSparseMatrix<ElemType>>(matrixFormatSparseCSC, numRows, numCols, nz);
|
||||
m_CPUSparseMatrix->SetMatrixFromCSCFormat(h_CSCCol, h_Row, h_Val, nz, numRows, numCols);
|
||||
m_CPUSparseMatrix->AssignColumnSliceToDense(*m_CPUMatrix, 0, numCols);
|
||||
},
|
||||
{
|
||||
if (!m_GPUSparseMatrix) m_GPUSparseMatrix = make_shared<GPUSparseMatrix<ElemType>>(numRows, numCols, nz, GetDeviceId(), matrixFormatSparseCSC);
|
||||
m_GPUSparseMatrix->SetMatrixFromCSCFormat(h_CSCCol, h_Row, h_Val, nz, numRows, numCols);
|
||||
m_GPUSparseMatrix->AssignColumnSliceToDense(*m_GPUMatrix, 0, numCols);
|
||||
},
|
||||
{ m_CPUSparseMatrix->SetMatrixFromCSCFormat(h_CSCCol, h_Row, h_Val, nz, numRows, numCols); },
|
||||
{ m_GPUSparseMatrix->SetMatrixFromCSCFormat(h_CSCCol, h_Row, h_Val, nz, numRows, numCols); });
|
||||
}
|
||||
|
||||
template <class ElemType>
|
||||
|
@ -1397,52 +1439,58 @@ void Matrix<ElemType>::NormalGrad(Matrix<ElemType>& gradients,
|
|||
|
||||
if (!useNesterovMomentum)
|
||||
{
|
||||
DISPATCH_MATRIX_ON_FLAG(&gradients,
|
||||
nullptr,
|
||||
ScaleAndAdd((1 - momentum) * learnRatePerSample, gradients, momentum, *this);
|
||||
functionValues -= *this,
|
||||
ScaleAndAdd((1 - momentum) * learnRatePerSample, gradients, momentum, *this);
|
||||
functionValues -= *this,
|
||||
if (momentum != 0) gradients.m_CPUSparseMatrix->NormalGrad(*m_CPUMatrix, momentum);
|
||||
ScaleAndAdd(-learnRatePerSample, gradients, functionValues),
|
||||
if (momentum != 0) gradients.m_GPUSparseMatrix->NormalGrad(*m_GPUMatrix, momentum);
|
||||
ScaleAndAdd(-learnRatePerSample, gradients, functionValues));
|
||||
DISPATCH_MATRIX_ON_FLAG(&gradients, nullptr,
|
||||
{
|
||||
ScaleAndAdd((1 - momentum) * learnRatePerSample, gradients, momentum, *this);
|
||||
functionValues -= *this;
|
||||
},
|
||||
{
|
||||
ScaleAndAdd((1 - momentum) * learnRatePerSample, gradients, momentum, *this);
|
||||
functionValues -= *this;
|
||||
},
|
||||
{
|
||||
if (momentum != 0) gradients.m_CPUSparseMatrix->NormalGrad(*m_CPUMatrix, momentum);
|
||||
ScaleAndAdd(-learnRatePerSample, gradients, functionValues);
|
||||
},
|
||||
{
|
||||
if (momentum != 0) gradients.m_GPUSparseMatrix->NormalGrad(*m_GPUMatrix, momentum);
|
||||
ScaleAndAdd(-learnRatePerSample, gradients, functionValues);
|
||||
});
|
||||
}
|
||||
else
|
||||
{
|
||||
DISPATCH_MATRIX_ON_FLAG(&gradients,
|
||||
nullptr,
|
||||
{ /* CPU dense */
|
||||
ScaleAndAdd((1 - momentum) * learnRatePerSample, gradients, momentum, *this);
|
||||
ScaleAndAdd(-momentum, *this, functionValues);
|
||||
ScaleAndAdd(-(1 - momentum) * learnRatePerSample, gradients, functionValues);
|
||||
// w_t = w_{t-1} - momentum * v_{t-1} - (1-momentum)*learnRatePerSample*gradient,
|
||||
},
|
||||
{ /* GPU dense */
|
||||
ScaleAndAdd((1 - momentum) * learnRatePerSample, gradients, momentum, *this);
|
||||
ScaleAndAdd(-momentum, *this, functionValues);
|
||||
ScaleAndAdd(-(1 - momentum) * learnRatePerSample, gradients, functionValues);
|
||||
},
|
||||
{ /* CPU sparse */
|
||||
if (momentum != 0)
|
||||
{
|
||||
Matrix<ElemType> gradientCache(gradients.GetDeviceId());
|
||||
gradientCache.SetValue(gradients);
|
||||
gradients.m_CPUSparseMatrix->NormalGrad(*m_CPUMatrix, momentum);
|
||||
ScaleAndAdd(-momentum, *this, functionValues);
|
||||
ScaleAndAdd(-(1 - momentum) * learnRatePerSample, gradientCache, functionValues);
|
||||
}
|
||||
},
|
||||
{ /* GPU sparse */
|
||||
if (momentum != 0)
|
||||
{
|
||||
Matrix<ElemType> gradientCache(gradients.GetDeviceId());
|
||||
gradientCache.SetValue(gradients);
|
||||
gradients.m_GPUSparseMatrix->NormalGrad(*m_GPUMatrix, momentum);
|
||||
ScaleAndAdd(-momentum, *this, functionValues);
|
||||
ScaleAndAdd(-(1 - momentum) * learnRatePerSample, gradientCache, functionValues);
|
||||
}
|
||||
});
|
||||
DISPATCH_MATRIX_ON_FLAG(&gradients, nullptr,
|
||||
{ /* CPU dense */
|
||||
ScaleAndAdd((1 - momentum) * learnRatePerSample, gradients, momentum, *this);
|
||||
ScaleAndAdd(-momentum, *this, functionValues);
|
||||
ScaleAndAdd(-(1 - momentum) * learnRatePerSample, gradients, functionValues);
|
||||
// w_t = w_{t-1} - momentum * v_{t-1} - (1-momentum)*learnRatePerSample*gradient,
|
||||
},
|
||||
{ /* GPU dense */
|
||||
ScaleAndAdd((1 - momentum) * learnRatePerSample, gradients, momentum, *this);
|
||||
ScaleAndAdd(-momentum, *this, functionValues);
|
||||
ScaleAndAdd(-(1 - momentum) * learnRatePerSample, gradients, functionValues);
|
||||
},
|
||||
{ /* CPU sparse */
|
||||
if (momentum != 0)
|
||||
{
|
||||
Matrix<ElemType> gradientCache(gradients.GetDeviceId());
|
||||
gradientCache.AssignValuesOf(gradients);
|
||||
gradients.m_CPUSparseMatrix->NormalGrad(*m_CPUMatrix, momentum);
|
||||
ScaleAndAdd(-momentum, *this, functionValues);
|
||||
ScaleAndAdd(-(1 - momentum) * learnRatePerSample, gradientCache, functionValues);
|
||||
}
|
||||
},
|
||||
{ /* GPU sparse */
|
||||
if (momentum != 0)
|
||||
{
|
||||
Matrix<ElemType> gradientCache(gradients.GetDeviceId());
|
||||
gradientCache.AssignValuesOf(gradients);
|
||||
gradients.m_GPUSparseMatrix->NormalGrad(*m_GPUMatrix, momentum);
|
||||
ScaleAndAdd(-momentum, *this, functionValues);
|
||||
ScaleAndAdd(-(1 - momentum) * learnRatePerSample, gradientCache, functionValues);
|
||||
}
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1543,7 +1591,7 @@ Matrix<ElemType> Matrix<ElemType>::RepMat(const Matrix<ElemType>& frmMat, const
|
|||
Matrix<ElemType> c(nRows, newCols, frmMat.GetDeviceId());
|
||||
for (size_t i = 0; i < colRatio; i++)
|
||||
{
|
||||
c.ColumnSlice(i * nCols, nCols).SetValue(frmMat);
|
||||
c.ColumnSlice(i * nCols, nCols).AssignValuesOf(frmMat);
|
||||
}
|
||||
|
||||
return c;
|
||||
|
@ -5007,9 +5055,9 @@ Matrix<ElemType>& Matrix<ElemType>::Shift(const Matrix<ElemType>& a, int shift)
|
|||
long n = (long) GetNumCols();
|
||||
|
||||
if (shift >= 0 && shift < n)
|
||||
us.ColumnSlice(shift, n - shift).SetValue(a.ColumnSlice(0, n - shift));
|
||||
us.ColumnSlice(shift, n - shift).AssignValuesOf(a.ColumnSlice(0, n - shift));
|
||||
if (shift < 0 && shift > -n)
|
||||
us.ColumnSlice(0, n + shift).SetValue(a.ColumnSlice(-shift, n + shift));
|
||||
us.ColumnSlice(0, n + shift).AssignValuesOf(a.ColumnSlice(-shift, n + shift));
|
||||
return *this;
|
||||
}
|
||||
|
||||
|
@ -5328,7 +5376,9 @@ template size_t Matrix<char>::GetNumRows() const;
|
|||
template size_t Matrix<char>::GetNumCols() const;
|
||||
template void Matrix<char>::SetValue(const char);
|
||||
template void Matrix<char>::SetValue(size_t numRows, const size_t numCols, int deviceId, char* pArray, size_t matrixFlags);
|
||||
template void Matrix<char>::SetValue(const Matrix<char>&, MatrixFormat);
|
||||
//template void Matrix<char>::SetValue(const Matrix<char>&, MatrixFormat);
|
||||
template void Matrix<char>::SetValue(const Matrix<char>&);
|
||||
template void Matrix<char>::AssignValuesOf (const Matrix<char>&);
|
||||
template bool Matrix<char>::IsEmpty() const;
|
||||
template void Matrix<char>::Resize(const size_t numRows, const size_t numCols, const size_t numNZElemToReserve, bool growOnly);
|
||||
|
||||
|
|
|
@ -236,7 +236,11 @@ public:
|
|||
|
||||
void SetValue(const ElemType v);
|
||||
void SetValue(const DeviceBoundNumber<ElemType>& db_number);
|
||||
void SetValue(const Matrix<ElemType>& deepCopyFrom, const MatrixFormat format = matrixFormatSparseCSR); // BUGBUG: default for 'format' is unexpected
|
||||
//void SetValue (const Matrix<ElemType>& deepCopyFrom, const MatrixFormat format = matrixFormatSparseCSR); // BUGBUG: default for 'format' is unexpected
|
||||
// SetValue respects the source matrix's information. It moves the target's location (if necessary), and then copies the source's values.
|
||||
void SetValue (const Matrix<ElemType>& deepCopyFrom);
|
||||
// AssignValuesOf respects the target matrix's information. It copies the values from the source into the memory of the target.
|
||||
void AssignValuesOf(const Matrix<ElemType>& deepCopyFrom);
|
||||
void SetValue(const size_t numRows, const size_t numCols, int deviceId, ElemType* pArray, const size_t matrixFlags = matrixFlagNormal);
|
||||
void SetValue(const size_t rIdx, const size_t cIdx, ElemType val); // set matrix sparsely
|
||||
void SetValue(const size_t numRows, const size_t numCols, std::initializer_list<ElemType> l) // SetValue(2,3, {1,2,3, 4,5,6});
|
||||
|
|
|
@ -335,6 +335,8 @@ void SparseBinaryInput<ElemType>::Init(std::map<std::wstring, std::wstring> rena
|
|||
|
||||
m_inFile.seekg(0, ios::end);
|
||||
m_fileSize = (size_t) m_inFile.tellg();
|
||||
|
||||
m_maxMBSize = 0;
|
||||
}
|
||||
|
||||
template <class ElemType>
|
||||
|
@ -436,19 +438,28 @@ void SparseBinaryInput<ElemType>::StartDistributedMinibatchLoop(size_t mbSize, s
|
|||
|
||||
ReadOffsets(startMB, m_windowSize);
|
||||
|
||||
m_maxMBSize = 0;
|
||||
size_t maxMBSize = 0;
|
||||
for (size_t c = 0; c < m_windowSize; c++)
|
||||
{
|
||||
m_maxMBSize = max(m_maxMBSize, (size_t)(m_offsets[c + 1] - m_offsets[c]));
|
||||
maxMBSize = max(maxMBSize, (size_t)(m_offsets[c + 1] - m_offsets[c]));
|
||||
// fprintf(stderr, "m_offsets[%lu] = %lu\n", c, m_offsets[c]);
|
||||
}
|
||||
// fprintf(stderr, "max mb size: %ld\n", m_maxMBSize);
|
||||
size_t maxMem = 1024 * 1024 * 1024; // 1GB
|
||||
size_t maxPointers = maxMem / m_maxMBSize;
|
||||
for (size_t c = 0; c < maxPointers; c++)
|
||||
if (maxMBSize > m_maxMBSize)
|
||||
{
|
||||
void* dataBuffer = malloc(m_maxMBSize);
|
||||
m_dataToProduce.push(dataBuffer);
|
||||
m_maxMBSize = maxMBSize;
|
||||
while (m_dataToProduce.size() > 0)
|
||||
{
|
||||
free(m_dataToProduce.pop());
|
||||
}
|
||||
// fprintf(stderr, "max mb size: %ld\n", m_maxMBSize);
|
||||
|
||||
size_t maxMem = 1024 * 1024 * 1024; // 1GB
|
||||
size_t maxPointers = maxMem / m_maxMBSize;
|
||||
for (size_t c = 0; c < maxPointers; c++)
|
||||
{
|
||||
void* dataBuffer = malloc(m_maxMBSize);
|
||||
m_dataToProduce.push(dataBuffer);
|
||||
}
|
||||
}
|
||||
|
||||
std::thread readData([this]
|
||||
|
|
|
@ -34,6 +34,15 @@ private:
|
|||
std::deque<T> d_queue;
|
||||
|
||||
public:
|
||||
void release()
|
||||
{
|
||||
while (!d_queue.empty())
|
||||
free(d_queue.pop_front());
|
||||
}
|
||||
// Returns the number of elements currently held in d_queue.
// NOTE(review): the value is read without any visible locking — confirm whether callers on other threads can race with this.
size_t size()
|
||||
{
|
||||
return d_queue.size();
|
||||
}
|
||||
void push(T const& value)
|
||||
{
|
||||
{
|
||||
|
|
|
@ -120,7 +120,7 @@ void oldRNNForwardPropSRP(const size_t timeIdxInSeq, const int delay, const bool
|
|||
inp = pastActivity.ColumnSlice(d + indexInBatch, 1);
|
||||
else
|
||||
inp = inputFunctionValues.ColumnSlice(d + indexInBatch, 1);
|
||||
out.SetValue(inp);
|
||||
out.AssignValuesOf(inp);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -884,7 +884,7 @@ BOOST_FIXTURE_TEST_CASE(MatrixColumnSlice, RandomSeedFixture)
|
|||
cg.SetUniformRandomValue(-1, 1, IncrementCounter());
|
||||
|
||||
Matrix<float> dg(k, m, c_deviceIdZero);
|
||||
dg.SetValue(cg);
|
||||
dg.AssignValuesOf(cg);
|
||||
|
||||
Matrix<float>::MultiplyAndAdd(ag, false, bg, false, dg);
|
||||
|
||||
|
|
Загрузка…
Ссылка в новой задаче