add CopyTo for onehot vector; refactor code to share between CopyTo methods;

fix bug when using a GPU device: the cpuArrayView must stay alive for the whole function scope

add CopyTo for OneHot; support Value in both dense and sparse format
This commit is contained in:
Zhou Wang 2016-12-17 22:07:11 +01:00
Родитель f5fdbeb028
Коммит f1cbc08e94
3 изменённых файлов: 456 добавлений и 293 удалений

Просмотреть файл

@ -21,6 +21,7 @@
#include <algorithm>
#include <mutex>
#include <future>
#include <functional>
#ifdef SWIG
#define final
@ -956,24 +957,10 @@ namespace CNTK
}
template <typename ElementType>
void CopyTo(const NDShape& sampleShape, std::vector<std::vector<ElementType>>& sequences, std::vector<size_t>& sequenceLens, bool isResizeable = true)
void CopyTo(const NDShape& sampleShape, std::vector<std::vector<ElementType>>& sequences, std::vector<size_t>& sequenceLengths, bool isResizeable = true)
{
// Check the data type matches
if (AsDataType<ElementType>() != GetDataType())
InvalidArgument("The specified ElementType %s does not match the DataType %s", typeid(ElementType).name(), DataTypeName(GetDataType()));
// Todo: convert sparse into dense.
if (GetStorageFormat() != StorageFormat::Dense)
InvalidArgument("Only the dense storage format is supported now.");
auto valueRank = Shape().Rank();
auto sampleRank = sampleShape.Rank();
if ((valueRank < sampleRank + 1) || (valueRank > sampleRank + 2) || (sampleShape != Shape().SubShape(0, sampleRank)))
RuntimeError("The variable and the Value does not have the same tensor shape.");
CheckAndResizeOutputBuffer(sampleRank, sampleShape.TotalSize(), sequences, sequenceLens, isResizeable);
CopyToImpl(sampleShape, sequences, sequenceLens);
CheckAndResizeOutputBuffer(sampleShape.Rank(), sampleShape.TotalSize(), sequences, sequenceLengths, isResizeable);
CopyToVector(sampleShape, sequences, sequenceLengths);
}
///
@ -981,38 +968,26 @@ namespace CNTK
/// The sequence buffer is on CPU.
/// The Value should have the same axes as variable.
///
template <typename ElementType>
void CopyTo(const NDShape& sampleShape, std::vector<std::vector<size_t>>& sequences)
{
std::vector<size_t> seqLens;
CopyTo(sampleShape, sequences, seqLens, true);
}
template <typename ElementType>
void CopyTo(const NDShape& sampleShape, std::vector<std::vector<size_t>>& sequences, std::vector<size_t>& sequenceLens, bool isResizeable = true)
void CopyTo(const NDShape& sampleShape, std::vector<std::vector<size_t>>& sequences, std::vector<size_t>& sequenceLengths, bool isResizeable = true)
{
// Check the data type matches
if (AsDataType<ElementType>() != GetDataType())
InvalidArgument("The specified ElementType %s does not match the DataType %s", typeid(ElementType).name(), DataTypeName(GetDataType()));
// Todo: convert sparse into dense.
if (GetStorageFormat() != StorageFormat::Dense)
InvalidArgument("Only the dense storage format is supported now.");
if (sampleShape[0] != sampleShape.TotalSize())
InvalidArgument("")
auto valueRank = Shape().Rank();
auto sampleRank = sampleShape.Rank();
if ((valueRank < sampleRank + 1) || (valueRank > sampleRank + 2) || (sampleShape != Shape().SubShape(0, sampleRank)))
RuntimeError("The variable and the Value does not have the same tensor shape.");
// For OneHot vector, only 1 value is needed for a sample.
CheckAndResizeOutputBuffer(sampleRank, 1, sequences, sequenceLens, isResizeable);
// CopyToImpl(sampleShape, sequences, sequenceLens);
CheckAndResizeOutputBuffer(sampleShape.Rank(), 1, sequences, sequenceLengths, isResizeable);
auto dataType = GetDataType();
if (dataType == DataType::Float)
{
CopyToVector<float>(sampleShape, sequences, sequenceLengths);
}
else if (dataType == DataType::Double)
{
CopyToVector<double>(sampleShape, sequences, sequenceLengths);
}
}
private:
template <typename ElementType>
@ -1021,10 +996,16 @@ namespace CNTK
CNTK_API static ValuePtr Create(const NDShape& sampleShape, const std::vector<NDArrayViewPtr>& sequences, const std::vector<bool>& sequenceStartFlags, const DeviceDescriptor& device, bool readOnly, bool createNewCopy);
template <typename ElementType>
CNTK_API void CopyToImpl(const NDShape& sampleShape, std::vector<std::vector<ElementType>>& sequences, std::vector<size_t>& sequenceLens);
CNTK_API void CopyToVector(const NDShape& sampleShape, std::vector<std::vector<ElementType>>& sequences, std::vector<size_t>& sequenceLengths);
template <typename ElementType>
void CheckAndResizeOutputBuffer(const size_t sampleRank, const size_t sampleSize, std::vector<std::vector<ElementType>>& sequences, std::vector<size_t>& sequenceLens, bool isResizeable)
CNTK_API void CopyToVector(const NDShape& sampleShape, std::vector<std::vector<size_t>>& sequences, std::vector<size_t>& sequenceLengths);
template <typename ValueType, typename DestType>
void CopyToImpl(const NDShape& sampleShape, std::vector<std::vector<DestType>>& sequences, std::vector<size_t>& sequenceLengths);
template <typename ElementType>
void CheckAndResizeOutputBuffer(const size_t sampleRank, const size_t sampleSize, std::vector<std::vector<ElementType>>& sequences, std::vector<size_t>& sequenceLengths, bool isResizeable)
{
auto valueRank = Shape().Rank();
size_t numOfSequences;
@ -1044,15 +1025,19 @@ namespace CNTK
}
// Resize the sequence length buffer to reflect the number of sequences in the output.
if (sequenceLens.size() < numOfSequences)
sequenceLens.resize(numOfSequences);
if (sequenceLengths.size() < numOfSequences)
sequenceLengths.resize(numOfSequences);
// Check whether the additional space in the sequences output buffer needs to be allocated if it is resizeable.
if (isResizeable)
{
const MaskKind* maskData = nullptr;
if (m_mask != nullptr)
maskData = Device() != DeviceDescriptor::CPUDevice() ? m_mask->DeepClone(DeviceDescriptor::CPUDevice())->DataBuffer() : m_mask->DataBuffer();
NDMaskPtr cpuMask = nullptr;
if (Mask() != nullptr)
{
cpuMask = (Device() != DeviceDescriptor::CPUDevice()) ? Mask()->DeepClone(DeviceDescriptor::CPUDevice()) : Mask();
maskData = cpuMask->DataBuffer();
}
size_t sampleCount, seqStart;
for (auto seqIndex = 0; seqIndex < numOfSequences; seqIndex++)

Просмотреть файл

@ -359,197 +359,150 @@ namespace CNTK
}
}
// If outputData.size() is 0, CNTK will allocate storage for the data. Otherwise, the caller is responsible for allocating sufficient storage space for saving the data.
// Forward declaration; the definition appears later in this file.
// Copies sampleCount samples of sampleSize elements each from source into dest, writing at
// element offset destSampleStart * sampleSize, then advances destSampleStart by sampleCount.
template <typename ElementType, typename DestType>
void DirectCopy(const ElementType *source, const size_t sampleCount, const size_t sampleSize, std::vector<DestType>& dest, size_t& destSampleStart);
// Forward declaration; the definition appears later in this file.
// For each of the sampleCount dense samples in source, stores the position of its single
// non-zero element into dest[destSampleStart] and increments destSampleStart.
template <typename ElementType, typename DestType>
void CopyDenseToOneHot(const ElementType *source, const size_t sampleCount, const size_t sampleSize, std::vector<DestType>& dest, size_t& destSampleStart);
template <typename ElementType>
void Value::CopyTo(const NDShape& sampleShape, std::vector<std::vector<ElementType>>& outputData)
void Value::CopyToVector(const NDShape& sampleShape, std::vector<std::vector<ElementType>>& sequences, std::vector<size_t>& sequenceLengths)
{
// Check the data type matches
if (AsDataType<ElementType>() != GetDataType())
InvalidArgument("The specified ElementType %s does not match the DataType %s", typeid(ElementType).name(), DataTypeName(GetDataType()));
// Todo: convert sparse into dense.
if (GetStorageFormat() != StorageFormat::Dense)
InvalidArgument("Only the dense storage format is supported now.");
CopyToImpl<ElementType, ElementType>(sampleShape, sequences, sequenceLengths);
}
// Copies the content of this Value into 'sequences' in OneHot form (element type size_t) by
// forwarding to CopyToImpl<ElementType, size_t>. ElementType is the type CopyToImpl uses to read
// the Value's data buffer.
//
// sampleShape:     shape of a single sample; its leading axis must account for the whole sample
//                  (sampleShape[0] == sampleShape.TotalSize()).
// sequences:       output buffer, passed through to CopyToImpl.
// sequenceLengths: output sequence lengths, passed through to CopyToImpl.
//
// Reports InvalidArgument if sampleShape does not satisfy the condition above.
template <typename ElementType>
CNTK_API void Value::CopyToVector(const NDShape& sampleShape, std::vector<std::vector<size_t>>& sequences, std::vector<size_t>& sequenceLengths)
{
    // Check the rank first so that sampleShape[0] is never evaluated on a shape without axes.
    // NOTE(review): assumes a rank-0 sample shape is never valid for OneHot output -- confirm.
    if ((sampleShape.Rank() == 0) || (sampleShape[0] != sampleShape.TotalSize()))
        InvalidArgument("The leading axis dimensionality of the sample shape must equal the total size of the sample shape for OneHot data.");

    CopyToImpl<ElementType, size_t>(sampleShape, sequences, sequenceLengths);
}
template <typename ValueType, typename DestType>
void Value::CopyToImpl(const NDShape& sampleShape,
std::vector<std::vector<DestType>>& sequences,
std::vector<size_t>& sequenceLengths)
{
auto valueRank = Shape().Rank();
if ((valueRank <= 2) || (sampleShape != Shape().SubShape(0, valueRank - 2))
auto sampleRank = sampleShape.Rank();
if ((valueRank < sampleRank + 1) || (valueRank > sampleRank + 2) || (sampleShape != Shape().SubShape(0, sampleRank)))
RuntimeError("The variable and the Value does not have the same tensor shape.");
// Copy data to the CPU device if required.
NDArrayViewPtr cpuArrayView;
NDMaskPtr cpuNDMask;
if (Device() != DeviceDescriptor::CPUDevice())
size_t numOfSequences;
size_t maxSequenceLen;
if (valueRank == sampleShape.Rank() + 1)
{
cpuArrayView = m_data->DeepClone(DeviceDescriptor::CPUDevice());
cpuNDMask = m_mask->DeepClone(DeviceDescriptor::CPUDevice());
// no batch axis, only sequence axis
numOfSequences = 1;
maxSequenceLen = Shape()[valueRank - 1];
}
else
{
cpuArrayView = m_data;
cpuNDMask = m_mask;
assert(valueRank == sampleShape.Rank() + 2);
numOfSequences = Shape()[valueRank - 1];
maxSequenceLen = Shape()[valueRank - 2];
}
auto maskData = cpuNDMask->DataBuffer();
auto valueData = cpuArrayView->DataBuffer<ElementType>();
auto numOfSequences = Shape()[valueRank - 1];
auto maxSequenceLen = Shape()[valueRank - 2];
// Check output buffer size
if (sequences.size() < numOfSequences)
RuntimeError("The size of output buffer is too small");
// Check sequenceLengths size.
if (sequenceLengths.size() < numOfSequences)
{
RuntimeError("The size of sequenceLengths does not match.");
}
else
{
for (size_t i = numOfSequences; i < sequenceLengths.size(); i++)
sequenceLengths[i] = 0;
}
// Copy data to the CPU device if required.
const ValueType *valueData;
const MaskKind* maskData;
NDArrayViewPtr cpuArrayView;
NDMaskPtr cpuMask;
if (Device() != DeviceDescriptor::CPUDevice())
{
// Todo: leverage sparse if the original NDArrayView is in spase.
cpuArrayView = MakeSharedObject<NDArrayView>(GetDataType(), Data()->Shape(), DeviceDescriptor::CPUDevice());
cpuArrayView->CopyFrom(*Data());
cpuMask = Mask() != nullptr ? Mask()->DeepClone(DeviceDescriptor::CPUDevice()) : nullptr;
}
else
{
// Todo: direct process sparse data without copy
if (GetStorageFormat() != StorageFormat::Dense)
{
cpuArrayView = MakeSharedObject<NDArrayView>(GetDataType(), Data()->Shape(), DeviceDescriptor::CPUDevice());
cpuArrayView->CopyFrom(*Data());
}
else
{
cpuArrayView = Data();
}
cpuMask = Mask();
}
valueData = cpuArrayView->DataBuffer<ValueType>();
maskData = cpuMask != nullptr ? cpuMask->DataBuffer() : nullptr;
auto sampleSize = sampleShape.TotalSize();
bool needStorage = false;
if (outputData.size() == 0)
{
needStorage = true;
}
else if (numOfSequences > outputData.size())
{
RuntimeError("The size of the output buffer is smaller than the number of sequences.");
}
const ElementType *first, *last;
ElementType *dest;
std::vector<ElementType> seqBuf;
size_t count, current;
for (auto seqIndex = 0; seqIndex < numOfSequences; seqIndex++)
{
size_t seqStart = seqIndex * maxSequenceLen;
// Check the number of valid elements.
// Not using MaskedCount() to avoid extra data copy.
count = 0;
for (int i = 0; i < maxSequenceLen; i++)
size_t destSampleCount = 0;
if (maskData == nullptr)
{
if (maskData[seqStart + i] != MaskKind::Invalid)
count++;
}
if (needStorage)
// Todo: if function pointer or lambda could support template, switch to use them.
if (typeid(DestType) == typeid(size_t))
{
auto p = new std::vector<ElementType>(count * sampleSize);
outputData.push_back(*p);
CopyDenseToOneHot<ValueType, DestType>(valueData + seqStart * sampleSize, maxSequenceLen, sampleSize, sequences[seqIndex], destSampleCount);
}
seqBuf = outputData[seqIndex];
if (count * sampleSize > seqBuf.size())
else
{
RuntimeError("The sequenth %lu contains more data than the size of the provided vector.\n", (unsigned long)seqIndex);
DirectCopy<ValueType, DestType>(valueData + seqStart * sampleSize, maxSequenceLen, sampleSize, sequences[seqIndex], destSampleCount);
}
dest = seqBuf.data();
current = 0;
while (current < maxSequenceLen)
sequenceLengths[seqIndex] = destSampleCount;
}
else
{
// NDMask is not null
size_t current = seqStart;
size_t seqEnd = seqStart + maxSequenceLen;
while (current < seqEnd)
{
// find first valid mask.
while ((maskData[seqStart + current] == MaskKind::Invalid) && (current < maxSequenceLen))
while ((current < seqEnd) && (maskData[current] == MaskKind::Invalid))
current++;
first = valueData + (seqStart + current) * sampleSize;
auto sampleStart = current;
// find the next invalid mask.
while ((maskData[seqStart + current] != MaskKind::Invalid) && (current < maxSequenceLen))
while ((current < seqEnd) && (maskData[current] != MaskKind::Invalid))
current++;
last = valueData + (seqStart + current) * sampleSize;
if (last > first)
assert(current >= sampleStart);
if (current > sampleStart)
{
std::copy(first, last, dest);
dest += last - first;
assert(dest <= seqBuf.data() + count);
}
}
assert(dest == seqBuf.data() + count);
}
}
void Value::CopyTo(const NDShape& sampleShape, std::vector<std::vector<size_t>>& outputData)
// Todo: if function pointer or lambda could support template, switch to use them.
if (typeid(DestType) == typeid(size_t))
{
if (sampleShape.Rank() != 1)
RuntimeError("Only data of 1-D tensor can be copied to OneHot vector.");
auto valueRank = Shape().Rank();
// Check the shape matches.
if (sampleShape != Shape().SubShape(0, valueRank - 2))
InvalidArgument("The variable and the value does not have the same tensor shape.");
//// Todo: convert sparse into dense.
//if (GetStorageFormat() != StorageFormat::Dense)
// InvalidArgument("Only the dense storage format is supported now.");
//// Copy data to the CPU device if required.
//NDArrayViewPtr cpuArrayView;
//NDMaskPtr cpuNDMask;
//if (Device != DeviceDescriptor::CPUDevice())
//{
// cpuArrayView = m_data->DeepClone(DeviceDescriptor::CPUDevice());
// cpuNDMask = m_mask->DeepClone(DeviceDescriptor::CPUDevice());
//}
//else
//{
// cpuArrayView = m_data;
// cpuNDMask = m_mask;
//}
//auto maskData = cpuNDMask->DataBuffer();
//auto valueData = cpuArrayView->DataBuffer();
//auto numOfSequences = Shape[valueRank - 1];
//auto maxSequenceLen = Shape[valueRank - 2];
//auto sampleSize = variable.Shape().TotalSize();
//if (outputData == nullptr)
//{
// outputData = new std::vector<std::vector<size_t>>(numOfSequences);
//}
//if (numOfSequences > outputData.size())
//{
// RuntimeError("The size of output buffer is smaller than the number of sequences.");
//}
//ElementType *first, *last, *dest;
//size_t count;
//for (auto seqIndex = 0; seqIndex < numOfSequences; seqIndex++)
//{
// size_t seqStart = seqIndex * maxSequenceLen;
// // Check the number of valid elements.
// // Not using MaskedCount() to avoid extra data copy.
// count = 0;
// for (int i = 0; i < maxSequenceLen; i++)
// {
// if (maskData[seqStart + i] != MaskKind::Invalid)
// count++;
// }
// auto seqBuf = outputData[seqIndex];
// if (seqBuf == nullptr)
// {
// outputData[seqIndex] = seqBuf = new std::vector<ElementType>(count * sampleSize);
// }
// if (count * sampleSize > seqBuf.size())
// {
// RuntimeError("The sequenth %lu contains more data than the buffer size.\n", (unsigned long)seqIndex);
// }
// dest = seqBuf;
// while (current < maxSequenceLen)
// {
// // find first valid mask.
// while ((maskData[seqStart + current] == MaskKind::Invalid) && (current < maxSequenceLen))
// current++;
// first = valueData + (seqStart + current) * sampleSize;
// // find the next invalid mask.
// while ((maskData[seqStart + current] != MaskKind::Invalid) && (current < maxSequenceLen))
// current++;
// last = valueData + (seqStart + current) * sampleSize;
// if (last > first)
// {
// std::copy(first, last, dest);
// dest += last - first;
// assert(dest <= seqBuf + count);
// }
// }
// assert(dest == seqBuf + count);
//}
CopyDenseToOneHot<ValueType, DestType>(valueData + seqStart * sampleSize, current - sampleStart, sampleSize, sequences[seqIndex], destSampleCount);
}
else
{
DirectCopy<ValueType, DestType>(valueData + seqStart * sampleSize, current - sampleStart, sampleSize, sequences[seqIndex], destSampleCount);
}
}
}
sequenceLengths[seqIndex] = destSampleCount;
}
}
}
void PackedValue::Unpack() const
@ -585,11 +538,69 @@ namespace CNTK
}
}
// Copies sampleCount consecutive samples (sampleSize elements each) from source into dest.
//
// source:          first element of the first sample to copy.
// sampleCount:     number of samples to copy.
// sampleSize:      number of elements per sample.
// dest:            destination vector; it is not resized here and must already be large enough.
// destSampleStart: in/out sample index into dest. Data is written at element offset
//                  destSampleStart * sampleSize, and the index is advanced by sampleCount.
//
// Reports RuntimeError if ElementType and DestType differ, or if dest cannot hold the data.
template <typename ElementType, typename DestType>
void DirectCopy(const ElementType *source, const size_t sampleCount, const size_t sampleSize, std::vector<DestType>& dest, size_t& destSampleStart)
{
    // The type comparison is made at run time, so this template can still be instantiated for
    // mismatching type pairs.
    const bool typesMatch = (typeid(ElementType) == typeid(DestType));
    if (!typesMatch)
        RuntimeError("Source and destination must be the same data type.");

    // Number of elements dest must contain once this copy has completed.
    const size_t requiredSize = (destSampleStart + sampleCount) * sampleSize;
    if (dest.size() < requiredSize)
        RuntimeError("The output buffer is too small.");

    const size_t elementCount = sampleCount * sampleSize;
    ElementType *writePosition = reinterpret_cast<ElementType *>(dest.data() + destSampleStart * sampleSize);
    std::copy(source, source + elementCount, writePosition);

    destSampleStart += sampleCount;
}
// Converts sampleCount dense samples (sampleSize elements each) into OneHot indices.
//
// source:          first element of the first dense sample.
// sampleCount:     number of samples to convert.
// sampleSize:      number of elements per dense sample.
// dest:            destination vector receiving one index per sample; it is not resized here.
// destSampleStart: in/out index into dest of the next slot to write; incremented once for every
//                  index stored.
//
// Reports RuntimeError if DestType is not size_t, if a sample has no non-zero element or more
// than one, or if dest has no slot left for the next index.
template <typename ElementType, typename DestType>
void CopyDenseToOneHot(const ElementType *source, const size_t sampleCount, const size_t sampleSize, std::vector<DestType>& dest, size_t& destSampleStart)
{
    // The type comparison is made at run time, so this template can still be instantiated for
    // destination types other than size_t.
    if (typeid(DestType) != typeid(size_t))
    {
        RuntimeError("The destination data type must be size_t.");
    }

    const auto isNonZero = [](const ElementType value) {
        return value != 0;
    };

    const ElementType *sampleBegin = source;
    const ElementType *const dataEnd = source + sampleCount * sampleSize;
    for (; sampleBegin < dataEnd; sampleBegin += sampleSize)
    {
        const ElementType *const sampleEnd = sampleBegin + sampleSize;
        const ElementType *const hotPosition = std::find_if(sampleBegin, sampleEnd, isNonZero);

        if (hotPosition == sampleEnd)
        {
            RuntimeError("Cannot convert to onehot vector: the sample does not have any non-zero value.");
        }
        else if (std::find_if(hotPosition + 1, sampleEnd, isNonZero) != sampleEnd)
        {
            // A second non-zero element was found after the first one.
            RuntimeError("Cannot convert to onehot vector: more than one non-zero value in the sample.");
        }
        else if (destSampleStart >= dest.size())
        {
            RuntimeError("The output buffer is too small.");
        }
        else
        {
            // The OneHot index is the offset of the non-zero element within its sample.
            dest[destSampleStart++] = static_cast<DestType>(hotPosition - sampleBegin);
        }
    }
    assert(sampleBegin == dataEnd);
}
// Explicit template instantiations
template /*static*/ CNTK_API ValuePtr Value::Create<float>(const NDShape& sampleShape, const std::vector<std::vector<float>>& sequences, const std::vector<bool>& sequenceStartFlags, const DeviceDescriptor& device, bool readOnly/* = false*/);
template /*static*/ CNTK_API ValuePtr Value::Create<double>(const NDShape& sampleShape, const std::vector<std::vector<double>>& sequences, const std::vector<bool>& sequenceStartFlags, const DeviceDescriptor& device, bool readOnly/* = false*/);
template /*static*/ CNTK_API ValuePtr Value::Create<float>(size_t vocabSize, const std::vector<std::vector<size_t>>& oneHotSequences, const std::vector<bool>& sequenceStartFlags, const DeviceDescriptor& device, bool readOnly/* = false*/);
template /*static*/ CNTK_API ValuePtr Value::Create<double>(size_t vocabSize, const std::vector<std::vector<size_t>>& oneHotSequences, const std::vector<bool>& sequenceStartFlags, const DeviceDescriptor& device, bool readOnly/* = false*/);
template CNTK_API void Value::CopyTo<float>(const NDShape& sampleShape, std::vector<std::vector<float>>& sequences);
template CNTK_API void Value::CopyTo<double>(const NDShape& sampleShape, std::vector<std::vector<double>>& sequences);
template CNTK_API void Value::CopyToVector<float>(const NDShape& sampleShape, std::vector<std::vector<float>>& sequences, std::vector<size_t>& sequencesLens);
template CNTK_API void Value::CopyToVector<double>(const NDShape& sampleShape, std::vector<std::vector<double>>& sequences, std::vector<size_t>& sequencesLens);
}

Просмотреть файл

@ -147,6 +147,7 @@ void FillDenseMatrixData(vector<vector<ElementType>>& databuf, const vector<siz
template <typename ElementType>
void ValueCreationNoNDMaskTest(const DeviceDescriptor device, bool readOnly)
{
//Todo: test numberOfSequences == 1: no batch access, need to adapt checkShape(), CheckValue()
size_t numberOfSequences = 5;
size_t seqLen = 4;
vector<size_t> dims{3, 2};
@ -242,24 +243,24 @@ void ValueCreationOneHotWithNDMaskTest(const DeviceDescriptor device, bool readO
template <typename ElementType>
void CheckCopyToOutput(const size_t sampleSize, std::vector<std::vector<ElementType>> expected, std::vector<std::vector<ElementType>> actual)
{
std::vector<size_t> seqLens(0);
CheckCopyToOutput(sampleSize, expected, actual, seqLens);
std::vector<size_t> actualSeqLens(0);
CheckCopyToOutput(sampleSize, expected, actual, actualSeqLens);
}
template <typename ElementType>
void CheckCopyToOutput(const size_t sampleSize, std::vector<std::vector<ElementType>>& expected, std::vector<std::vector<ElementType>>& actual, std::vector<size_t>& seqLens)
void CheckCopyToOutput(const size_t sampleSize, std::vector<std::vector<ElementType>>& expected, std::vector<std::vector<ElementType>>& actual, std::vector<size_t>& actualSeqLens)
{
bool useSeqLens;
if (seqLens.size() != 0)
if (actualSeqLens.size() != 0)
{
useSeqLens = true;
if (seqLens.size() < expected.size())
ReportFailure("The seqLens size does not match. expected: %" PRIu64 " actual: %" PRIu64 "\n", expected.size(), seqLens.size());
if (actualSeqLens.size() < expected.size())
ReportFailure("The actualSeqLens size does not match. expected: %" PRIu64 " actual: %" PRIu64 "\n", expected.size(), actualSeqLens.size());
else
{
for (size_t i = expected.size(); i < seqLens.size(); i++)
if (seqLens[i] != 0)
ReportFailure("The seqLens contains invalid data.");
for (size_t i = expected.size(); i < actualSeqLens.size(); i++)
if (actualSeqLens[i] != 0)
ReportFailure("The actualSeqLens contains invalid data.");
}
if (actual.size() < expected.size())
@ -274,7 +275,7 @@ void CheckCopyToOutput(const size_t sampleSize, std::vector<std::vector<ElementT
for (size_t i = 0; i < expected.size(); i++)
{
auto len = useSeqLens ? seqLens[i] * sampleSize : actual[i].size();
auto len = useSeqLens ? actualSeqLens[i] * sampleSize : actual[i].size();
if ((actual[i].size() < len) || (expected[i].size() != len))
{
ReportFailure("Seq " PRIu64 " does not match.\n", i);
@ -294,84 +295,249 @@ void ValueCopyToDenseTest(const DeviceDescriptor device)
{
NDShape sampleShape{{2, 3}};
auto sampleSize = sampleShape.TotalSize();
std::vector<std::vector<ElementType>> input;
std::vector<std::vector<ElementType>> output;
std::vector<std::vector<ElementType>> sequences;
std::vector<size_t> seqLens;
std::vector<size_t> expectedSeqLens;
std::vector<size_t> actualSeqLens;
//Todo: add tests sparse to dense.
// Check single sample.
sequences.clear();
sequences.resize(1, std::vector<ElementType>(sampleSize));
// Todo: use generateSequences in common.h
for (size_t i = 0; i < sampleSize; i++)
sequences[0][i] = static_cast<ElementType>(i);
auto val = Value::Create(sampleShape, sequences, device);
size_t batchCount = 1;
expectedSeqLens.clear();
for (size_t i = 0; i < batchCount; i++)
expectedSeqLens.push_back(1);
input = GenerateSequences<ElementType>(expectedSeqLens, sampleShape);
auto val = Value::Create(sampleShape, input, device);
val->CopyTo(sampleShape, output);
CheckCopyToOutput(sampleSize, sequences, output);
CheckCopyToOutput(sampleSize, input, output);
// Check batch of sample.
size_t batchCount = 2;
sequences.clear();
sequences.resize(2, std::vector<ElementType>(sampleSize));
for (size_t s = 0; s < batchCount; s++)
{
for (size_t i = 0; i < sampleSize; i++)
{
sequences[s][i] = static_cast<ElementType>(s * 10 + i);
}
}
val = Value::Create(sampleShape, sequences, device);
VerifyException([&val, &sampleShape, &output, &seqLens]() {
val->CopyTo(sampleShape, output, seqLens, false);
}, "The output buffer is too small.");
val->CopyTo(sampleShape, output, seqLens);
CheckCopyToOutput(sampleSize, sequences, output, seqLens);
batchCount = 2;
expectedSeqLens.clear();
for (size_t i = 0; i < batchCount; i++)
expectedSeqLens.push_back(1);
input = GenerateSequences<ElementType>(expectedSeqLens, sampleShape);
val = Value::Create(sampleShape, input, device);
// Check sequence of sample
size_t sampleCount = 4;
sequences.clear();
sequences.resize(1, std::vector<ElementType>(sampleSize * sampleCount));
for (size_t i = 0; i < sampleSize * sampleCount; i++)
{
sequences[0][i] = static_cast<ElementType>(i);
}
val = Value::Create(sampleShape, sequences, device);
VerifyException([&val, &sampleShape, &output, &seqLens]() {
val->CopyTo(sampleShape, output, seqLens, false);
// The batch axis is too small
VerifyException([&val, &sampleShape, &output, &actualSeqLens]() {
val->CopyTo(sampleShape, output, actualSeqLens, false);
}, "The output buffer is too small.");
val->CopyTo(sampleShape, output, seqLens);
CheckCopyToOutput(sampleSize, sequences, output, seqLens);
val->CopyTo(sampleShape, output, actualSeqLens);
CheckCopyToOutput(sampleSize, input, output);
// Check sequence of sample, but single batch
size_t sampleCount = 4;
batchCount = 1;
expectedSeqLens.clear();
for (size_t i = 0; i < batchCount; i++)
expectedSeqLens.push_back(sampleCount);
input = GenerateSequences<ElementType>(expectedSeqLens, sampleShape);
val = Value::Create(sampleShape, input, device);
// The sequence axis is too small
VerifyException([&val, &sampleShape, &output, &actualSeqLens]() {
val->CopyTo(sampleShape, output, actualSeqLens, false);
}, "The output buffer is too small.");
val->CopyTo(sampleShape, output, actualSeqLens);
CheckCopyToOutput(sampleSize, input, output, actualSeqLens);
// Check batch of sequence of the same length, no mask needed.
batchCount = 4;
sampleCount = 3;
sequences.clear();
sequences.resize(batchCount, std::vector<ElementType>(sampleSize * sampleCount));
for (size_t s = 0; s < batchCount; s++)
{
for (size_t i = 0; i < sampleSize * sampleCount; i++)
{
sequences[s][i] = static_cast<ElementType>(s * 10 + i);
}
}
val = Value::Create(sampleShape, sequences, device);
val->CopyTo(sampleShape, output, seqLens);
CheckCopyToOutput(sampleSize, sequences, output, seqLens);
expectedSeqLens.clear();
for (size_t i = 0; i < batchCount; i++)
expectedSeqLens.push_back(sampleCount);
input = GenerateSequences<ElementType>(expectedSeqLens, sampleShape);
val = Value::Create(sampleShape, input, device);
// The batch axis is too small, the sequence axis is sufficient.
VerifyException([&val, &sampleShape, &output, &actualSeqLens]() {
val->CopyTo(sampleShape, output, actualSeqLens, false);
}, "The output buffer is too small.");
val->CopyTo(sampleShape, output, actualSeqLens);
CheckCopyToOutput(sampleSize, input, output, actualSeqLens);
// Check batch of sequecnes with different length, mask needed.
std::vector<size_t> sampleCountList{6, 9, 2, 1, 5, 3, 4};
std::vector<size_t> sampleCountList {6, 9, 2};
batchCount = sampleCountList.size();
sequences.clear();
sequences.resize(batchCount, std::vector<ElementType>(0));
for (size_t s = 0; s < batchCount; s++)
expectedSeqLens.clear();
for (size_t i = 0; i < batchCount; i++)
expectedSeqLens.push_back(sampleCountList[i]);
input = GenerateSequences<ElementType>(expectedSeqLens, sampleShape);
val = Value::Create(sampleShape, input, device);
// The batch axis is sufficient, the sequence axis is too small
VerifyException([&val, &sampleShape, &output, &actualSeqLens]() {
val->CopyTo(sampleShape, output, actualSeqLens, false);
}, "The output buffer is too small.");
val->CopyTo(sampleShape, output, actualSeqLens);
CheckCopyToOutput(sampleSize, input, output, actualSeqLens);
// More batches and sequences
sampleCountList = {6, 12, 2, 1, 5, 3, 4};
batchCount = sampleCountList.size();
expectedSeqLens.clear();
for (size_t i = 0; i < batchCount; i++)
expectedSeqLens.push_back(sampleCountList[i]);
input = GenerateSequences<ElementType>(expectedSeqLens, sampleShape);
val = Value::Create(sampleShape, input, device);
// Both the batch and sequence axes are too small.
VerifyException([&val, &sampleShape, &output, &actualSeqLens]() {
val->CopyTo(sampleShape, output, actualSeqLens, false);
}, "The output buffer is too small.");
val->CopyTo(sampleShape, output, actualSeqLens);
CheckCopyToOutput(sampleSize, input, output, actualSeqLens);
// Random batch and sequence
int testRun = 4;
size_t maxNumOfSequences = 100;
size_t maxSequenceLen = 100;
// This is only used to generate number of sequnces, so boost distribution is not needed.
std::default_random_engine generator;
std::uniform_int_distribution<size_t> distribution(1, maxNumOfSequences);
for (int i = 0; i < testRun; i++)
{
for (size_t i = 0; i < sampleSize * sampleCountList[s]; i++)
batchCount = distribution(generator);
expectedSeqLens = GenerateSequenceLengths(batchCount, maxSequenceLen);
input = GenerateSequences<ElementType>(expectedSeqLens, sampleShape);
val = Value::Create(sampleShape, input, device);
val->CopyTo(sampleShape, output, actualSeqLens);
CheckCopyToOutput(sampleSize, input, output, actualSeqLens);
}
}
template <typename ElementType>
void ValueCopyToOneHotTest(const DeviceDescriptor device)
{
size_t dim = 100;
NDShape sampleShape{{dim}};
std::vector<std::vector<size_t>> input;
std::vector<std::vector<size_t>> output;
std::vector<size_t> expectedSeqLens;
std::vector<size_t> actualSeqLens;
// Todo: add tests dense to sparse
// Check single sample.
size_t batchCount = 1;
expectedSeqLens.clear();
for (size_t i = 0; i < batchCount; i++)
expectedSeqLens.push_back(1);
input = GenerateOneHotSequences(expectedSeqLens, dim);
auto val = Value::Create<ElementType>(dim, input, device);
val->CopyTo(sampleShape, output);
CheckCopyToOutput(1, input, output);
// Check batch of sample.
batchCount = 2;
expectedSeqLens.clear();
for (size_t i = 0; i < batchCount; i++)
expectedSeqLens.push_back(1);
input = GenerateOneHotSequences(expectedSeqLens, dim);
val = Value::Create<ElementType>(dim, input, device);
// The batch axis is too small
VerifyException([&val, &sampleShape, &output, &actualSeqLens]() {
val->CopyTo(sampleShape, output, actualSeqLens, false);
}, "The output buffer is too small.");
val->CopyTo(sampleShape, output, actualSeqLens);
CheckCopyToOutput(1, input, output);
// Check sequence of sample, but single batch
size_t sampleCount = 4;
batchCount = 1;
expectedSeqLens.clear();
for (size_t i = 0; i < batchCount; i++)
expectedSeqLens.push_back(sampleCount);
input = GenerateOneHotSequences(expectedSeqLens, dim);
val = Value::Create<ElementType>(dim, input, device);
// The sequence axis is too small
VerifyException([&val, &sampleShape, &output, &actualSeqLens]() {
val->CopyTo(sampleShape, output, actualSeqLens, false);
}, "The output buffer is too small.");
val->CopyTo(sampleShape, output, actualSeqLens);
CheckCopyToOutput(1, input, output, actualSeqLens);
// Check batch of sequence of the same length, no mask needed.
batchCount = 4;
sampleCount = 3;
expectedSeqLens.clear();
for (size_t i = 0; i < batchCount; i++)
expectedSeqLens.push_back(sampleCount);
input = GenerateOneHotSequences(expectedSeqLens, dim);
val = Value::Create<ElementType>(dim, input, device);
// The batch axis is too small, the sequence axis is sufficient.
VerifyException([&val, &sampleShape, &output, &actualSeqLens]() {
val->CopyTo(sampleShape, output, actualSeqLens, false);
}, "The output buffer is too small.");
val->CopyTo(sampleShape, output, actualSeqLens);
CheckCopyToOutput(1, input, output, actualSeqLens);
// Check batch of sequecnes with different length, mask needed.
std::vector<size_t> sampleCountList{6, 9, 2};
batchCount = sampleCountList.size();
expectedSeqLens.clear();
for (size_t i = 0; i < batchCount; i++)
expectedSeqLens.push_back(sampleCountList[i]);
input = GenerateOneHotSequences(expectedSeqLens, dim);
val = Value::Create<ElementType>(dim, input, device);
// The batch axis is sufficient, the sequence axis is too small
VerifyException([&val, &sampleShape, &output, &actualSeqLens]() {
val->CopyTo(sampleShape, output, actualSeqLens, false);
}, "The output buffer is too small.");
val->CopyTo(sampleShape, output, actualSeqLens);
CheckCopyToOutput(1, input, output, actualSeqLens);
// More batches and sequences
sampleCountList = {6, 12, 2, 1, 5, 3, 4};
batchCount = sampleCountList.size();
expectedSeqLens.clear();
for (size_t i = 0; i < batchCount; i++)
expectedSeqLens.push_back(sampleCountList[i]);
input = GenerateOneHotSequences(expectedSeqLens, dim);
val = Value::Create<float>(dim, input, device);
// Both the batch and sequence axes are too small.
VerifyException([&val, &sampleShape, &output, &actualSeqLens]() {
val->CopyTo(sampleShape, output, actualSeqLens, false);
}, "The output buffer is too small.");
val->CopyTo(sampleShape, output, actualSeqLens);
CheckCopyToOutput(1, input, output, actualSeqLens);
// Random batch and sequence
int testRun = 4;
size_t maxNumOfSequences = 100;
size_t maxSequenceLen = 100;
// This is only used to generate number of sequnces, so boost distribution is not needed.
std::default_random_engine generator;
std::uniform_int_distribution<size_t> distribution(1, maxNumOfSequences);
for (int i = 0; i < testRun; i++)
{
sequences[s].push_back(static_cast<ElementType>(s * 10 + i));
batchCount = distribution(generator);
expectedSeqLens = GenerateSequenceLengths(batchCount, maxSequenceLen);
input = GenerateOneHotSequences(expectedSeqLens, dim);
val = Value::Create<ElementType>(dim, input, device);
val->CopyTo(sampleShape, output, actualSeqLens);
CheckCopyToOutput(1, input, output, actualSeqLens);
}
}
val = Value::Create(sampleShape, sequences, device);
val->CopyTo(sampleShape, output, seqLens);
CheckCopyToOutput(sampleSize, sequences, output, seqLens);
}
void TestSettingParameterValuesManually(const DeviceDescriptor& device)
@ -436,6 +602,7 @@ void SparseSequenceBatchValueCreationTest(size_t vocabSize, size_t maxAllowedSeq
void ValueTests()
{
fprintf(stderr, "\nValueTests..\n");
srand(1);
TestSettingParameterValuesManually(DeviceDescriptor::CPUDevice());
@ -447,12 +614,12 @@ void ValueTests()
ValueCreationOneHotNoNDMaskTest<double>(DeviceDescriptor::CPUDevice(), true);
ValueCreationOneHotWithNDMaskTest<double>(DeviceDescriptor::CPUDevice(), false);
ValueCreationOneHotWithNDMaskTest<float>(DeviceDescriptor::CPUDevice(), true);
<<<<<<< HEAD
SparseSequenceBatchValueCreationTest(300, 7, DeviceDescriptor::CPUDevice());
SparseSequenceBatchValueCreationTest(2300, 1, DeviceDescriptor::CPUDevice());
=======
ValueCopyToDenseTest<float>(DeviceDescriptor::CPUDevice());
>>>>>>> 0a57cbc... refactor Value::CopyTo to allocate memory in .h file; adapt unit tests; add sequencesLens as parameter for Value::CopyTo
ValueCopyToDenseTest<double>(DeviceDescriptor::CPUDevice());
ValueCopyToOneHotTest<float>(DeviceDescriptor::CPUDevice());
ValueCopyToOneHotTest<double>(DeviceDescriptor::CPUDevice());
if (IsGPUAvailable())
{
@ -466,11 +633,11 @@ void ValueTests()
ValueCreationOneHotNoNDMaskTest<float>(DeviceDescriptor::GPUDevice(0), true);
ValueCreationOneHotWithNDMaskTest<float>(DeviceDescriptor::GPUDevice(0), false);
ValueCreationOneHotWithNDMaskTest<double>(DeviceDescriptor::GPUDevice(0), true);
<<<<<<< HEAD
SparseSequenceBatchValueCreationTest(50000, 1, DeviceDescriptor::GPUDevice(0));
SparseSequenceBatchValueCreationTest(6000, 6, DeviceDescriptor::GPUDevice(0));
=======
ValueCopyToDenseTest<float>(DeviceDescriptor::GPUDevice(0));
>>>>>>> 0a57cbc... refactor Value::CopyTo to allocate memory in .h file; adapt unit tests; add sequencesLens as parameter for Value::CopyTo
ValueCopyToDenseTest<double>(DeviceDescriptor::GPUDevice(0));
ValueCopyToOneHotTest<float>(DeviceDescriptor::GPUDevice(0));
ValueCopyToOneHotTest<double>(DeviceDescriptor::GPUDevice(0));
}
}