Merge branch 'master' into qiwye/asgd-dev

This commit is contained in:
Qiwei Ye 2016-09-28 10:54:51 +08:00
Родитель 8a2e67aba8 732e9d6e45
Коммит 414bf2e02a
7 изменённых файлов: 77 добавлений и 13 удалений

Просмотреть файл

@ -43,16 +43,50 @@ namespace Microsoft { namespace MSR { namespace CNTK {
// TODO: Should be unified with StreamDescription from the new reader API
// Describes one input stream: its name plus the device, matrix type and
// matrix format of the matrix the network expects for that stream.
struct InputStreamDescription
{
    InputStreamDescription(const std::wstring& name, int deviceId, MatrixType matrixType, MatrixFormat format)
        : m_name(name), m_deviceId(deviceId), m_matrixType(matrixType), m_format(format)
    {
    }

    // Name identifying the stream.
    const std::wstring& GetStreamName() const { return m_name; }

    // Device id of the resulting matrix (-1 if CPU, otherwise GPU CUDA device id).
    int GetDeviceId() const { return m_deviceId; }

    // Type of the resulting matrix.
    MatrixType GetMatrixType() const { return m_matrixType; }

    // Storage format of the resulting matrix.
    MatrixFormat GetMatrixFormat() const { return m_format; }

private:
    // Stream name.
    std::wstring m_name;
    // Device identifier for the resulting matrix of this stream.
    int m_deviceId;
    // Matrix type.
    MatrixType m_matrixType;
    // Matrix format.
    MatrixFormat m_format;
};
// Two stream descriptions are equal when all of their properties
// (name, device, matrix type and format) match.
// NOTE: the pre-merge single-field comparison via the now-private m_name/m_deviceId
// members was left in by a bad merge (unreachable and would not compile); removed.
inline bool operator == (const InputStreamDescription& a, const InputStreamDescription& b)
{
    return a.GetStreamName() == b.GetStreamName() &&
           a.GetDeviceId() == b.GetDeviceId() &&
           a.GetMatrixType() == b.GetMatrixType() &&
           a.GetMatrixFormat() == b.GetMatrixFormat();
}
}}}
@ -63,7 +97,7 @@ namespace std
// Hashes a stream description.
// Input name is unique, so hashing the name alone is sufficient.
// NOTE: removed the stale pre-merge duplicate that read the now-private
// x.m_name member directly (merge residue; unreachable and uncompilable).
size_t operator()(const Microsoft::MSR::CNTK::InputStreamDescription& x) const
{
    return std::hash<std::wstring>()(x.GetStreamName());
}
};
}
@ -163,7 +197,8 @@ public:
// Collect a full description (name, device id, matrix type and format) for
// every input stream, so the reader can create matching prefetch matrices.
// NOTE: removed the stale pre-merge insert that used the removed two-argument
// brace constructor (merge residue; would not compile and double-inserted).
std::unordered_set<InputStreamDescription> streamDescriptions;
for (auto input = begin(); input != end(); ++input)
{
    streamDescriptions.insert(
        InputStreamDescription(input->first, input->second.matrix->GetDeviceId(), input->second.matrix->GetMatrixType(), input->second.matrix->GetFormat()));
}
return streamDescriptions;
}

Просмотреть файл

@ -912,7 +912,14 @@ void GPUSparseMatrix<ElemType>::SetMatrixFromCSCFormat(const CPUSPARSE_INDEX_TYP
// Copy the nonzero values to the device: use the async transferer when one is
// provided, otherwise fall back to a plain cudaMemcpy.
cudaMemcpyKind kind = IsOnDevice ? cudaMemcpyDeviceToDevice : cudaMemcpyHostToDevice;
if (transferer)
{
// TODO: All RequireSizeAndAllocate should be async and use a transferer.
// Currently there are some memset operations that can be still executing on the default stream,
// Here we have to wait for them to finish.
transferer->RecordComputeStreamSyncPoint();
transferer->WaitForSyncPointOnAssignStreamAsync();
transferer->CopyCPUToGPUAsync(h_Val, nz, sizeof(ElemType), Data());
}
else
// No transferer: direct copy; 'kind' accounts for the source possibly already being on the device.
CUDA_CALL(cudaMemcpy(Data(), h_Val, nz * sizeof(ElemType), kind));

Просмотреть файл

@ -3502,6 +3502,18 @@ int Matrix<ElemType>::GetDeviceId() const
{ return m_GPUSparseMatrix->GetComputeDeviceId(); });
}
// Returns the MatrixType of this matrix (cached in m_matrixType).
// Out-of-line definition so it can serve as the override of the
// corresponding pure virtual on the base interface.
template <class ElemType>
MatrixType Matrix<ElemType>::GetMatrixType() const
{
return m_matrixType;
}
// Returns the storage format of the underlying base matrix.
template <class ElemType>
MatrixFormat Matrix<ElemType>::GetFormat() const
{
return m_baseMatrix->GetFormat();
}
// TODO: Comment why we need a second ElemType.
// TODO: Move the shared core functions to the front of this source file.
// BUGBUG: This performs a copy operation even for the output matrix that gets overwritten right away.

Просмотреть файл

@ -49,6 +49,8 @@ template <class ElemType> class DeviceBoundNumber;
// Abstract base interface shared by all matrix implementations; exposes
// device, matrix-type and format queries so callers can inspect a matrix
// without knowing its concrete type.
struct /*interface*/ MATH_API MatrixBase
{
virtual int GetDeviceId() const = 0;
virtual MatrixType GetMatrixType() const = 0;
virtual MatrixFormat GetFormat() const = 0;
// TODO: Move more generic functions such as getting dims, resizing, and getting/setting as scalars in here.
virtual ~MatrixBase();
};
@ -147,8 +149,8 @@ public:
return node;
}
// GetMatrixType/GetFormat are declared as overrides of the MatrixBase pure
// virtuals and defined out-of-line.
// NOTE: removed the stale pre-merge inline definitions that were left
// duplicated next to these declarations (merge residue; redeclaration error).
MatrixType GetMatrixType() const override;
MatrixFormat GetFormat() const override;
bool OwnBuffer() const { return m_baseMatrix->OwnBuffer(); }
int GetDeviceId() const; // -1 if CPU, otherwise GPU CUDA device id
DEVICEID_TYPE GetPreferredDeviceId() const { return m_preferredDeviceId; }; // -1 if CPU, otherwise GPU CUDA device id

Просмотреть файл

@ -84,15 +84,15 @@ void ReaderShim<ElemType>::StartDistributedMinibatchLoop(
// Now we can be sure, no prefetch thread is running and there are no outstanding memcopies.
// Let's check that requested devices are ok and see whether we need to change our data transferers.
// NOTE: removed the stale pre-merge statements that accessed the now-private
// d.m_deviceId member (merge residue; duplicated logic and would not compile).
auto device = std::find_if(inputs.begin(), inputs.end(),
    [](const InputStreamDescription& d) { return d.GetDeviceId() != CPUDEVICE; });
auto deviceId = device != inputs.end() ? device->GetDeviceId() : CPUDEVICE;

// Check that all devices either the same as m_deviceId or CPU.
auto secondDevice = std::find_if(inputs.begin(), inputs.end(),
    [deviceId](const InputStreamDescription& d) { return d.GetDeviceId() != CPUDEVICE && d.GetDeviceId() != deviceId; });
if (secondDevice != inputs.end())
{
    LogicError("Readers do not support running on several GPUs in the same process, at least two devices found '%d', '%d'", deviceId, secondDevice->GetDeviceId());
}
if (m_deviceId != deviceId)
@ -109,8 +109,13 @@ void ReaderShim<ElemType>::StartDistributedMinibatchLoop(
// Record each input's device and create prefetch buffers with the same
// properties (device, matrix type, format) the network expects.
// NOTE: removed the stale pre-merge assignments that accessed the now-private
// i.m_name/i.m_deviceId members (merge residue; would not compile).
std::map<std::wstring, int> inputDescriptions;
for (const auto& i : inputs)
{
    inputDescriptions[i.GetStreamName()] = i.GetDeviceId();
    // Creating buffers with the same properties the network expects.
    m_prefetchBuffers[i.GetStreamName()] = StreamPrefetchBuffer
    {
        std::make_shared<Matrix<ElemType>>(0, 0, i.GetDeviceId(), i.GetMatrixType(), i.GetMatrixFormat()),
        nullptr
    };
}

m_endOfEpoch = false;

Просмотреть файл

@ -18,12 +18,15 @@ swig.bat
# a) If you are just building to use it locally:
# Build -> generate .pyd
# 1) go two levels up
# 2) run the following:
python .\setup.py build_ext -if -c msvc --plat-name=win-amd64
# 3) add to PATH the path to cntk dlls (e.g. e:\CNTK\x64\Release)
SET PATH=%PATH%;<your CNTK release path e.g., e:\CNTK\x64\Release>
# 4) add to PYTHONPATH the path to the python examples (e.g. e:\CNTK\bindings\python\examples)
SET PYTHONPATH=e:\CNTK\bindings\python\examples
# 5) test by running any of the examples or running py.test from the inside bindings\python directory
# b) If you want to package it:

Просмотреть файл

@ -145,7 +145,7 @@ def seqcla():
# do some manual accuracy testing
acc = calc_accuracy(train_file, ctx.output_filename_base)

# and test for the same number...
# NOTE: removed the stale pre-merge TOLERANCE_ABSOLUTE = 2E-02 assignment that
# was left duplicated above this one by a bad merge (immediately overwritten).
TOLERANCE_ABSOLUTE = 1E-02
assert np.allclose(acc, 0.6022453889334403, atol=TOLERANCE_ABSOLUTE)
"""