Eldar Akchurin 2016-12-12 14:16:10 +01:00
Parent 215632bf51
Commit eabd0e37d8
10 changed files: 55 additions and 15 deletions

View file

@@ -449,7 +449,7 @@ $(CNTKLIBRARY_LIB): $(CNTKLIBRARY_OBJ) | $(CNTKMATH_LIB)
@echo $(SEPARATOR)
@mkdir -p $(dir $@)
@echo building $@ for $(ARCH) with build type $(BUILDTYPE)
-$(CXX) $(LDFLAGS) -shared $(patsubst %,-L%, $(LIBDIR) $(LIBPATH) $(GDK_NVML_LIB_PATH)) $(patsubst %,$(RPATH)%, $(ORIGINDIR) $(LIBPATH)) -o $@ $^ $(LIBS) -l$(CNTKMATH) $(PROTOBUF_PATH)/lib/libprotobuf.a
+$(CXX) $(LDFLAGS) -shared $(patsubst %,-L%, $(LIBDIR) $(LIBPATH) $(GDK_NVML_LIB_PATH)) $(patsubst %,$(RPATH)%, $(ORIGINDIR) $(LIBPATH)) -o $@ $^ $(LIBS) -l$(CNTKMATH) $(PROTOBUF_PATH)/lib/libprotobuf.a -fopenmp
########################################
# CNTKLibrary tests
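The only functional change above is the trailing -fopenmp on the link line. CPUMatrix now calls omp_get_max_threads() (see the CPUMatrix.cpp hunk further down), so the shared library has to pull in the OpenMP runtime at link time or the build fails with undefined references to omp_*/GOMP_* symbols. A minimal sketch of that failure mode, assuming a GCC toolchain and a hypothetical threads.cpp:

    // threads.cpp
    // g++ -fopenmp -c threads.cpp && g++ threads.o            -> undefined reference to omp_get_max_threads
    // g++ -fopenmp -c threads.cpp && g++ threads.o -fopenmp   -> links fine
    #include <cstdio>
    #include <omp.h>

    int main()
    {
        // omp_get_max_threads() lives in the OpenMP runtime (libgomp for GCC).
        std::printf("max threads: %d\n", omp_get_max_threads());
        return 0;
    }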

View file

@@ -3539,8 +3539,9 @@ namespace CNTK
Variable m_testSampleCountVar;
LearnersPtr m_parameterLearners;
bool m_distributed;
+ValuePtr m_rootGradientValue;
size_t m_prevMinibatchNumSamples;
ValuePtr m_prevMinibatchAggregateTrainingLossValue;
ValuePtr m_prevMinibatchAggregateEvalCriterionValue;
};

View file

@@ -243,6 +243,7 @@ namespace CNTK
CNTK_API void ForceSynchronousCUDAKernelExecutions();
CNTK_API void ForceDeterministicAlgorithms();
+CNTK_API bool ShouldForceDeterministicAlgorithms();
CNTK_API void SetFixedRandomSeed(unsigned long fixedRandomSeed);
@@ -259,5 +260,10 @@ namespace CNTK
CNTK_API bool AreEqual(const ::CNTK::Value& value1, const ::CNTK::Value& value2, double relativeTolerance = 0.0, double absoluteTolerance = 0.0);
class VariableResolver;
+///
+/// Returns true if num CPU Threads was set.
+///
+bool MaxNumCPUThreadsSet();
}
}
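The new Internal::MaxNumCPUThreadsSet() lets library code tell "the user already called SetMaxNumCPUThreads" apart from "the process default is still in effect"; the Trainer constructor change further down relies on exactly this. A short usage sketch against the public API (the wrapper function name is hypothetical):

    #include "CNTKLibrary.h"
    #include <thread>

    void EnsureThreadCountConfigured()
    {
        // Install a default only if the user has not picked a thread count;
        // this mirrors what the Trainer constructor in this commit does.
        if (!CNTK::Internal::MaxNumCPUThreadsSet())
            CNTK::SetMaxNumCPUThreads(std::thread::hardware_concurrency());
    }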

View file

@@ -61,6 +61,7 @@
<AdditionalIncludeDirectories>.\API;.\proto;$(SolutionDir)Source\SGDLib;$(SolutionDir)Source\Readers\ReaderLib;$(SolutionDir)Source\ComputationNetworkLib;$(SolutionDir)Source\SequenceTrainingLib;$(SolutionDir)Source\Math;$(SolutionDir)Source\Common\Include;$(SolutionDir)Source\CNTK\BrainScript;$(SolutionDir)Source\ActionsLib;$(MSMPI_INC);$(NvmlInclude);$(ProtobufInclude)</AdditionalIncludeDirectories>
<AdditionalIncludeDirectories Condition="'$(CNTK_ENABLE_1BitSGD)'=='true'">$(SolutionDir)Source\1BitSGD;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
<PreprocessorDefinitions Condition="'$(CNTK_ENABLE_1BitSGD)'=='true'">CNTK_PARALLEL_TRAINING_SUPPORT;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+<OpenMPSupport>true</OpenMPSupport>
</ClCompile>
<Link>
<AdditionalLibraryDirectories>$(SolutionDir)Source\ComputationNetworkLib;$(SolutionDir)Source\Math;$(MSMPI_LIB64);$(SolutionDir)$(Platform)\$(Configuration);$(NvmlLibPath);$(ProtobufLibPath)</AdditionalLibraryDirectories>
@@ -97,7 +98,6 @@
<PrecompiledHeader>NotUsing</PrecompiledHeader>
<PreprocessorDefinitions>CNTKV2LIBRARYDLL;WIN32;NDEBUG;_WINDOWS;_USRDLL;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<SDLCheck>true</SDLCheck>
-<OpenMPSupport>false</OpenMPSupport>
<AdditionalOptions>/d2Zi+ /bigobj %(AdditionalOptions)</AdditionalOptions>
<RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
<TreatWarningAsError>true</TreatWarningAsError>
@@ -187,4 +187,4 @@
<Target Name="CheckDependencies">
<Warning Condition="!$(HasProtobuf)" Text="CNTKv2LibraryDll requires Protocol Buffers to build. Please see https://github.com/Microsoft/CNTK/wiki/Setup-CNTK-on-Windows#protobuf for installation instructions." />
</Target>
-</Project>
\ No newline at end of file
+</Project>
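<OpenMPSupport>true</OpenMPSupport> maps to the MSVC /openmp compiler switch, and dropping the explicit false from the second ClCompile block keeps both configurations of this project consistent. With the switch on, the compiler defines _OPENMP, which is precisely what the new CPUMatrix<ElemType>::GetMaxNumThreads (later in this commit) tests. A tiny self-contained check of how a translation unit was built:

    #include <cstdio>

    int main()
    {
    #ifdef _OPENMP
        // _OPENMP expands to the date (yyyymm) of the OpenMP spec in use.
        std::printf("OpenMP enabled, spec %d\n", _OPENMP);
    #else
        std::printf("compiled without OpenMP\n");
    #endif
        return 0;
    }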

View file

@@ -351,6 +351,17 @@ namespace CNTK
{
Microsoft::MSR::CNTK::Globals::ForceDeterministicAlgorithms();
}
+bool ShouldForceDeterministicAlgorithms()
+{
+return Microsoft::MSR::CNTK::Globals::ShouldForceDeterministicAlgorithms();
+}
+static std::atomic<bool> s_theadsAreSet(false);
+bool MaxNumCPUThreadsSet()
+{
+return s_theadsAreSet;
+}
}
/*static*/ const NDShape NDShape::Unknown(1, SentinelDimValueForUnknownShape);
@@ -490,21 +501,19 @@ namespace CNTK
return s_allStaticAxes;
}
void Axis::RegisterAxisName(const std::wstring& axisName)
{
s_uniqueDynamicAxisNames.RegisterAxisName(axisName);
}
-std::atomic<size_t> s_maxNumCPUThreads(std::thread::hardware_concurrency());
void SetMaxNumCPUThreads(size_t numCPUThreads)
{
-s_maxNumCPUThreads.store(numCPUThreads);
+Internal::s_theadsAreSet = true;
Microsoft::MSR::CNTK::CPUMatrix<float>::SetNumThreads((int)numCPUThreads);
}
size_t GetMaxNumCPUThreads()
{
-return s_maxNumCPUThreads.load();
+return Microsoft::MSR::CNTK::CPUMatrix<float>::GetMaxNumThreads();
}
}
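The notable change in this hunk is GetMaxNumCPUThreads: instead of echoing back a separately cached atomic (which could drift from what the math library actually uses), it now asks CPUMatrix<float>::GetMaxNumThreads, i.e. the OpenMP runtime itself. A standalone sketch of the set-then-read-back round trip this guarantees (build with -fopenmp):

    #include <cstdio>
    #include <omp.h>

    int main()
    {
        // Write the thread count into the OpenMP runtime...
        omp_set_num_threads(4);
        // ...and read it back from the same source of truth, so the two
        // values cannot disagree the way a shadow copy could.
        std::printf("max threads: %d\n", omp_get_max_threads());  // prints 4 in the common case
        return 0;
    }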

View file

@@ -7,7 +7,6 @@
#include "CNTKLibrary.h"
#include "Utils.h"
#include "Learner.h"
namespace
{
const std::wstring learnersPropertyName = L"Learners";
@@ -28,6 +27,10 @@ namespace CNTK
m_prevMinibatchNumSamples(1),
m_distributed(false)
{
+// By default we set the number of threads to hardware concurrency.
+if (!Internal::MaxNumCPUThreadsSet())
+SetMaxNumCPUThreads(std::thread::hardware_concurrency());
std::vector<Variable> combinedFunctionArgs = { m_model, m_lossFunction };
if (!m_lossFunction->Output().DynamicAxes().empty())
{
@@ -214,11 +217,19 @@ namespace CNTK
outputsToFetch[outputToFetch.first] = outputs[outputToFetch.first];
}
-ValuePtr rootGradientValue = MakeSharedObject<Value>(MakeSharedObject<NDArrayView>(m_aggregatedLossFunction->Output().GetDataType(), m_prevMinibatchAggregateTrainingLossValue->Shape(), computeDevice), outputs.at(m_aggregatedLossFunction)->Mask());
+if(!m_rootGradientValue ||
+m_aggregatedLossFunction->Output().GetDataType() != m_rootGradientValue->GetDataType() ||
+m_prevMinibatchAggregateTrainingLossValue->Shape() != m_rootGradientValue->Shape() ||
+computeDevice != m_rootGradientValue->Device() ||
+outputs.at(m_aggregatedLossFunction)->Mask() != m_rootGradientValue->Mask())
+{
+m_rootGradientValue = MakeSharedObject<Value>(MakeSharedObject<NDArrayView>(m_aggregatedLossFunction->Output().GetDataType(), m_prevMinibatchAggregateTrainingLossValue->Shape(), computeDevice), outputs.at(m_aggregatedLossFunction)->Mask());
+}
if (m_aggregatedLossFunction->Output().GetDataType() == DataType::Float)
-rootGradientValue->Data()->SetValue(1.0f);
+m_rootGradientValue->Data()->SetValue(1.0f);
else
-rootGradientValue->Data()->SetValue(1.0);
+m_rootGradientValue->Data()->SetValue(1.0);
auto modelParameters = m_combinedTrainingFunction->Parameters();
for (const auto& parameter : modelParameters)
@@ -227,7 +238,7 @@ namespace CNTK
}
// TODO: Why Backward signature does not take Parameter instead of Variable for gradients?
-m_combinedTrainingFunction->Backward(backPropSate, { { m_aggregatedLossFunction, rootGradientValue } }, parameterGradients);
+m_combinedTrainingFunction->Backward(backPropSate, { { m_aggregatedLossFunction, m_rootGradientValue } }, parameterGradients);
m_prevMinibatchNumSamples = GetSampleCount(m_trainingSampleCountVar, outputs[m_trainingSampleCountVar]);
}
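Taken together, the Trainer changes replace a per-minibatch allocation of the root gradient with the cached m_rootGradientValue: the Value is rebuilt only when the data type, shape, device, or mask of the loss output changes, and on every other call only the cheap SetValue(1) reset runs. A generic sketch of this reuse-if-compatible pattern (all names illustrative, not CNTK's):

    #include <algorithm>
    #include <memory>
    #include <vector>

    struct Buffer
    {
        size_t size;
        std::vector<float> data;
        explicit Buffer(size_t n) : size(n), data(n) {}  // the expensive allocation
    };

    class Worker
    {
    public:
        Buffer& GetScratch(size_t n)
        {
            // Reallocate only when the cached buffer is incompatible with the
            // request; otherwise reuse it across calls.
            if (!m_scratch || m_scratch->size != n)
                m_scratch = std::make_unique<Buffer>(n);
            // Cheap per-call reset, analogous to SetValue(1) on the root gradient.
            std::fill(m_scratch->data.begin(), m_scratch->data.end(), 1.0f);
            return *m_scratch;
        }
    private:
        std::unique_ptr<Buffer> m_scratch;
    };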

View file

@@ -6077,6 +6077,16 @@ int CPUMatrix<ElemType>::SetNumThreads(int numThreads)
return numThreads;
}
+template <class ElemType>
+int CPUMatrix<ElemType>::GetMaxNumThreads()
+{
+int numThreads = (int)std::thread::hardware_concurrency();
+#ifdef _OPENMP
+numThreads = omp_get_max_threads();
+#endif
+return numThreads;
+}
// To ensure Intel MKL calls return the same results on all Intel or Intel compatible CPUs,
// the function set CBWR compatible mode.
template <class ElemType>

View file

@@ -390,6 +390,8 @@
public:
// These functions do not depend on <ElemType>, i.e. you can call them on any <ElemType>
static int SetNumThreads(int numThreads);
+static int GetMaxNumThreads();
static void SetCompatibleMode();
// static BLAS functions
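The comment above is worth taking literally: these statics only touch process-wide threading state (OpenMP, MKL), so CPUMatrix<float>::GetMaxNumThreads() and CPUMatrix<double>::GetMaxNumThreads() behave identically, which is why the v2 library can always route through the <float> instantiation. A minimal sketch of the pattern with hypothetical names:

    #include <atomic>

    namespace {
    // Deliberately outside the template: one process-wide value that every
    // instantiation observes, unlike a static data member, which would be
    // separate for each ElemType.
    std::atomic<int> g_numThreads{1};
    }

    template <class ElemType>
    class Matrix
    {
    public:
        // Only global state is read or written here, so the choice of ElemType
        // is irrelevant: Matrix<float>::SetThreads(8) and
        // Matrix<double>::SetThreads(8) do exactly the same thing.
        static void SetThreads(int n) { g_numThreads.store(n); }
        static int  GetThreads()      { return g_numThreads.load(); }
    };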

View file

@@ -54,6 +54,7 @@
<SDLCheck>true</SDLCheck>
<TreatWarningAsError>true</TreatWarningAsError>
<AdditionalIncludeDirectories>$(SolutionDir)Source\Common\Include;$(SolutionDir)Source\Math;$(SolutionDir)Source\Readers\ReaderLib</AdditionalIncludeDirectories>
+<OpenMPSupport>true</OpenMPSupport>
</ClCompile>
<Link>
<SubSystem>Console</SubSystem>
@@ -101,4 +102,4 @@
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
<ImportGroup Label="ExtensionTargets">
</ImportGroup>
-</Project>
\ No newline at end of file
+</Project>

View file

@@ -57,7 +57,7 @@
%ignore CNTK::Internal::Gather;
%ignore CNTK::Internal::Scatter;
%ignore CNTK::Internal::Slice;
-%ignore CNTK::DistributedCommunicator::AggregateAsync;
+%ignore CNTK::Internal::MaxNumCPUThreadsSet;
// These aren't exported from the CNTK C++ library
%ignore CNTK::Internal::IsReversingTensorShapesInErrorMessagesEnabled;