Fixing omp
Parent: 215632bf51
Commit: eabd0e37d8
Makefile | 2 +-
@@ -449,7 +449,7 @@ $(CNTKLIBRARY_LIB): $(CNTKLIBRARY_OBJ) | $(CNTKMATH_LIB)
 	@echo $(SEPARATOR)
 	@mkdir -p $(dir $@)
 	@echo building $@ for $(ARCH) with build type $(BUILDTYPE)
-	$(CXX) $(LDFLAGS) -shared $(patsubst %,-L%, $(LIBDIR) $(LIBPATH) $(GDK_NVML_LIB_PATH)) $(patsubst %,$(RPATH)%, $(ORIGINDIR) $(LIBPATH)) -o $@ $^ $(LIBS) -l$(CNTKMATH) $(PROTOBUF_PATH)/lib/libprotobuf.a
+	$(CXX) $(LDFLAGS) -shared $(patsubst %,-L%, $(LIBDIR) $(LIBPATH) $(GDK_NVML_LIB_PATH)) $(patsubst %,$(RPATH)%, $(ORIGINDIR) $(LIBPATH)) -o $@ $^ $(LIBS) -l$(CNTKMATH) $(PROTOBUF_PATH)/lib/libprotobuf.a -fopenmp
 
 ########################################
 # CNTKLibrary tests
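The only change in this hunk is the trailing -fopenmp on the link line: the library's objects are compiled with OpenMP pragmas, and for GCC/Clang the same flag must also be passed at link time (it pulls in the OpenMP runtime, libgomp for GCC), otherwise the shared library fails to link with unresolved omp_*/GOMP_* symbols. A minimal standalone illustration of that dependency (not CNTK code):

    // omp_link_check.cpp -- build with: g++ -fopenmp omp_link_check.cpp
    // Dropping -fopenmp from the link step leaves omp_*/GOMP_* unresolved.
    #include <omp.h>
    #include <cstdio>

    int main()
    {
        #pragma omp parallel
        {
            // Each OpenMP thread reports its id; the count comes from the runtime.
            std::printf("thread %d of %d\n", omp_get_thread_num(), omp_get_num_threads());
        }
        return 0;
    }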
@@ -3539,8 +3539,9 @@ namespace CNTK
         Variable m_testSampleCountVar;
         LearnersPtr m_parameterLearners;
         bool m_distributed;
+        ValuePtr m_rootGradientValue;
 
         size_t m_prevMinibatchNumSamples;
         ValuePtr m_prevMinibatchAggregateTrainingLossValue;
         ValuePtr m_prevMinibatchAggregateEvalCriterionValue;
     };
@@ -243,6 +243,7 @@ namespace CNTK
         CNTK_API void ForceSynchronousCUDAKernelExecutions();
 
         CNTK_API void ForceDeterministicAlgorithms();
+        CNTK_API bool ShouldForceDeterministicAlgorithms();
 
         CNTK_API void SetFixedRandomSeed(unsigned long fixedRandomSeed);
 
@@ -259,5 +260,10 @@ namespace CNTK
         CNTK_API bool AreEqual(const ::CNTK::Value& value1, const ::CNTK::Value& value2, double relativeTolerance = 0.0, double absoluteTolerance = 0.0);
 
         class VariableResolver;
+
+        ///
+        /// Returns true if num CPU Threads was set.
+        ///
+        bool MaxNumCPUThreadsSet();
     }
 }
@@ -61,6 +61,7 @@
       <AdditionalIncludeDirectories>.\API;.\proto;$(SolutionDir)Source\SGDLib;$(SolutionDir)Source\Readers\ReaderLib;$(SolutionDir)Source\ComputationNetworkLib;$(SolutionDir)Source\SequenceTrainingLib;$(SolutionDir)Source\Math;$(SolutionDir)Source\Common\Include;$(SolutionDir)Source\CNTK\BrainScript;$(SolutionDir)Source\ActionsLib;$(MSMPI_INC);$(NvmlInclude);$(ProtobufInclude)</AdditionalIncludeDirectories>
       <AdditionalIncludeDirectories Condition="'$(CNTK_ENABLE_1BitSGD)'=='true'">$(SolutionDir)Source\1BitSGD;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
       <PreprocessorDefinitions Condition="'$(CNTK_ENABLE_1BitSGD)'=='true'">CNTK_PARALLEL_TRAINING_SUPPORT;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <OpenMPSupport>true</OpenMPSupport>
     </ClCompile>
     <Link>
       <AdditionalLibraryDirectories>$(SolutionDir)Source\ComputationNetworkLib;$(SolutionDir)Source\Math;$(MSMPI_LIB64);$(SolutionDir)$(Platform)\$(Configuration);$(NvmlLibPath);$(ProtobufLibPath)</AdditionalLibraryDirectories>
@@ -97,7 +98,6 @@
       <PrecompiledHeader>NotUsing</PrecompiledHeader>
       <PreprocessorDefinitions>CNTKV2LIBRARYDLL;WIN32;NDEBUG;_WINDOWS;_USRDLL;%(PreprocessorDefinitions)</PreprocessorDefinitions>
       <SDLCheck>true</SDLCheck>
-      <OpenMPSupport>false</OpenMPSupport>
       <AdditionalOptions>/d2Zi+ /bigobj %(AdditionalOptions)</AdditionalOptions>
       <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
       <TreatWarningAsError>true</TreatWarningAsError>
@@ -187,4 +187,4 @@
   <Target Name="CheckDependencies">
     <Warning Condition="!$(HasProtobuf)" Text="CNTKv2LibraryDll requires Protocol Buffers to build. Please see https://github.com/Microsoft/CNTK/wiki/Setup-CNTK-on-Windows#protobuf for installation instructions." />
   </Target>
-</Project>
\ No newline at end of file
+</Project>
@@ -351,6 +351,17 @@ namespace CNTK
        {
            Microsoft::MSR::CNTK::Globals::ForceDeterministicAlgorithms();
        }
+
+       bool ShouldForceDeterministicAlgorithms()
+       {
+           return Microsoft::MSR::CNTK::Globals::ShouldForceDeterministicAlgorithms();
+       }
+
+       static std::atomic<bool> s_theadsAreSet(false);
+       bool MaxNumCPUThreadsSet()
+       {
+           return s_theadsAreSet;
+       }
    }
 
    /*static*/ const NDShape NDShape::Unknown(1, SentinelDimValueForUnknownShape);
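The new s_theadsAreSet flag (identifier spelled as in the source) records whether SetMaxNumCPUThreads was ever called, so the Trainer constructor further down can apply a default without overriding an explicit user setting. A minimal sketch of this set-once pattern, with illustrative names rather than CNTK's:

    #include <atomic>

    static std::atomic<bool> s_userSetThreads(false); // hypothetical analogue of s_theadsAreSet

    void SetThreads(int n)
    {
        (void)n; // a real implementation would configure the thread runtime here
        s_userSetThreads = true;
    }

    bool UserSetThreads() { return s_userSetThreads; }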
@@ -490,21 +501,19 @@ namespace CNTK
         return s_allStaticAxes;
     }
 
-
     void Axis::RegisterAxisName(const std::wstring& axisName)
     {
         s_uniqueDynamicAxisNames.RegisterAxisName(axisName);
     }
 
-    std::atomic<size_t> s_maxNumCPUThreads(std::thread::hardware_concurrency());
     void SetMaxNumCPUThreads(size_t numCPUThreads)
     {
-        s_maxNumCPUThreads.store(numCPUThreads);
+        Internal::s_theadsAreSet = true;
         Microsoft::MSR::CNTK::CPUMatrix<float>::SetNumThreads((int)numCPUThreads);
     }
 
     size_t GetMaxNumCPUThreads()
     {
-        return s_maxNumCPUThreads.load();
+        return Microsoft::MSR::CNTK::CPUMatrix<float>::GetMaxNumThreads();
     }
 }
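GetMaxNumCPUThreads no longer reads a shadow std::atomic but asks CPUMatrix, and through it (per the CPUMatrix hunk below) the OpenMP runtime, directly. A cached copy can silently go stale whenever another component adjusts the thread count; a small standalone demonstration of that drift (assumes an OpenMP build):

    // build: g++ -fopenmp drift_demo.cpp
    #include <omp.h>
    #include <cstdio>

    int main()
    {
        int cached = omp_get_max_threads(); // snapshot, like the old shadow atomic
        omp_set_num_threads(3);             // some other component adjusts the runtime
        // The snapshot is now stale; querying the runtime reflects the change.
        std::printf("cached=%d, live=%d\n", cached, omp_get_max_threads());
        return 0;
    }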
@@ -7,7 +7,6 @@
 #include "CNTKLibrary.h"
 #include "Utils.h"
 #include "Learner.h"
-
 namespace
 {
     const std::wstring learnersPropertyName = L"Learners";
@@ -28,6 +27,10 @@ namespace CNTK
         m_prevMinibatchNumSamples(1),
         m_distributed(false)
     {
+        // By default we set the number of threads to hardware concurrency.
+        if (!Internal::MaxNumCPUThreadsSet())
+            SetMaxNumCPUThreads(std::thread::hardware_concurrency());
+
         std::vector<Variable> combinedFunctionArgs = { m_model, m_lossFunction };
         if (!m_lossFunction->Output().DynamicAxes().empty())
         {
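The constructor now defaults the CPU thread count to std::thread::hardware_concurrency() unless the user already set one. Worth noting: hardware_concurrency() is only a hint and may return 0 when the value is not computable. A hedged sketch with an explicit guard (the guard is this note's addition, not part of the commit):

    #include <thread>
    #include <cstddef>

    // Choose a default worker count, falling back to 1 when the
    // hardware concurrency is unknown.
    std::size_t DefaultThreadCount()
    {
        std::size_t n = std::thread::hardware_concurrency();
        return n != 0 ? n : 1;
    }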
@@ -214,11 +217,19 @@ namespace CNTK
                 outputsToFetch[outputToFetch.first] = outputs[outputToFetch.first];
         }
 
-        ValuePtr rootGradientValue = MakeSharedObject<Value>(MakeSharedObject<NDArrayView>(m_aggregatedLossFunction->Output().GetDataType(), m_prevMinibatchAggregateTrainingLossValue->Shape(), computeDevice), outputs.at(m_aggregatedLossFunction)->Mask());
+        if(!m_rootGradientValue ||
+            m_aggregatedLossFunction->Output().GetDataType() != m_rootGradientValue->GetDataType() ||
+            m_prevMinibatchAggregateTrainingLossValue->Shape() != m_rootGradientValue->Shape() ||
+            computeDevice != m_rootGradientValue->Device() ||
+            outputs.at(m_aggregatedLossFunction)->Mask() != m_rootGradientValue->Mask())
+        {
+            m_rootGradientValue = MakeSharedObject<Value>(MakeSharedObject<NDArrayView>(m_aggregatedLossFunction->Output().GetDataType(), m_prevMinibatchAggregateTrainingLossValue->Shape(), computeDevice), outputs.at(m_aggregatedLossFunction)->Mask());
+        }
+
         if (m_aggregatedLossFunction->Output().GetDataType() == DataType::Float)
-            rootGradientValue->Data()->SetValue(1.0f);
+            m_rootGradientValue->Data()->SetValue(1.0f);
         else
-            rootGradientValue->Data()->SetValue(1.0);
+            m_rootGradientValue->Data()->SetValue(1.0);
 
         auto modelParameters = m_combinedTrainingFunction->Parameters();
         for (const auto& parameter : modelParameters)
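The local rootGradientValue, previously allocated on every backward pass, becomes the member m_rootGradientValue and is reallocated only when the data type, shape, device, or mask actually changes, keeping per-minibatch allocations off the hot path. A minimal sketch of the same cache-and-reuse pattern, with a hypothetical Buffer type standing in for CNTK's Value/NDArrayView:

    #include <memory>
    #include <vector>
    #include <cstddef>

    struct Buffer
    {
        explicit Buffer(std::size_t n) : data(n) {}
        std::vector<float> data;
    };

    std::shared_ptr<Buffer> g_cached;

    std::shared_ptr<Buffer>& GetScratch(std::size_t n)
    {
        // Reallocate only when the cached buffer is missing or incompatible;
        // otherwise reuse it and skip the allocation entirely.
        if (!g_cached || g_cached->data.size() != n)
            g_cached = std::make_shared<Buffer>(n);
        return g_cached;
    }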
@@ -227,7 +238,7 @@ namespace CNTK
         }
 
         // TODO: Why Backward signature does not take Parameter instead of Variable for gradients?
-        m_combinedTrainingFunction->Backward(backPropSate, { { m_aggregatedLossFunction, rootGradientValue } }, parameterGradients);
+        m_combinedTrainingFunction->Backward(backPropSate, { { m_aggregatedLossFunction, m_rootGradientValue } }, parameterGradients);
         m_prevMinibatchNumSamples = GetSampleCount(m_trainingSampleCountVar, outputs[m_trainingSampleCountVar]);
     }
 
@@ -6077,6 +6077,16 @@ int CPUMatrix<ElemType>::SetNumThreads(int numThreads)
     return numThreads;
 }
 
+template <class ElemType>
+int CPUMatrix<ElemType>::GetMaxNumThreads()
+{
+    int numThreads = (int)std::thread::hardware_concurrency();
+#ifdef _OPENMP
+    numThreads = omp_get_max_threads();
+#endif
+    return numThreads;
+}
+
 // To ensure Intel MKL calls return the same results on all Intel or Intel compatible CPUs,
 // the function set CBWR compatible mode.
 template <class ElemType>
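The new GetMaxNumThreads queries omp_get_max_threads() when built with OpenMP and falls back to std::thread::hardware_concurrency() otherwise, so the file still compiles without OpenMP support. A self-contained sketch that builds both ways:

    #include <thread>
    #ifdef _OPENMP
    #include <omp.h>
    #endif

    // Compiles with or without OpenMP: g++ f.cpp  vs.  g++ -fopenmp f.cpp
    int MaxThreads()
    {
        int n = (int)std::thread::hardware_concurrency(); // portable fallback
    #ifdef _OPENMP
        n = omp_get_max_threads(); // honors OMP_NUM_THREADS / omp_set_num_threads
    #endif
        return n;
    }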
@@ -390,6 +390,8 @@ public:
 public:
     // This functions do not depend on <ElemType>, i.e. you can call them on any <ElemType>
     static int SetNumThreads(int numThreads);
+    static int GetMaxNumThreads();
+
     static void SetCompatibleMode();
 
     // static BLAS functions
@@ -54,6 +54,7 @@
       <SDLCheck>true</SDLCheck>
       <TreatWarningAsError>true</TreatWarningAsError>
       <AdditionalIncludeDirectories>$(SolutionDir)Source\Common\Include;$(SolutionDir)Source\Math;$(SolutionDir)Source\Readers\ReaderLib</AdditionalIncludeDirectories>
+      <OpenMPSupport>true</OpenMPSupport>
     </ClCompile>
     <Link>
       <SubSystem>Console</SubSystem>
@@ -101,4 +102,4 @@
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
   </ImportGroup>
-</Project>
\ No newline at end of file
+</Project>
@@ -57,7 +57,7 @@
 %ignore CNTK::Internal::Gather;
 %ignore CNTK::Internal::Scatter;
 %ignore CNTK::Internal::Slice;
-%ignore CNTK::DistributedCommunicator::AggregateAsync;
+%ignore CNTK::Internal::MaxNumCPUThreadsSet;
 
 // These aren't exported from the CNTK C++ library
 %ignore CNTK::Internal::IsReversingTensorShapesInErrorMessagesEnabled;