Abort execution if the selected GPU's compute capability (CC) is less than 3.0. In auto configuration, disallow such GPUs.
This commit is contained in:
Родитель
e8d22aaacd
Коммит
143b1cff3b
|
@ -117,6 +117,11 @@ size_t GetMaxEpochs(const ConfigParameters& configParams)
|
|||
return maxEpochs;
|
||||
}
|
||||
|
||||
// Aborts (via InvalidArgument) when the GPU with the given device id cannot be used.
// gpuSupported() reports false both when the device's compute capability major version
// is below 3 and when no device with this id is found, so the message names both causes
// instead of claiming a low compute capability for a device that may not exist.
//   deviceId: id of the GPU the user explicitly asked for.
void checkSupportForGpu(DEVICEID_TYPE deviceId)
{
    if (!gpuSupported(deviceId))
    {
        InvalidArgument("CNTK: GPU device %d is not supported: it was not found or has compute capability less than 3.0", deviceId);
    }
}
|
||||
|
||||
// special temporary function to guard against a now invalid usage of "truncated" which exists in some IPG production setups
|
||||
static void DisableLegacyTruncationSettings(const ConfigParameters& TopLevelConfig, const ConfigParameters& commandConfig)
|
||||
{
|
||||
|
@ -484,6 +489,16 @@ int wmainWithBS(int argc, wchar_t* argv[]) // called from wmain which is a wrapp
|
|||
let valp = BS::Evaluate(expr); // evaluate parse into a dictionary
|
||||
let& config = valp.AsRef<ScriptableObjects::IConfigRecord>(); // this is the dictionary
|
||||
|
||||
auto valpp = config.Find(L"deviceId");
|
||||
if (valpp)
|
||||
{
|
||||
auto valp = *valpp;
|
||||
if (!valp.Is<ScriptableObjects::String>()) // If it's not string 'auto' or 'cpu', then it's a gpu
|
||||
{
|
||||
checkSupportForGpu(valp);
|
||||
}
|
||||
}
|
||||
|
||||
// legacy parameters that have changed spelling
|
||||
if (config.Find(L"DoneFile")) // variables follow camel case (start with lower-case letters)
|
||||
InvalidArgument("Legacy spelling of 'DoneFile' no longer allowed. Use 'doneFile'.");
|
||||
|
@ -582,6 +597,13 @@ int wmainOldCNTKConfig(int argc, wchar_t* argv[])
|
|||
{
|
||||
ConfigParameters config;
|
||||
std::string rawConfigString = ConfigParameters::ParseCommandLine(argc, argv, config); // get the command param set they want
|
||||
|
||||
DEVICEID_TYPE deviceId = config("deviceId");
|
||||
if (deviceId >= 0)
|
||||
{
|
||||
checkSupportForGpu(deviceId);
|
||||
}
|
||||
|
||||
bool timestamping = config(L"timestamping", false);
|
||||
if (timestamping)
|
||||
{
|
||||
|
|
|
@ -107,6 +107,7 @@ public:
|
|||
void Init();
|
||||
void SetAllowedDevices(const std::vector<int>& devices); // only allow certain GPUs
|
||||
bool DeviceAllowed(int device);
|
||||
void DisallowUnsupportedDevices();
|
||||
void DisallowDevice(int device)
|
||||
{
|
||||
assert((device >= -1) && (device <= 31));
|
||||
|
@ -158,6 +159,8 @@ static DEVICEID_TYPE SelectDevice(DEVICEID_TYPE deviceId, bool bLockGPU, const i
|
|||
{
|
||||
g_bestGpu->DisallowDevice(excludedDevices[i]);
|
||||
}
|
||||
|
||||
g_bestGpu->DisallowUnsupportedDevices();
|
||||
}
|
||||
|
||||
bestDeviceId = (DEVICEID_TYPE)g_bestGpu->GetDevice(BestGpuFlags(bLockGPU ? (bestGpuAvoidSharing | bestGpuExclusiveLock) : bestGpuAvoidSharing));
|
||||
|
@ -529,6 +532,33 @@ std::vector<int> BestGpu::GetDevices(int number, BestGpuFlags p_bestFlags)
|
|||
return best; // return the array of the best GPUs
|
||||
}
|
||||
|
||||
void BestGpu::DisallowUnsupportedDevices()
|
||||
{
|
||||
for (ProcessorData* pd : m_procData)
|
||||
{
|
||||
if (pd->deviceProp.major < 3)
|
||||
{
|
||||
DisallowDevice(pd->deviceId); //We don't support GPUs with Compute Capability < 3.0
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Reports whether the GPU identified by deviceId is usable.
// Builds a fresh BestGpu, looks through its processor data for the first entry whose id
// equals deviceId, and returns true only if that entry's compute capability major version
// is at least 3. Returns false when no entry matches.
bool gpuSupported(DEVICEID_TYPE deviceId)
{
    auto gpuEnumerator = make_unique<BestGpu>();
    std::vector<ProcessorData*> devices = gpuEnumerator->GetProcessorData();

    bool supported = false; // stays false if the id is never encountered
    for (ProcessorData* device : devices)
    {
        if (device->deviceId != deviceId)
            continue;

        supported = (device->deviceProp.major >= 3); // compute capability >= 3.0 is required
        break;                                        // only the first matching entry counts
    }

    return supported;
}
|
||||
|
||||
std::vector<GpuData> GetGpusData()
|
||||
{
|
||||
std::vector<GpuData> data;
|
||||
|
@ -536,21 +566,19 @@ std::vector<GpuData> GetGpusData()
|
|||
auto bestGpu = make_unique<BestGpu>();
|
||||
|
||||
std::vector<ProcessorData*> processorData = bestGpu->GetProcessorData();
|
||||
if (!processorData.empty())
|
||||
|
||||
for (ProcessorData* pd : processorData)
|
||||
{
|
||||
for (ProcessorData* pd : processorData)
|
||||
{
|
||||
GpuData gpuData;
|
||||
gpuData.m_major = pd->deviceProp.major;
|
||||
gpuData.m_minor = pd->deviceProp.minor;
|
||||
gpuData.m_cudaCores = pd->cores;
|
||||
gpuData.m_deviceId = pd->deviceId;
|
||||
GpuData gpuData;
|
||||
gpuData.m_major = pd->deviceProp.major;
|
||||
gpuData.m_minor = pd->deviceProp.minor;
|
||||
gpuData.m_cudaCores = pd->cores;
|
||||
gpuData.m_deviceId = pd->deviceId;
|
||||
|
||||
string gpuName(pd->deviceProp.name);
|
||||
gpuData.m_name = gpuName;
|
||||
gpuData.m_totalMemory = pd->deviceProp.totalGlobalMem/(1024*1024); //From bytes to MBytes
|
||||
data.push_back(gpuData);
|
||||
}
|
||||
string gpuName(pd->deviceProp.name);
|
||||
gpuData.m_name = gpuName;
|
||||
gpuData.m_totalMemory = pd->deviceProp.totalGlobalMem/(1024*1024); //From bytes to MBytes
|
||||
data.push_back(gpuData);
|
||||
}
|
||||
|
||||
return data;
|
||||
|
|
|
@ -29,6 +29,7 @@ struct GpuData
|
|||
};
|
||||
|
||||
std::vector<GpuData> GetGpusData();
|
||||
// Returns true only if a GPU with the given device id is found and its compute capability major version is at least 3; returns false otherwise.
bool gpuSupported(DEVICEID_TYPE deviceId);
|
||||
|
||||
class ConfigParameters;
|
||||
DEVICEID_TYPE DeviceFromConfig(const ConfigParameters& config);
|
||||
|
|
Загрузка…
Ссылка в новой задаче