Abort execution if the selected GPU's compute capability (CC) is less than 3.0. In auto configuration, disallow such GPUs.

This commit is contained in:
Ivan Rodriguez 2016-06-23 10:24:44 +02:00
Родитель e8d22aaacd
Коммит 143b1cff3b
3 изменённых файлов: 64 добавлений и 13 удалений

Просмотреть файл

@ -117,6 +117,11 @@ size_t GetMaxEpochs(const ConfigParameters& configParams)
return maxEpochs;
}
// Rejects a GPU device id that gpuSupported() does not accept.
//
// deviceId: id of the GPU device to validate.
//
// When gpuSupported(deviceId) is false, the failure is reported through
// InvalidArgument with a message naming the device. Note that gpuSupported()
// also returns false when it finds no device with this id, so the same
// "compute capability" message is reported in that case too.
void checkSupportForGpu(DEVICEID_TYPE deviceId)
{
    const bool supported = gpuSupported(deviceId);
    if (supported)
    {
        return; // nothing to report
    }

    InvalidArgument("CNTK: GPU device %d has compute capability less than 3.0", deviceId);
}
// special temporary function to guard against a now invalid usage of "truncated" which exists in some IPG production setups
static void DisableLegacyTruncationSettings(const ConfigParameters& TopLevelConfig, const ConfigParameters& commandConfig)
{
@ -484,6 +489,16 @@ int wmainWithBS(int argc, wchar_t* argv[]) // called from wmain which is a wrapp
let valp = BS::Evaluate(expr); // evaluate parse into a dictionary
let& config = valp.AsRef<ScriptableObjects::IConfigRecord>(); // this is the dictionary
auto valpp = config.Find(L"deviceId");
if (valpp)
{
auto valp = *valpp;
if (!valp.Is<ScriptableObjects::String>()) // If it's not string 'auto' or 'cpu', then it's a gpu
{
checkSupportForGpu(valp);
}
}
// legacy parameters that have changed spelling
if (config.Find(L"DoneFile")) // variables follow camel case (start with lower-case letters)
InvalidArgument("Legacy spelling of 'DoneFile' no longer allowed. Use 'doneFile'.");
@ -582,6 +597,13 @@ int wmainOldCNTKConfig(int argc, wchar_t* argv[])
{
ConfigParameters config;
std::string rawConfigString = ConfigParameters::ParseCommandLine(argc, argv, config); // get the command param set they want
DEVICEID_TYPE deviceId = config("deviceId");
if (deviceId >= 0)
{
checkSupportForGpu(deviceId);
}
bool timestamping = config(L"timestamping", false);
if (timestamping)
{

Просмотреть файл

@ -107,6 +107,7 @@ public:
void Init();
void SetAllowedDevices(const std::vector<int>& devices); // only allow certain GPUs
bool DeviceAllowed(int device);
void DisallowUnsupportedDevices();
void DisallowDevice(int device)
{
assert((device >= -1) && (device <= 31));
@ -158,6 +159,8 @@ static DEVICEID_TYPE SelectDevice(DEVICEID_TYPE deviceId, bool bLockGPU, const i
{
g_bestGpu->DisallowDevice(excludedDevices[i]);
}
g_bestGpu->DisallowUnsupportedDevices();
}
bestDeviceId = (DEVICEID_TYPE)g_bestGpu->GetDevice(BestGpuFlags(bLockGPU ? (bestGpuAvoidSharing | bestGpuExclusiveLock) : bestGpuAvoidSharing));
@ -529,6 +532,33 @@ std::vector<int> BestGpu::GetDevices(int number, BestGpuFlags p_bestFlags)
return best; // return the array of the best GPUs
}
void BestGpu::DisallowUnsupportedDevices()
{
for (ProcessorData* pd : m_procData)
{
if (pd->deviceProp.major < 3)
{
DisallowDevice(pd->deviceId); //We don't support GPUs with Compute Capability < 3.0
}
}
}
// Tells whether the GPU with the given id is supported, i.e. whether its
// compute capability major version is at least 3.
//
// deviceId: id to look up among the entries returned by
//           BestGpu::GetProcessorData() on a freshly created BestGpu.
//
// Returns true if a matching entry has deviceProp.major >= 3; returns false if
// the matching entry has a lower major version or if no entry matches.
bool gpuSupported(DEVICEID_TYPE deviceId)
{
    const int minSupportedMajor = 3; // We support GPUs with Compute Capability >= 3.0

    auto gpuQuery = make_unique<BestGpu>();
    std::vector<ProcessorData*> devices = gpuQuery->GetProcessorData();

    bool supported = false; // stays false when deviceId is not found
    for (ProcessorData* candidate : devices)
    {
        if (candidate->deviceId != deviceId)
        {
            continue;
        }
        // The first matching entry decides the result.
        supported = candidate->deviceProp.major >= minSupportedMajor;
        break;
    }
    return supported;
}
std::vector<GpuData> GetGpusData()
{
std::vector<GpuData> data;
@ -536,21 +566,19 @@ std::vector<GpuData> GetGpusData()
auto bestGpu = make_unique<BestGpu>();
std::vector<ProcessorData*> processorData = bestGpu->GetProcessorData();
if (!processorData.empty())
for (ProcessorData* pd : processorData)
{
for (ProcessorData* pd : processorData)
{
GpuData gpuData;
gpuData.m_major = pd->deviceProp.major;
gpuData.m_minor = pd->deviceProp.minor;
gpuData.m_cudaCores = pd->cores;
gpuData.m_deviceId = pd->deviceId;
GpuData gpuData;
gpuData.m_major = pd->deviceProp.major;
gpuData.m_minor = pd->deviceProp.minor;
gpuData.m_cudaCores = pd->cores;
gpuData.m_deviceId = pd->deviceId;
string gpuName(pd->deviceProp.name);
gpuData.m_name = gpuName;
gpuData.m_totalMemory = pd->deviceProp.totalGlobalMem/(1024*1024); //From bytes to MBytes
data.push_back(gpuData);
}
string gpuName(pd->deviceProp.name);
gpuData.m_name = gpuName;
gpuData.m_totalMemory = pd->deviceProp.totalGlobalMem/(1024*1024); //From bytes to MBytes
data.push_back(gpuData);
}
return data;

Просмотреть файл

@ -29,6 +29,7 @@ struct GpuData
};
std::vector<GpuData> GetGpusData();
bool gpuSupported(DEVICEID_TYPE deviceId);
class ConfigParameters;
DEVICEID_TYPE DeviceFromConfig(const ConfigParameters& config);