//
// <copyright file="BestGPU.cpp" company="Microsoft">
// Copyright (c) Microsoft Corporation. All rights reserved.
// </copyright>
//

#define _CRT_SECURE_NO_WARNINGS // "secure" CRT not available on all platforms --add this at the top of all CPP files that give "function or variable may be unsafe" warnings

#include "BestGpu.h"
#include "commandArgUtil.h" // for ConfigParameters
#ifndef CPUONLY
#pragma comment (lib, "cudart.lib")
#pragma comment (lib, "nvml.lib")
#include <cuda_runtime.h>
#include <nvml.h>
#include <vector>
#endif
#include "CommonMatrix.h" // for CPUDEVICE and AUTOPLACEMATRIX

#ifndef CPUONLY // #define this to disable GPUs

// CUDA-C includes
#include <cuda.h>
#include <windows.h>
#include <delayimp.h>
#include <Shlobj.h>
#include <stdio.h>

#ifdef MPI_SUPPORT
#include "mpi.h"
#endif
extern int myRank;
extern int numProcs;

// ---------------------------------------------------------------------------
// BestGpu class
// ---------------------------------------------------------------------------

namespace Microsoft { namespace MSR { namespace CNTK {

struct ProcessorData
{
    int cores;
    nvmlMemory_t memory;
    nvmlUtilization_t utilization;
    cudaDeviceProp deviceProp;
    size_t cudaFreeMem;
    size_t cudaTotalMem;
    bool dbnFound;
    bool cnFound;
    int deviceId; // the deviceId (cuda side) for this processor
};

enum BestGpuFlags
{
    bestGpuNormal = 0,
    bestGpuAvoidSharing = 1,     // don't share with other known machine learning apps (cn.exe/dbn.exe)
    bestGpuFavorMemory = 2,      // favor the device with the most free memory
    bestGpuFavorUtilization = 4, // favor low utilization
    bestGpuFavorSpeed = 8,       // favor the fastest processor
    bestGpuRequery = 256,        // rerun the last query, updating statistics
};
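// Flags are bit values and may be combined, e.g. (BestGpuFlags)(bestGpuAvoidSharing | bestGpuFavorMemory)
// requests an otherwise-idle card while also weighting free memory more heavily in the score.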
class BestGpu
{
private:
    bool m_initialized;       // initialized
    bool m_nvmlData;          // nvml data is valid
    bool m_cudaData;          // cuda data is valid
    int m_deviceCount;        // how many devices are available?
    int m_queryCount;         // how many times have we queried the usage counters?
    BestGpuFlags m_lastFlags; // flag state at last query
    int m_lastCount;          // count of devices (with filtering of allowed devices)
    std::vector<ProcessorData*> m_procData;
    int m_allowedDevices;     // bitfield of allowed devices
    void GetCudaProperties();
    void GetNvmlData();
    void QueryNvmlData();

public:
    BestGpu() : m_initialized(false), m_nvmlData(false), m_cudaData(false), m_deviceCount(0), m_queryCount(0),
        m_lastFlags(bestGpuNormal), m_lastCount(0), m_allowedDevices(-1)
    {
        Init();
    }
    ~BestGpu();
    void Init();
    void SetAllowedDevices(const std::vector<int>& devices); // only allow certain GPUs
    bool DeviceAllowed(int device);
    void DisallowDevice(int device) { m_allowedDevices &= ~(1 << device); }
    void AllowAll(); // reset to allow all GPUs (no allowed list)
    bool UseMultiple(); // using multiple GPUs?
    int GetDevice(BestGpuFlags flags = bestGpuNormal); // get a single device
    static const int AllDevices = -1;     // can be used to specify all GPUs in a GetDevices() call
    static const int RequeryDevices = -2; // requery, refreshing statistics and picking the same number of devices as the last query
    std::vector<int> GetDevices(int number = AllDevices, BestGpuFlags flags = bestGpuNormal); // get multiple devices
};

// DeviceFromConfig - parse the 'deviceId' config parameter to determine what behavior is desired
// Symbol - Meaning
//  Auto  - automatically pick a single GPU based on the "BestGpu" score
//  CPU   - use the CPU
//  0     - or any other single number: use the GPU whose CUDA ID matches that number
//  0:2:3 - an array of ids to use (PTask will only use the specified IDs)
//  *3    - a count of GPUs to use (PTask)
//  All   - use all the GPUs (PTask)
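// Illustrative 'deviceId' settings for the cases above (hypothetical values, CNTK-style config syntax):
//   deviceId=auto     # pick the best single GPU by score
//   deviceId=cpu      # force CPU computation
//   deviceId=1        # use CUDA device 1
//   deviceId=0:2:3    # restrict to, and use, devices 0, 2 and 3
//   deviceId=*2       # use some set of 2 GPUs
//   deviceId=all      # use every available GPU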
DEVICEID_TYPE DeviceFromConfig(const ConfigParameters& config)
{
    static BestGpu* g_bestGpu = NULL;
    DEVICEID_TYPE deviceId = CPUDEVICE;

    ConfigValue val = config("deviceId", "auto");
    if (!_stricmp(val.c_str(), "CPU"))
    {
        return CPUDEVICE;
    }

    // potential GPU device, so init our class
    if (g_bestGpu == NULL)
    {
        g_bestGpu = new BestGpu();
    }
    if (!_stricmp(val.c_str(), "Auto"))
    {
#ifdef MPI_SUPPORT
        // make sure deviceId is unique among processes on the same machine
        g_bestGpu->AllowAll();
        std::string MyName(getenv("COMPUTERNAME"));
        for (int i = 0; i < numProcs; i++)
        {
            DEVICEID_TYPE yourDeviceId = deviceId;
            if (myRank == i)
            {
                std::vector<int> devices = g_bestGpu->GetDevices(1);
                deviceId = yourDeviceId = (DEVICEID_TYPE)devices[0];
            }
            MPI_Bcast(&yourDeviceId, 1, MPI_INT, i, MPI_COMM_WORLD);
            {
                INT32 YourSize = (INT32)MyName.length();
                MPI_Bcast(&YourSize, 1, MPI_INT, i, MPI_COMM_WORLD);
                std::vector<char> YourName(YourSize + 1);
                if (myRank == i)
                    std::copy(MyName.begin(), MyName.end(), YourName.begin());
                MPI_Bcast(YourName.data(), YourSize + 1, MPI_CHAR, i, MPI_COMM_WORLD);
                if (myRank != i)
                {
                    // another process on the same machine already claimed this device
                    if (!_strcmpi(MyName.data(), YourName.data()))
                    {
                        g_bestGpu->DisallowDevice(yourDeviceId);
                    }
                }
            }
        }
#else
        std::vector<int> devices = g_bestGpu->GetDevices(1);
        deviceId = (DEVICEID_TYPE)devices[0];
#endif
    }
    else if (!_stricmp(val.c_str(), "All"))
    {
        std::vector<int> devices = g_bestGpu->GetDevices(BestGpu::AllDevices);
        deviceId = (DEVICEID_TYPE)devices[0];
    }
    else if (val.size() == 2 && val[0] == '*' && isdigit(val[1]))
    {
        int number = (int)(val[1] - '0');
        std::vector<int> devices = g_bestGpu->GetDevices(number);
        deviceId = (DEVICEID_TYPE)devices[0];
    }
    else
    {
        ConfigArray arr = val;
        if (arr.size() == 1)
        {
            deviceId = arr[0];
        }
        else
        {
            argvector<int> allowed = arr;
            g_bestGpu->SetAllowedDevices(allowed);
            std::vector<int> devices = g_bestGpu->GetDevices();
            deviceId = (DEVICEID_TYPE)devices[0];
        }
    }
    return deviceId;
}

// NOTE: _ConvertSMVer2Cores is taken from helper_cuda.h, which ships with the CUDA samples; consider whether it would be better to include all of helper_cuda.h.
// TODO: This is duplicated in GPUMatrix.cu
// Beginning of GPU Architecture definitions
inline int _ConvertSMVer2Cores(int major, int minor)
{
    // Defines for GPU Architecture types (using the SM version to determine the # of cores per SM)
    typedef struct
    {
        int SM; // 0xMm (hexadecimal notation), M = SM Major version, and m = SM minor version
        int Cores;
    } sSMtoCores;

    sSMtoCores nGpuArchCoresPerSM[] =
    {
        { 0x10,   8 }, // Tesla Generation  (SM 1.0) G80 class
        { 0x11,   8 }, // Tesla Generation  (SM 1.1) G8x class
        { 0x12,   8 }, // Tesla Generation  (SM 1.2) G9x class
        { 0x13,   8 }, // Tesla Generation  (SM 1.3) GT200 class
        { 0x20,  32 }, // Fermi Generation  (SM 2.0) GF100 class
        { 0x21,  48 }, // Fermi Generation  (SM 2.1) GF10x class
        { 0x30, 192 }, // Kepler Generation (SM 3.0) GK10x class
        { 0x35, 192 }, // Kepler Generation (SM 3.5) GK11x class
        {   -1,  -1 }
    };
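    // Worked example: an SM 3.5 device (major = 3, minor = 5) encodes as (3 << 4) + 5 == 0x35, which the
    // table above maps to 192 cores per SM; a 15-SM GK110 part thus reports 15 * 192 = 2880 CUDA cores.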
    int index = 0;

    while (nGpuArchCoresPerSM[index].SM != -1)
    {
        if (nGpuArchCoresPerSM[index].SM == ((major << 4) + minor))
        {
            return nGpuArchCoresPerSM[index].Cores;
        }

        index++;
    }

    // unknown SM version: fall back to the core count of the last known architecture in the table
    return nGpuArchCoresPerSM[7].Cores;
}

void BestGpu::GetCudaProperties()
{
    if (m_cudaData)
        return;

    int dev = 0;

    for (ProcessorData* pd : m_procData)
    {
        cudaSetDevice(dev);
        pd->deviceId = dev;
        cudaGetDeviceProperties(&pd->deviceProp, dev);
        size_t free;
        size_t total;
        cudaMemGetInfo(&free, &total);
        pd->cores = _ConvertSMVer2Cores(pd->deviceProp.major, pd->deviceProp.minor) * pd->deviceProp.multiProcessorCount;
        pd->cudaFreeMem = free;
        pd->cudaTotalMem = total;
        dev++;
        cudaDeviceReset();
    }
    m_cudaData = m_procData.size() > 0;
}

void BestGpu::Init()
{
    if (m_initialized)
        return;

    // get the count of CUDA devices
    //cudaError_t err =
    cudaGetDeviceCount(&m_deviceCount);

    ProcessorData pdEmpty = { 0 };
    for (int i = 0; i < m_deviceCount; i++)
    {
        ProcessorData* data = new ProcessorData();
        *data = pdEmpty;
        m_procData.push_back(data);
    }

    if (m_deviceCount > 0)
    {
        GetCudaProperties();
        GetNvmlData();
    }
    m_initialized = true;
}

BestGpu::~BestGpu()
{
    for (ProcessorData* data : m_procData)
    {
        delete data;
    }
    m_procData.clear();

    if (m_nvmlData)
    {
        nvmlShutdown();
    }
}

// GetNvmlData - Get data from the NVIDIA Management Library
void BestGpu::GetNvmlData()
{
    // if we already did this, or we couldn't initialize the CUDA data, skip it
    if (m_nvmlData || !m_cudaData)
        return;

    // First initialize the NVML library
    nvmlReturn_t result = nvmlInit();
    if (NVML_SUCCESS != result)
    {
        return;
    }

    QueryNvmlData();
}

// GetDevice - Determine the best device ID to use
// bestFlags - flags that modify how the score is calculated
int BestGpu::GetDevice(BestGpuFlags bestFlags)
{
    std::vector<int> best = GetDevices(1, bestFlags);
    return best[0];
}
// SetAllowedDevices - set the allowed devices array up
// devices - vector of allowed devices
void BestGpu::SetAllowedDevices(const std::vector<int>& devices)
{
    m_allowedDevices = 0;
    for (int device : devices)
    {
        m_allowedDevices |= (1 << device);
    }
}

// DeviceAllowed - is a particular device allowed?
// returns: true if the device is allowed, otherwise false
bool BestGpu::DeviceAllowed(int device)
{
    return !!(m_allowedDevices & (1 << device));
}
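// For example (illustrative): SetAllowedDevices({0, 2}) sets m_allowedDevices to binary 101, so
// DeviceAllowed(0) and DeviceAllowed(2) return true while DeviceAllowed(1) returns false.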
// AllowAll - Reset the allowed filter to allow all GPUs
void BestGpu::AllowAll()
{
    m_allowedDevices = -1; // set all bits
}

// UseMultiple - Are we using multiple GPUs?
// returns: true if more than one GPU was returned in the last call
bool BestGpu::UseMultiple()
{
    return m_lastCount > 1;
}

// GetDevices - Determine the best device IDs to use
// number - how many devices do we want?
// bestFlags - flags that modify how the score is calculated
std::vector<int> BestGpu::GetDevices(int number, BestGpuFlags p_bestFlags)
{
    BestGpuFlags bestFlags = p_bestFlags;

    // if they want all devices, give them everything we have
    if (number == AllDevices)
        number = max(m_deviceCount, 1);
    else if (number == RequeryDevices)
    {
        number = m_lastCount;
    }

    // create the initial arrays, initialized to all CPU (-1)
    std::vector<int> best(number, -1);
    std::vector<double> scores(number, -1.0);

    // if no GPUs were found, we should use the CPU
    if (m_procData.size() == 0)
    {
        best.resize(1);
        return best;
    }

    // get the latest data
    QueryNvmlData();

    double utilGpuW = 0.15;
    double utilMemW = 0.1;
    double speedW = 0.2;
    double freeMemW = 0.2;
    double mlAppRunningW = 0.2;

    // if it's a requery, just use the same flags as last time
    if (bestFlags & bestGpuRequery)
        bestFlags = m_lastFlags;

    // adjust weights if necessary
    if (bestFlags & bestGpuAvoidSharing)
    {
        mlAppRunningW *= 3;
    }
    if (bestFlags & bestGpuFavorMemory) // favor memory
    {
        freeMemW *= 2;
    }
    if (bestFlags & bestGpuFavorUtilization) // favor low utilization
    {
        utilGpuW *= 2;
        utilMemW *= 2;
    }
    if (bestFlags & bestGpuFavorSpeed) // favor fastest processor
    {
        speedW *= 2;
    }
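    // Illustrative score (assumed numbers, default weights): an idle Kepler card with 2880 cores, 90% free
    // memory and no other ML app running scores roughly 0.15 + 0.10 + 2.88*0.2 + 0.9*0.2 + 1*0.2 = 1.21,
    // while a busy card shared with another cn.exe/dbn.exe instance scores considerably lower.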
    for (ProcessorData* pd : m_procData)
    {
        double score = 0.0;

        if (!DeviceAllowed(pd->deviceId))
            continue;

        // GPU utilization score
        score = (1.0 - pd->utilization.gpu / 75.0f) * utilGpuW;
        score += (1.0 - pd->utilization.memory / 60.0f) * utilMemW;
        score += pd->cores / 1000.0f * speedW;
        double mem = pd->memory.free / (double)pd->memory.total;
        // if it's not a TCC driver, it's a WDDM driver, and the NVML numbers will be off because Windows owns the memory allocation from NVML's point of view
        if (!pd->deviceProp.tccDriver)
            mem = pd->cudaFreeMem / (double)pd->cudaTotalMem;
        score += mem * freeMemW;
        score += ((pd->cnFound || pd->dbnFound) ? 0 : 1) * mlAppRunningW;
        for (int i = 0; i < best.size(); i++)
        {
            // look for a better score
            if (score > scores[i])
            {
                // make room for this score in the correct location (insertion sort)
                for (int j = (int)best.size() - 1; j > i; --j)
                {
                    scores[j] = scores[j - 1];
                    best[j] = best[j - 1];
                }
                scores[i] = score;
                best[i] = pd->deviceId;
                break;
            }
        }
    }

    // now get rid of any extra empty slots and disallowed devices
    for (int j = (int)best.size() - 1; j > 0; --j)
    {
        // if this device is not allowed, or was never set, remove it
        if (best[j] == -1)
            best.pop_back();
        else
            break;
    }

    // save off the last values for future requeries
    m_lastFlags = bestFlags;
    m_lastCount = (int)best.size();

    // if we eliminated all GPUs, use the CPU
    if (best.size() == 0)
    {
        best.push_back(-1);
    }

    return best; // return the array of the best GPUs
}

// QueryNvmlData - Query data from the NVIDIA Management Library, and accumulate counters
void BestGpu::QueryNvmlData()
{
    if (!m_cudaData)
        return;

    for (int i = 0; i < m_deviceCount; i++)
    {
        nvmlDevice_t device;
        nvmlPciInfo_t pci;
        nvmlMemory_t memory;
        nvmlUtilization_t utilization;

        // Query for the device handle to perform operations on this device
        nvmlReturn_t result = nvmlDeviceGetHandleByIndex(i, &device);
        if (NVML_SUCCESS != result)
        {
            return;
        }

        // pci.busId is very useful to know which physical device you're talking to.
        // Using the PCI identifier you can also match the nvmlDevice handle to its CUDA device.
        result = nvmlDeviceGetPciInfo(device, &pci);
        if (NVML_SUCCESS != result)
        {
            return;
        }

        ProcessorData* curPd = NULL;
        for (ProcessorData* pd : m_procData)
        {
            if (pd->deviceProp.pciBusID == (int)pci.bus)
            {
                curPd = pd;
                break;
            }
        }

        if (curPd == NULL)
            continue;

        // Get the memory usage; this will only work for TCC drivers
        result = nvmlDeviceGetMemoryInfo(device, &memory);
        if (NVML_SUCCESS != result)
        {
            return;
        }
        curPd->memory = memory;

        // Get the utilization rates
        result = nvmlDeviceGetUtilizationRates(device, &utilization);
        if (NVML_SUCCESS != result)
        {
            return;
        }
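        // Worked example for the averaging below (assumed numbers): after 3 prior queries averaging 40%
        // GPU utilization, a new reading of 70% yields (40*3 + 70*2) / (3 + 2) = 52%.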
        if (m_queryCount)
        {
            // average, slightly overweighting the most recent query
            curPd->utilization.gpu = (curPd->utilization.gpu * m_queryCount + utilization.gpu * 2) / (m_queryCount + 2);
            curPd->utilization.memory = (curPd->utilization.memory * m_queryCount + utilization.memory * 2) / (m_queryCount + 2);
        }
        else
        {
            curPd->utilization = utilization;
        }
        m_queryCount++;

        unsigned size = 0;
        result = nvmlDeviceGetComputeRunningProcesses(device, &size, NULL);
        if (size > 0)
        {
            std::vector<nvmlProcessInfo_t> processInfo(size);
            processInfo.resize(size);
            for (nvmlProcessInfo_t& info : processInfo)
                info.usedGpuMemory = 0;
            result = nvmlDeviceGetComputeRunningProcesses(device, &size, &processInfo[0]);
            if (NVML_SUCCESS != result)
            {
                return;
            }
            bool cnFound = false;
            bool dbnFound = false;
            for (nvmlProcessInfo_t info : processInfo)
            {
                std::string name;
                name.resize(256);
                unsigned len = (unsigned)name.length();
                nvmlSystemGetProcessName(info.pid, (char*)name.data(), len);
                name.resize(strlen(name.c_str()));
                size_t pos = name.find_last_of('\\');
                if (pos != std::string::npos)
                    name = name.substr(pos + 1);
                if (GetCurrentProcessId() == info.pid || name.length() == 0)
                    continue;
                cnFound = (cnFound || (!name.compare("cn.exe")));
                dbnFound = (dbnFound || (!name.compare("dbn.exe")));
            }
            // set values to save
            curPd->cnFound = cnFound;
            curPd->dbnFound = dbnFound;
        }
    }
    m_nvmlData = true;
    return;
}

}}}
#ifdef _WIN32

// ---------------------------------------------------------------------------
// some interfacing with the Windows DLL system to ensure clean shutdown vs. delay loading of CUDA DLLs
// ---------------------------------------------------------------------------

// The "notify hook" gets called for every call to the
// delay load helper. This allows a user to hook every call and
// skip the delay load helper entirely.
//
// dliNotify == { dliStartProcessing | dliNotePreLoadLibrary | dliNotePreGetProc | dliNoteEndProcessing } on this call.

extern "C" INT_PTR WINAPI DelayLoadNofify(
    unsigned dliNotify,
    PDelayLoadInfo pdli
    )
{
    // load the library from an alternate path
    if (dliNotify == dliNotePreLoadLibrary && !strcmp(pdli->szDll, "nvml.dll"))
    {
        WCHAR *path;
        WCHAR nvmlPath[MAX_PATH] = { 0 };
        HRESULT hr = SHGetKnownFolderPath(FOLDERID_ProgramFiles, 0, NULL, &path);
        if (SUCCEEDED(hr))
        {
            lstrcpy(nvmlPath, path);
            CoTaskMemFree(path);
            HMODULE module = NULL;
            const WCHAR* dllName = L"\\NVIDIA Corporation\\NVSMI\\nvml.dll";
            lstrcat(nvmlPath, dllName);
            module = LoadLibraryEx(nvmlPath, NULL, LOAD_WITH_ALTERED_SEARCH_PATH);
            return (INT_PTR)module;
        }
    }
    // check for a failed GetProc, i.e. an old version of the driver
    if (dliNotify == dliFailGetProc && !strcmp(pdli->szDll, "nvml.dll"))
    {
        char name[256];
        size_t len = strlen(pdli->dlp.szProcName);
        strcpy_s(name, pdli->dlp.szProcName);
        // if the version 2 APIs are not supported, truncate the "_v2" suffix and retry
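        // (e.g. if the import of nvmlInit_v2 cannot be resolved against an older nvml.dll, retry it as nvmlInit)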
        if (len > 3 && name[len - 1] == '2')
            name[len - 3] = 0;
        FARPROC pfnRet = ::GetProcAddress(pdli->hmodCur, name);
        return (INT_PTR)pfnRet;
    }

    return NULL;
}

ExternC
PfnDliHook __pfnDliNotifyHook2 = (PfnDliHook)DelayLoadNofify;
// This is the failure hook, dliNotify = {dliFailLoadLib|dliFailGetProc}
ExternC
PfnDliHook __pfnDliFailureHook2 = (PfnDliHook)DelayLoadNofify;

#endif // _WIN32

#endif // CPUONLY