// CNTK/Source/Math/cudalib.cpp
//
// 84 lines, 2.5 KiB, C++
// (web-viewer navigation: Source / Normal view / History)

// cudalib.cpp -- all CUDA calls (but not cublas) are encapsulated here
// All actual CUDA API calls go here, to keep the header out of our other headers.
//
// F. Seide, V-hansu
#define _CRT_SECURE_NO_WARNINGS 1 // so we can use getenv()...
#include "Basics.h"
#include <cuda_runtime_api.h> // for CUDA API
#include <cuda.h> // for device API
#include "cudalib.h"
#include "cudadevice.h"
#include <string>
#include <assert.h>
#include <cublas_v2.h>
#undef NOMULTIDEVICE // define this to disable any context/driver stuff
#ifndef NOMULTIDEVICE
#pragma comment (lib, "cuda.lib") // link CUDA device API
#endif
#pragma comment (lib, "cudart.lib") // link CUDA runtime
#pragma comment (lib, "cublas.lib")
namespace msra { namespace cuda {
// NOTE(review): presumably tracks how many CUDA devices this module has set up -- confirm against the code that reads/writes it.
static int devicesallocated = -1; // -1 means not initialized
// Error-check helper for CUDA runtime calls; lets call sites write
//     cudaFunction(...) || "message";
//  rc  -- status code returned by the CUDA runtime call
//  msg -- context text placed at the front of the error report
// Any status other than cudaSuccess is reported through RuntimeError(), together with
// the runtime's error string and the numeric error code.
static void operator|| (cudaError_t rc, const char * msg)
{
    if (rc == cudaSuccess)
        return; // nothing to report

    const char * description = cudaGetErrorString(rc);
    const int code = (int)rc;
    RuntimeError("%s: %s (cuda error %d)", msg, description, code);
}
// Returns the stream handle to use for CUDA work; at present this is always cudaStreamDefault.
cudaStream_t GetCurrentStream()
{
    cudaStream_t stream = cudaStreamDefault;
    return stream;
}
// synchronize with ongoing thread
void join()
{
cudaDeviceSynchronize() || "cudaDeviceSynchronize failed";
}
// Fixed-capacity stack used to remember the devices that have been pushed.
const int stackSize = 20;                      // capacity of deviceStack
static size_t deviceStack[stackSize] = { 0 };  // storage; every slot starts out as 0
static int curStack = 0;                       // presumably the current depth / next free slot -- TODO confirm against the push/pop code
// memory allocation
void * mallocbytes (size_t nelem, size_t sz)
{
for (size_t retry = 0; ; retry++)
{
try
{
//fprintf (stderr, "mallocbytes: allocating %d elements of size %d, %d bytes\n", (int) nelem, (int) sz, (int) (nelem * sz)); // comment out by [v-hansu] to get rid out annoying output
void * p;
cudaMalloc (&p, nelem * sz) || "cudaMalloc failed";
return p;
}
catch (const std::exception & e)
{
fprintf (stderr, "mallocbytes: failed with error %s\n", e.what());
if (retry >= 5)
throw;
}
}
}
void freebytes (void * p) { cudaFree (p) || "cudaFree failed"; }
void memcpyh2d (void * dst, size_t byteoffset, const void * src, size_t nbytes)
{
cudaMemcpy (byteoffset + (char*) dst, src, nbytes, cudaMemcpyHostToDevice) || "cudaMemcpy failed";
}
void memcpyd2h (void * dst, const void * src, size_t byteoffset, size_t nbytes)
{
cudaMemcpy (dst, byteoffset + (const char *) src, nbytes, cudaMemcpyDeviceToHost) || "cudaMemcpy failed";
}
};};