// Out-of-class definitions of CudaModule's static data members.
// Every member starts out cleared (false / NULL). Elsewhere in this file
// s_device is assigned from selectDevice(), s_context is filled in by
// cuCtxCreate(), and s_endEvent is tested for NULL before use.
// NOTE(review): s_inited / s_available presumably record whether static
// initialization has run and whether CUDA is usable - confirm against
// staticInit(), which is not fully visible here.
38 bool CudaModule::s_inited =
false;
39 bool CudaModule::s_available =
false;
// NOTE(review): s_device is a CUdevice initialized with NULL; confirm that
// this value is never mistaken for a real device before s_device is assigned.
40 CUdevice CudaModule::s_device =
NULL;
41 CUcontext CudaModule::s_context =
NULL;
// Event handles; NULL means "no event" (s_endEvent is checked with
// !s_endEvent further down in this file).
42 CUevent CudaModule::s_startEvent =
NULL;
43 CUevent CudaModule::s_endEvent =
NULL;
65 for (
int i = 0; i < m_globals.getSize(); i++)
68 checkError(
"cuModuleUnload", cuModuleUnload(m_module));
75 return (findKernel(name) !=
NULL);
82 CUfunction kernel = findKernel(name);
84 fail(
"CudaModule: Kernel not found '%s'!", name.
getPtr());
93 checkError(
"cuParamSeti", cuParamSeti(kernel, offset, value));
111 checkError(
"cuParamSetv", cuParamSetv(kernel, offset, &value,
sizeof(CUdeviceptr)));
112 return sizeof(CUdeviceptr);
119 S32* found = m_globalHash.search(name);
121 return *m_globals[*found];
130 m_globalHash.add(name, m_globals.getSize());
131 m_globals.add(buffer);
139 for (
int i = 0; i < m_globals.getSize(); i++)
140 m_globals[i]->setOwner(
Buffer::Cuda,
true, async, stream);
147 S32* found = m_texRefHash.search(name);
149 return m_texRefs[*found];
152 checkError(
"cuModuleGetTexRef", cuModuleGetTexRef(&texRef, m_module, name.
getPtr()));
154 m_texRefHash.add(name, m_texRefs.
getSize());
155 m_texRefs.
add(texRef);
170 fail(
"CudaModule::setTexRefMode(): Built without FW_USE_CUDA!");
174 CUaddress_mode addressMode = (wrap) ? CU_TR_ADDRESS_MODE_WRAP : CU_TR_ADDRESS_MODE_CLAMP;
175 CUfilter_mode filterMode = (bilinear) ? CU_TR_FILTER_MODE_LINEAR : CU_TR_FILTER_MODE_POINT;
178 if (normalizedCoords)
179 flags |= CU_TRSF_NORMALIZED_COORDINATES;
181 flags |= CU_TRSF_READ_AS_INTEGER;
185 checkError(
"cuTexRefSetFilterMode", cuTexRefSetFilterMode(texRef, filterMode));
203 checkError(
"cuTexRefSetFormat", cuTexRefSetFormat(texRef, format, numComponents));
212 setTexRefMode(texRef, wrap, bilinear, normalizedCoords, readAsInt);
234 fail(
"CudaModule::setTexRef(Image): Built without FW_USE_CUDA!");
236 #elif (CUDA_VERSION < 2020)
238 fail(
"CudaModule: setTexRef(Image) requires CUDA 2.2 or later!");
242 CUDA_ARRAY_DESCRIPTOR desc;
245 fail(
"CudaModule: Unsupported image format in setTexRef(Image)!");
248 setTexRefMode(texRef, wrap, bilinear, normalizedCoords, readAsInt);
269 fail(
"CudaModule::updateTexRefs(): Built without FW_USE_CUDA!");
276 for (
int i = 0; i < m_texRefs.
getSize(); i++)
285 #if (CUDA_VERSION >= 3010)
287 checkError(
"cuModuleGetSurfRef", cuModuleGetSurfRef(&surfRef, m_module, name.
getPtr()));
291 fail(
"CudaModule: getSurfRef() requires CUDA 3.1 or later!");
300 #if (CUDA_VERSION >= 3010)
305 fail(
"CudaModule: setSurfRef() requires CUDA 3.1 or later!");
318 if (!isAvailable_cuInit())
321 CUresult res = cuInit(0);
322 if (res != CUDA_SUCCESS)
325 if (res != CUDA_ERROR_NO_DEVICE)
332 s_device = selectDevice();
333 printDeviceInfo(s_device);
337 flags |= CU_CTX_SCHED_SPIN;
339 #if (CUDA_VERSION >= 2030)
341 flags |= CU_CTX_LMEM_RESIZE_TO_MAX;
344 if (!isAvailable_cuGLCtxCreate())
345 checkError(
"cuCtxCreate", cuCtxCreate(&s_context, flags, s_device));
352 if (isAvailable_cuEventCreate())
392 cuMemGetInfo(&free, &total);
403 if (!yield || !s_endEvent)
414 if (res != CUDA_ERROR_NOT_READY)
431 default: error =
"Unknown CUresult";
break;
432 case CUDA_SUCCESS: error =
"No error";
break;
435 case CUDA_ERROR_INVALID_VALUE: error =
"Invalid value";
break;
436 case CUDA_ERROR_OUT_OF_MEMORY: error =
"Out of memory";
break;
437 case CUDA_ERROR_NOT_INITIALIZED: error =
"Not initialized";
break;
438 case CUDA_ERROR_DEINITIALIZED: error =
"Deinitialized";
break;
439 case CUDA_ERROR_NO_DEVICE: error =
"No device";
break;
440 case CUDA_ERROR_INVALID_DEVICE: error =
"Invalid device";
break;
441 case CUDA_ERROR_INVALID_IMAGE: error =
"Invalid image";
break;
442 case CUDA_ERROR_INVALID_CONTEXT: error =
"Invalid context";
break;
443 case CUDA_ERROR_CONTEXT_ALREADY_CURRENT: error =
"Context already current";
break;
444 case CUDA_ERROR_MAP_FAILED: error =
"Map failed";
break;
445 case CUDA_ERROR_UNMAP_FAILED: error =
"Unmap failed";
break;
446 case CUDA_ERROR_ARRAY_IS_MAPPED: error =
"Array is mapped";
break;
447 case CUDA_ERROR_ALREADY_MAPPED: error =
"Already mapped";
break;
448 case CUDA_ERROR_NO_BINARY_FOR_GPU: error =
"No binary for GPU";
break;
449 case CUDA_ERROR_ALREADY_ACQUIRED: error =
"Already acquired";
break;
450 case CUDA_ERROR_NOT_MAPPED: error =
"Not mapped";
break;
451 case CUDA_ERROR_INVALID_SOURCE: error =
"Invalid source";
break;
452 case CUDA_ERROR_FILE_NOT_FOUND: error =
"File not found";
break;
453 case CUDA_ERROR_INVALID_HANDLE: error =
"Invalid handle";
break;
454 case CUDA_ERROR_NOT_FOUND: error =
"Not found";
break;
455 case CUDA_ERROR_NOT_READY: error =
"Not ready";
break;
456 case CUDA_ERROR_LAUNCH_FAILED: error =
"Launch failed";
break;
457 case CUDA_ERROR_LAUNCH_OUT_OF_RESOURCES: error =
"Launch out of resources";
break;
458 case CUDA_ERROR_LAUNCH_TIMEOUT: error =
"Launch timeout";
break;
459 case CUDA_ERROR_LAUNCH_INCOMPATIBLE_TEXTURING: error =
"Launch incompatible texturing";
break;
460 case CUDA_ERROR_UNKNOWN: error =
"Unknown error";
break;
463 #if (CUDA_VERSION >= 4000) // TODO: Some of these may exist in earlier versions, too.
464 case CUDA_ERROR_PROFILER_DISABLED: error =
"Profiler disabled";
break;
465 case CUDA_ERROR_PROFILER_NOT_INITIALIZED: error =
"Profiler not initialized";
break;
466 case CUDA_ERROR_PROFILER_ALREADY_STARTED: error =
"Profiler already started";
break;
467 case CUDA_ERROR_PROFILER_ALREADY_STOPPED: error =
"Profiler already stopped";
break;
468 case CUDA_ERROR_NOT_MAPPED_AS_ARRAY: error =
"Not mapped as array";
break;
469 case CUDA_ERROR_NOT_MAPPED_AS_POINTER: error =
"Not mapped as pointer";
break;
470 case CUDA_ERROR_ECC_UNCORRECTABLE: error =
"ECC uncorrectable";
break;
471 case CUDA_ERROR_UNSUPPORTED_LIMIT: error =
"Unsupported limit";
break;
472 case CUDA_ERROR_CONTEXT_ALREADY_IN_USE: error =
"Context already in use";
break;
473 case CUDA_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND: error =
"Shared object symbol not found";
break;
474 case CUDA_ERROR_SHARED_OBJECT_INIT_FAILED: error =
"Shared object init failed";
break;
475 case CUDA_ERROR_OPERATING_SYSTEM: error =
"Operating system error";
break;
476 case CUDA_ERROR_PEER_ACCESS_ALREADY_ENABLED: error =
"Peer access already enabled";
break;
477 case CUDA_ERROR_PEER_ACCESS_NOT_ENABLED: error =
"Peer access not enabled";
break;
478 case CUDA_ERROR_PRIMARY_CONTEXT_ACTIVE: error =
"Primary context active";
break;
479 case CUDA_ERROR_CONTEXT_IS_DESTROYED: error =
"Context is destroyed";
break;
489 if (res != CUDA_SUCCESS)
498 #if (CUDA_VERSION >= 2020)
499 if (isAvailable_cuDriverGetVersion())
500 cuDriverGetVersion(&version);
503 return version / 10 + version % 10;
516 checkError(
"cuDeviceComputeCapability", cuDeviceComputeCapability(&major, &minor, s_device));
517 return major * 10 + minor;
529 checkError(
"cuDeviceGetAttribute", cuDeviceGetAttribute(&value, attrib, s_device));
535 CUdevice CudaModule::selectDevice(
void)
// Report the function that is actually failing: this fail() belongs to
// CudaModule::selectDevice(), but the message previously named
// Image::chooseCudaFormat() (copy-paste from another stub), which sends
// anyone reading the error to the wrong place.
539 fail(
"CudaModule::selectDevice(): Built without FW_USE_CUDA!");
547 checkError(
"cuDeviceGetCount", cuDeviceGetCount(&numDevices));
549 for (
int i = 0; i < numDevices; i++)
556 checkError(
"cuDeviceGetAttribute", cuDeviceGetAttribute(&clockRate, CU_DEVICE_ATTRIBUTE_CLOCK_RATE, dev));
557 checkError(
"cuDeviceGetAttribute", cuDeviceGetAttribute(&numProcessors, CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT, dev));
559 S32 score = clockRate * numProcessors;
560 if (score > bestScore)
568 fail(
"CudaModule: No appropriate CUDA device found!");
576 void CudaModule::printDeviceInfo(CUdevice device)
581 fail(
"CudaModule::printDeviceInfo(): Built without FW_USE_CUDA!");
587 CUdevice_attribute attrib;
591 #define A21(ENUM, NAME) { CU_DEVICE_ATTRIBUTE_ ## ENUM, NAME },
592 #if (CUDA_VERSION >= 4000)
593 # define A40(ENUM, NAME) A21(ENUM, NAME)
595 # define A40(ENUM, NAME) // TODO: Some of these may exist in earlier versions, too.
598 A21(CLOCK_RATE,
"Clock rate")
599 A40(MEMORY_CLOCK_RATE, "Memory clock rate")
600 A21(MULTIPROCESSOR_COUNT, "Number of SMs")
604 A21(MAX_THREADS_PER_BLOCK, "Max threads per block")
605 A40(MAX_THREADS_PER_MULTIPROCESSOR, "Max threads per SM")
606 A21(REGISTERS_PER_BLOCK, "Max registers per block")
607 A21(SHARED_MEMORY_PER_BLOCK, "Max shared mem per block")
608 A21(TOTAL_CONSTANT_MEMORY, "Constant memory")
611 A21(MAX_BLOCK_DIM_X, "Max blockDim.
x")
614 A21(MAX_GRID_DIM_X, "Max gridDim.
x")
636 A40(CONCURRENT_KERNELS, "Concurrent launches supported")
637 A21(GPU_OVERLAP, "Concurrent memcopy supported")
638 A40(ASYNC_ENGINE_COUNT, "Max concurrent memcopies")
641 A40(UNIFIED_ADDRESSING, "Unified addressing supported")
642 A40(CAN_MAP_HOST_MEMORY, "Can map host memory")
643 A40(ECC_ENABLED, "ECC enabled")
662 checkError(
"cuDeviceComputeCapability", cuDeviceComputeCapability(&major, &minor, device));
667 printf(
"%-32s%s\n",
sprintf(
"CUDA device %d", (
int)device).getPtr(), name);
668 printf(
"%-32s%s\n",
"---",
"---");
669 printf(
"%-32s%d.%d\n",
"Compute capability", major, minor);
670 printf(
"%-32s%.0f megs\n",
"Total memory", (
F32)memory *
exp2(-20));
675 if (cuDeviceGetAttribute(&value, attribs[i].attrib, device) == CUDA_SUCCESS)
676 printf(
"%-32s%d\n", attribs[i].name, value);
685 Vec2i CudaModule::selectGridSize(
int numBlocks)
687 int maxWidth = 65536;
689 checkError(
"cuDeviceGetAttribute", cuDeviceGetAttribute(&maxWidth, CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_X, s_device));
693 while (
size.x > maxWidth)
703 CUfunction CudaModule::findKernel(
const String& name)
707 CUfunction* found = m_kernels.search(name);
713 CUfunction kernel =
NULL;
722 m_kernels.add(name, kernel);
CUdevice int ordinal char int CUdevice dev CUdevprop CUdevice dev CUcontext ctx CUcontext ctx CUcontext pctx cuModuleLoadData
CUdevice int ordinal char int CUdevice dev CUdevprop CUdevice dev CUcontext ctx CUcontext ctx CUcontext pctx CUmodule const void image CUmodule const void fatCubin CUfunction CUmodule const char name void p CUfunction unsigned int bytes CUtexref pTexRef CUtexref CUarray unsigned int Flags CUtexref int CUaddress_mode am CUtexref unsigned int Flags CUaddress_mode CUtexref int dim CUarray_format int CUtexref hTexRef CUfunction unsigned int numbytes CUfunction int offset
static S64 getMemoryUsed(void)
const char * getPtr(void) const
static int getDriverVersion(void)
CUdevice int ordinal char int CUdevice dev CUdevprop CUdevice dev CUcontext ctx CUcontext ctx CUcontext pctx CUmodule const void image CUmodule const void fatCubin CUfunction CUmodule const char name void p CUfunction unsigned int bytes CUtexref pTexRef CUtexref CUarray unsigned int Flags CUtexref int CUaddress_mode am CUtexref unsigned int Flags CUaddress_mode CUtexref int dim CUarray_format int CUtexref hTexRef CUfunction unsigned int numbytes cuParamSetf
static void staticDeinit(void)
CUdevice int ordinal char int CUdevice dev CUdevprop CUdevice dev CUcontext ctx CUcontext ctx CUcontext pctx CUmodule const void image CUmodule const void fatCubin CUfunction CUmodule const char name void p CUfunction unsigned int bytes CUtexref pTexRef CUtexref CUarray unsigned int Flags CUtexref int CUaddress_mode am CUtexref unsigned int Flags CUaddress_mode CUtexref int dim CUarray_format int CUtexref hTexRef CUfunction unsigned int numbytes CUfunction int float value CUfunction int CUtexref hTexRef CUfunction int int grid_height CUevent unsigned int Flags CUevent hEvent CUevent hEvent CUstream unsigned int Flags CUstream hStream GLuint bufferobj unsigned int CUdevice dev CUdeviceptr unsigned int CUmodule const char name CUdeviceptr unsigned int bytesize CUdeviceptr dptr void unsigned int bytesize void CUdeviceptr unsigned int ByteCount CUarray unsigned int CUdeviceptr unsigned int ByteCount CUarray unsigned int const void unsigned int ByteCount CUarray unsigned int CUarray unsigned int unsigned int ByteCount void CUarray unsigned int unsigned int CUstream hStream const CUDA_MEMCPY2D pCopy CUdeviceptr const void unsigned int CUstream hStream const CUDA_MEMCPY2D CUstream hStream CUdeviceptr unsigned char unsigned int N CUdeviceptr unsigned int unsigned int N CUdeviceptr unsigned int unsigned short unsigned int unsigned int Height CUarray const CUDA_ARRAY_DESCRIPTOR pAllocateArray CUarray const CUDA_ARRAY3D_DESCRIPTOR pAllocateArray unsigned int CUtexref CUdeviceptr unsigned int bytes CUcontext unsigned int CUdevice device GLenum texture GLenum GLuint buffer
CUdevice int ordinal char int CUdevice dev
void wrapCuda(CUdeviceptr cudaPtr, S64 size)
CUdeviceptr getCudaPtr(S64 ofs=0)
CudaModule(const void *cubin)
CudaKernel getKernel(const String &name)
int setParami(CUfunction kernel, int offset, S32 value)
CUdevice int ordinal char int CUdevice dev CUdevprop CUdevice dev CUcontext ctx CUcontext ctx CUcontext pctx cuCtxSynchronize
CUdevice int ordinal char int CUdevice dev CUdevprop CUdevice dev CUcontext ctx CUcontext ctx CUcontext pctx CUmodule const void image CUmodule const void fatCubin CUfunction CUmodule const char name void p CUfunction unsigned int bytes CUtexref pTexRef CUtexref CUarray unsigned int Flags CUtexref int CUaddress_mode am CUtexref unsigned int Flags CUaddress_mode CUtexref int dim CUarray_format int CUtexref hTexRef CUfunction unsigned int numbytes CUfunction int float value CUfunction int CUtexref hTexRef CUfunction int int grid_height CUevent unsigned int Flags CUevent hEvent CUevent hEvent CUstream unsigned int Flags CUstream hStream GLuint bufferobj unsigned int CUdevice dev CUdeviceptr unsigned int CUmodule const char name CUdeviceptr unsigned int bytesize CUdeviceptr dptr void unsigned int bytesize void CUdeviceptr unsigned int ByteCount CUarray unsigned int CUdeviceptr unsigned int ByteCount CUarray unsigned int const void unsigned int ByteCount CUarray unsigned int CUarray unsigned int unsigned int ByteCount void CUarray unsigned int unsigned int CUstream hStream const CUDA_MEMCPY2D pCopy CUdeviceptr const void unsigned int CUstream hStream const CUDA_MEMCPY2D CUstream hStream CUdeviceptr unsigned char unsigned int N CUdeviceptr unsigned int unsigned int N CUdeviceptr unsigned int unsigned short unsigned int unsigned int Height CUarray const CUDA_ARRAY_DESCRIPTOR pAllocateArray CUarray const CUDA_ARRAY3D_DESCRIPTOR pAllocateArray unsigned int CUtexref CUdeviceptr unsigned int bytes CUcontext unsigned int CUdevice device GLenum texture GLenum GLuint buffer GLenum GLuint renderbuffer GLenum GLsizeiptr const GLvoid GLenum usage GLuint shader GLenum type GLsizei const GLuint framebuffers GLsizei const GLuint renderbuffers GLuint v GLuint v GLenum GLenum GLenum GLuint GLint level GLsizei GLuint framebuffers GLuint const GLchar name GLenum GLintptr GLsizeiptr GLvoid data GLuint GLenum GLint param GLuint GLenum GLint param GLhandleARB programObj GLenum GLenum GLsizei 
GLsizei height GLenum GLint GLint GLsizei GLsizei GLsizei GLint GLenum GLenum const GLvoid pixels GLint GLsizei const GLfloat value GLint GLfloat GLfloat v1 GLint GLfloat GLfloat GLfloat v2 GLint GLsizei const GLfloat value GLint GLsizei GLboolean const GLfloat value GLuint program GLuint GLfloat x
CUdevice int ordinal char int CUdevice dev CUdevprop CUdevice dev cuCtxDestroy
CUtexref getTexRef(const String &name)
CUdevice int ordinal char int CUdevice dev CUdevprop CUdevice dev CUcontext ctx CUcontext ctx CUcontext pctx CUmodule const void image CUmodule const void fatCubin CUfunction CUmodule const char name void p CUfunction unsigned int bytes CUtexref pTexRef CUtexref CUarray unsigned int Flags CUtexref int CUaddress_mode am CUtexref unsigned int Flags CUaddress_mode CUtexref int dim CUarray_format int CUtexref hTexRef CUfunction unsigned int numbytes CUfunction int float value CUfunction int CUtexref hTexRef CUfunction int int grid_height CUevent unsigned int Flags CUevent hEvent CUevent hEvent CUstream unsigned int Flags CUstream hStream GLuint bufferobj cuDeviceTotalMem
void setTexRef(const String &name, Buffer &buf, CUarray_format format, int numComponents)
void setSurfRef(const String &name, CUarray cudaArray)
static void checkError(const char *funcName, CUresult res)
static int getComputeCapability(void)
CUdevice int ordinal char int CUdevice dev CUdevprop CUdevice dev CUcontext ctx CUcontext ctx CUcontext pctx CUmodule const void image CUmodule const void fatCubin cuModuleGetFunction
int setParamf(CUfunction kernel, int offset, F32 value)
CUdevice int ordinal cuDeviceGetName
CUdevice int ordinal char int CUdevice dev CUdevprop CUdevice dev CUcontext ctx CUcontext ctx CUcontext pctx CUmodule const void image CUmodule const void fatCubin CUfunction CUmodule const char name void p CUfunction unsigned int bytes CUtexref pTexRef CUtexref CUarray unsigned int Flags CUtexref int CUaddress_mode am CUtexref unsigned int Flags CUaddress_mode CUtexref int dim CUarray_format int CUtexref hTexRef CUfunction unsigned int numbytes CUfunction int float value CUfunction int CUtexref hTexRef CUfunction int int grid_height CUevent unsigned int Flags CUevent hEvent CUevent hEvent CUstream unsigned int Flags CUstream hStream GLuint bufferobj unsigned int CUdevice dev CUdeviceptr unsigned int CUmodule const char name CUdeviceptr unsigned int bytesize CUdeviceptr dptr void unsigned int bytesize void CUdeviceptr unsigned int ByteCount CUarray unsigned int CUdeviceptr unsigned int ByteCount CUarray unsigned int const void unsigned int ByteCount CUarray unsigned int CUarray unsigned int unsigned int ByteCount void CUarray unsigned int unsigned int CUstream hStream const CUDA_MEMCPY2D pCopy CUdeviceptr const void unsigned int CUstream hStream const CUDA_MEMCPY2D CUstream hStream CUdeviceptr unsigned char unsigned int N CUdeviceptr unsigned int unsigned int N CUdeviceptr unsigned int unsigned short unsigned int unsigned int Height CUarray const CUDA_ARRAY_DESCRIPTOR pAllocateArray CUarray const CUDA_ARRAY3D_DESCRIPTOR pAllocateArray unsigned int CUtexref CUdeviceptr unsigned int bytes cuGLCtxCreate
CUdevice int ordinal char int CUdevice dev CUdevprop CUdevice dev CUcontext ctx CUcontext ctx CUcontext pctx CUmodule const void image CUmodule const void fatCubin CUfunction CUmodule const char name void p CUfunction unsigned int bytes CUtexref pTexRef CUtexref CUarray unsigned int Flags CUtexref int CUaddress_mode am cuTexRefSetFlags
String sprintf(const char *fmt,...)
CUdevice int ordinal char int CUdevice dev CUdevprop CUdevice dev CUcontext ctx CUcontext ctx CUcontext pctx CUmodule const void image CUmodule const void fatCubin CUfunction CUmodule const char name void p CUfunction unsigned int bytes CUtexref pTexRef CUtexref CUarray unsigned int Flags CUtexref int dim
ImageFormat chooseCudaFormat(CUDA_ARRAY_DESCRIPTOR *desc=NULL, ImageFormat::ID desiredFormat=ImageFormat::ID_Max) const
Buffer & getBuffer(void) const
Buffer & getGlobal(const String &name)
void setTexRefMode(CUtexref texRef, bool wrap=true, bool bilinear=true, bool normalizedCoords=true, bool readAsInt=false)
CUdevice int ordinal char int CUdevice dev CUdevprop CUdevice dev CUcontext ctx CUcontext ctx CUcontext pctx CUmodule const void image CUmodule const void fatCubin CUfunction CUmodule const char name void p CUfunction unsigned int bytes CUtexref pTexRef CUtexref CUarray unsigned int Flags CUtexref int CUaddress_mode am CUtexref unsigned int Flags CUaddress_mode CUtexref int dim CUarray_format int CUtexref hTexRef CUfunction unsigned int numbytes CUfunction int float value
void unsetTexRef(const String &name)
const ImageFormat & getFormat(void) const
S64 getStride(void) const
CUdevice int ordinal char int CUdevice dev CUdevprop CUdevice dev CUcontext ctx CUcontext ctx CUcontext pctx CUmodule const void image CUmodule const void fatCubin CUfunction CUmodule const char name void p CUfunction unsigned int bytes CUtexref pTexRef CUtexref CUarray unsigned int Flags CUtexref int CUaddress_mode am CUtexref unsigned int Flags CUaddress_mode CUtexref int dim CUarray_format int CUtexref hTexRef CUfunction unsigned int numbytes CUfunction int float value CUfunction int CUtexref hTexRef CUfunction int int grid_height CUevent unsigned int Flags CUevent hEvent cuEventDestroy
CUdevice int ordinal char int CUdevice dev CUdevprop CUdevice dev CUcontext ctx CUcontext ctx CUcontext pctx CUmodule const void image CUmodule const void fatCubin CUfunction CUmodule const char name void p CUfunction unsigned int bytes CUtexref pTexRef CUtexref CUarray unsigned int Flags CUtexref int CUaddress_mode am CUtexref unsigned int Flags CUaddress_mode CUtexref int dim CUarray_format int CUtexref hTexRef CUfunction unsigned int numbytes CUfunction int float value CUfunction int CUtexref hTexRef CUfunction int int grid_height cuEventCreate
void printf(const char *fmt,...)
CUdevice int ordinal char int CUdevice dev CUdevprop CUdevice dev CUcontext ctx CUcontext ctx CUcontext pctx CUmodule const void image CUmodule const void fatCubin CUfunction CUmodule const char name void p CUfunction unsigned int bytes CUtexref pTexRef CUtexref CUarray unsigned int Flags CUtexref int CUaddress_mode am CUtexref unsigned int Flags CUaddress_mode CUtexref int dim CUarray_format int CUtexref hTexRef CUfunction unsigned int numbytes CUfunction int float value CUfunction int CUtexref hTexRef CUfunction int int grid_height CUevent unsigned int Flags cuEventQuery
CUdevice int ordinal char int CUdevice dev CUdevprop CUdevice dev CUcontext ctx CUcontext ctx CUcontext pctx CUmodule const void image CUmodule const void fatCubin CUfunction CUmodule const char name void p CUfunction unsigned int bytes CUtexref pTexRef CUtexref CUarray unsigned int Flags CUtexref int CUaddress_mode am CUtexref unsigned int Flags CUaddress_mode CUtexref int dim CUarray_format int CUtexref hTexRef CUfunction unsigned int numbytes CUfunction int float value CUfunction int CUtexref hTexRef CUfunction int int grid_height CUevent unsigned int Flags CUevent hEvent CUevent hEvent CUstream unsigned int Flags CUstream hStream GLuint bufferobj unsigned int CUdevice dev CUdeviceptr unsigned int CUmodule const char name CUdeviceptr unsigned int bytesize CUdeviceptr dptr void unsigned int bytesize void CUdeviceptr unsigned int ByteCount CUarray unsigned int CUdeviceptr unsigned int ByteCount CUarray unsigned int const void unsigned int ByteCount CUarray unsigned int CUarray unsigned int unsigned int ByteCount void CUarray unsigned int unsigned int CUstream hStream const CUDA_MEMCPY2D pCopy CUdeviceptr const void unsigned int CUstream hStream const CUDA_MEMCPY2D CUstream hStream CUdeviceptr unsigned char unsigned int N CUdeviceptr unsigned int unsigned int N CUdeviceptr unsigned int unsigned short unsigned int unsigned int Height CUarray const CUDA_ARRAY_DESCRIPTOR pAllocateArray CUarray const CUDA_ARRAY3D_DESCRIPTOR pAllocateArray unsigned int CUtexref CUdeviceptr unsigned int bytes CUcontext unsigned int CUdevice device GLenum texture GLenum GLuint buffer GLenum GLuint renderbuffer GLenum GLsizeiptr const GLvoid GLenum usage GLuint shader GLenum type GLsizei const GLuint framebuffers GLsizei const GLuint renderbuffers GLuint v GLuint v GLenum GLenum GLenum GLuint GLint level GLsizei GLuint framebuffers GLuint const GLchar name GLenum GLintptr GLsizeiptr GLvoid data GLuint GLenum GLint param GLuint GLenum GLint param GLhandleARB programObj GLenum GLenum GLsizei 
GLsizei height GLenum GLint GLint GLsizei GLsizei GLsizei GLint GLenum format
void updateGlobals(bool async=false, CUstream stream=NULL)
static void sync(bool yield=true)
bool hasKernel(const String &name)
static int getDeviceAttribute(CUdevice_attribute attrib)
CUsurfref getSurfRef(const String &name)
int setParamPtr(CUfunction kernel, int offset, CUdeviceptr value)
static const char * decodeError(CUresult res)
CUdevice int ordinal char int CUdevice dev CUdevprop CUdevice dev CUcontext ctx CUcontext ctx CUcontext pctx CUmodule const void image CUmodule const void fatCubin CUfunction CUmodule const char name void p CUfunction unsigned int bytes CUtexref pTexRef cuTexRefSetArray
CUdevice int ordinal char int CUdevice dev CUdevprop CUdevice dev CUcontext ctx CUcontext ctx CUcontext pctx CUmodule const void image CUmodule const void fatCubin CUfunction CUmodule const char name void p CUfunction unsigned int bytes CUtexref pTexRef CUtexref CUarray unsigned int Flags CUtexref int CUaddress_mode am CUtexref unsigned int Flags CUaddress_mode CUtexref int dim CUarray_format int CUtexref hTexRef CUfunction unsigned int numbytes CUfunction int float value CUfunction int CUtexref hTexRef CUfunction int int grid_height CUevent unsigned int Flags CUevent hEvent CUevent hEvent CUstream unsigned int Flags CUstream hStream GLuint bufferobj unsigned int CUdevice dev CUdeviceptr unsigned int CUmodule const char name CUdeviceptr unsigned int bytesize CUdeviceptr dptr void unsigned int bytesize void CUdeviceptr unsigned int ByteCount CUarray unsigned int CUdeviceptr unsigned int ByteCount CUarray unsigned int const void unsigned int ByteCount CUarray unsigned int CUarray unsigned int unsigned int ByteCount void CUarray unsigned int unsigned int CUstream hStream const CUDA_MEMCPY2D pCopy CUdeviceptr const void unsigned int CUstream hStream const CUDA_MEMCPY2D CUstream hStream CUdeviceptr unsigned char unsigned int N CUdeviceptr unsigned int unsigned int N CUdeviceptr unsigned int unsigned short unsigned int unsigned int Height CUarray const CUDA_ARRAY_DESCRIPTOR pAllocateArray CUarray const CUDA_ARRAY3D_DESCRIPTOR pAllocateArray cuTexRefSetAddress
CUdevice int ordinal char int CUdevice dev CUdevprop CUdevice dev CUcontext ctx CUcontext ctx CUcontext pctx CUmodule const void image CUmodule const void fatCubin CUfunction CUmodule const char name void p CUfunction unsigned int bytes CUtexref pTexRef CUtexref CUarray unsigned int Flags cuTexRefSetAddressMode
static void staticInit(void)
void fail(const char *fmt,...)
CUdevice int ordinal char int CUdevice dev CUdevprop CUdevice dev CUcontext ctx CUcontext ctx CUcontext pctx CUmodule const void * image
CUdevice int ordinal char int CUdevice dev CUdevprop CUdevice dev CUcontext ctx CUcontext ctx CUcontext pctx CUmodule const void image CUmodule const void fatCubin CUfunction CUmodule const char name void p CUfunction unsigned int bytes CUtexref pTexRef CUtexref CUarray unsigned int Flags CUtexref int CUaddress_mode am CUtexref unsigned int Flags CUaddress_mode CUtexref int dim CUarray_format int CUtexref hTexRef CUfunction unsigned int numbytes CUfunction int float value CUfunction int CUtexref hTexRef CUfunction int int grid_height CUevent unsigned int Flags CUevent hEvent CUevent hEvent CUstream unsigned int Flags CUstream hStream GLuint bufferobj unsigned int CUdevice dev CUdeviceptr unsigned int CUmodule const char name CUdeviceptr unsigned int bytesize CUdeviceptr dptr void unsigned int bytesize void CUdeviceptr unsigned int ByteCount CUarray unsigned int CUdeviceptr unsigned int ByteCount CUarray unsigned int const void unsigned int ByteCount CUarray unsigned int CUarray unsigned int unsigned int ByteCount void CUarray unsigned int unsigned int CUstream hStream const CUDA_MEMCPY2D pCopy CUdeviceptr const void unsigned int CUstream hStream const CUDA_MEMCPY2D CUstream hStream CUdeviceptr unsigned char unsigned int N CUdeviceptr unsigned int unsigned int N CUdeviceptr unsigned int unsigned short unsigned int unsigned int Height CUarray const CUDA_ARRAY_DESCRIPTOR pAllocateArray CUarray const CUDA_ARRAY3D_DESCRIPTOR pAllocateArray unsigned int CUtexref CUdeviceptr unsigned int bytes CUcontext unsigned int CUdevice device GLenum texture GLenum GLuint buffer GLenum GLuint renderbuffer GLenum GLsizeiptr size
static void staticInit(void)
CUdevice int ordinal char int CUdevice dev CUdevprop CUdevice dev CUcontext ctx CUcontext ctx CUcontext pctx CUmodule const void image CUmodule const void fatCubin CUfunction CUmodule const char name void p CUfunction unsigned int bytes CUtexref pTexRef CUtexref CUarray unsigned int Flags CUtexref int CUaddress_mode am CUtexref unsigned int Flags CUaddress_mode CUtexref int dim CUarray_format int CUtexref hTexRef CUfunction unsigned int numbytes CUfunction int float value cuParamSetTexRef
CUdevice int ordinal char int CUdevice dev CUdevprop CUdevice dev CUcontext ctx CUcontext ctx CUcontext pctx CUmodule const void image CUmodule const void fatCubin CUfunction CUmodule const char name void p CUfunction unsigned int bytes CUtexref pTexRef CUtexref CUarray unsigned int Flags CUtexref int CUaddress_mode am CUtexref unsigned int Flags CUaddress_mode CUtexref int dim CUarray_format int CUtexref hTexRef CUfunction unsigned int numbytes CUfunction int float value CUfunction int CUtexref hTexRef CUfunction int int grid_height CUevent unsigned int Flags CUevent hEvent CUevent hEvent CUstream unsigned int Flags CUstream hStream GLuint bufferobj unsigned int CUdevice dev cuModuleGetGlobal
void updateTexRefs(CUfunction kernel)