NTrace
GPU ray tracing framework
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros
CudaModule.cpp
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2009-2011, NVIDIA Corporation
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions are met:
7  * * Redistributions of source code must retain the above copyright
8  * notice, this list of conditions and the following disclaimer.
9  * * Redistributions in binary form must reproduce the above copyright
10  * notice, this list of conditions and the following disclaimer in the
11  * documentation and/or other materials provided with the distribution.
12  * * Neither the name of NVIDIA Corporation nor the
13  * names of its contributors may be used to endorse or promote products
14  * derived from this software without specific prior written permission.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19  * DISCLAIMED. IN NO EVENT SHALL <COPYRIGHT HOLDER> BE LIABLE FOR ANY
20  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
22  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
23  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26  */
27 
28 #include "gpu/CudaModule.hpp"
29 #include "base/Thread.hpp"
30 #include "base/Timer.hpp"
31 #include "gpu/CudaCompiler.hpp"
32 #include "gui/Image.hpp"
33 
34 using namespace FW;
35 
36 //------------------------------------------------------------------------
37 
38 bool CudaModule::s_inited = false;
39 bool CudaModule::s_available = false;
40 CUdevice CudaModule::s_device = NULL;
41 CUcontext CudaModule::s_context = NULL;
42 CUevent CudaModule::s_startEvent = NULL;
43 CUevent CudaModule::s_endEvent = NULL;
44 
45 //------------------------------------------------------------------------
46 
47 CudaModule::CudaModule(const void* cubin)
48 {
49  staticInit();
50  checkError("cuModuleLoadData", cuModuleLoadData(&m_module, cubin));
51 }
52 
53 //------------------------------------------------------------------------
54 
55 CudaModule::CudaModule(const String& cubinFile)
56 {
57  staticInit();
58  checkError("cuModuleLoad", cuModuleLoad(&m_module, cubinFile.getPtr()));
59 }
60 
61 //------------------------------------------------------------------------
62 
64 {
65  for (int i = 0; i < m_globals.getSize(); i++)
66  delete m_globals[i];
67 
68  checkError("cuModuleUnload", cuModuleUnload(m_module));
69 }
70 
71 //------------------------------------------------------------------------
72 
74 {
75  return (findKernel(name) != NULL);
76 }
77 
78 //------------------------------------------------------------------------
79 
81 {
82  CUfunction kernel = findKernel(name);
83  if (!kernel)
84  fail("CudaModule: Kernel not found '%s'!", name.getPtr());
85  return CudaKernel(this, kernel);
86 }
87 
88 //------------------------------------------------------------------------
89 
90 int CudaModule::setParami(CUfunction kernel, int offset, S32 value)
91 {
92  if (kernel)
93  checkError("cuParamSeti", cuParamSeti(kernel, offset, value));
94  return sizeof(S32);
95 }
96 
97 //------------------------------------------------------------------------
98 
99 int CudaModule::setParamf(CUfunction kernel, int offset, F32 value)
100 {
101  if (kernel)
102  checkError("cuParamSetf", cuParamSetf(kernel, offset, value));
103  return sizeof(F32);
104 }
105 
106 //------------------------------------------------------------------------
107 
108 int CudaModule::setParamPtr(CUfunction kernel, int offset, CUdeviceptr value)
109 {
110  if (kernel)
111  checkError("cuParamSetv", cuParamSetv(kernel, offset, &value, sizeof(CUdeviceptr)));
112  return sizeof(CUdeviceptr);
113 }
114 
115 //------------------------------------------------------------------------
116 
118 {
119  S32* found = m_globalHash.search(name);
120  if (found)
121  return *m_globals[*found];
122 
123  CUdeviceptr ptr;
124  CUsize_t size;
125  checkError("cuModuleGetGlobal", cuModuleGetGlobal(&ptr, &size, m_module, name.getPtr()));
126 
127  Buffer* buffer = new Buffer;
128  buffer->wrapCuda(ptr, size);
129 
130  m_globalHash.add(name, m_globals.getSize());
131  m_globals.add(buffer);
132  return *buffer;
133 }
134 
135 //------------------------------------------------------------------------
136 
137 void CudaModule::updateGlobals(bool async, CUstream stream)
138 {
139  for (int i = 0; i < m_globals.getSize(); i++)
140  m_globals[i]->setOwner(Buffer::Cuda, true, async, stream);
141 }
142 
143 //------------------------------------------------------------------------
144 
146 {
147  S32* found = m_texRefHash.search(name);
148  if (found)
149  return m_texRefs[*found];
150 
151  CUtexref texRef;
152  checkError("cuModuleGetTexRef", cuModuleGetTexRef(&texRef, m_module, name.getPtr()));
153 
154  m_texRefHash.add(name, m_texRefs.getSize());
155  m_texRefs.add(texRef);
156  return texRef;
157 }
158 
159 //------------------------------------------------------------------------
160 
161 void CudaModule::setTexRefMode(CUtexref texRef, bool wrap, bool bilinear, bool normalizedCoords, bool readAsInt)
162 {
163 #if (!FW_USE_CUDA)
164 
165  FW_UNREF(texRef);
166  FW_UNREF(wrap);
167  FW_UNREF(bilinear);
168  FW_UNREF(normalizedCoords);
169  FW_UNREF(readAsInt);
170  fail("CudaModule::setTexRefMode(): Built without FW_USE_CUDA!");
171 
172 #else
173 
174  CUaddress_mode addressMode = (wrap) ? CU_TR_ADDRESS_MODE_WRAP : CU_TR_ADDRESS_MODE_CLAMP;
175  CUfilter_mode filterMode = (bilinear) ? CU_TR_FILTER_MODE_LINEAR : CU_TR_FILTER_MODE_POINT;
176 
177  U32 flags = 0;
178  if (normalizedCoords)
179  flags |= CU_TRSF_NORMALIZED_COORDINATES;
180  if (readAsInt)
181  flags |= CU_TRSF_READ_AS_INTEGER;
182 
183  for (int dim = 0; dim < 3; dim++)
184  checkError("cuTexRefSetAddressMode", cuTexRefSetAddressMode(texRef, dim, addressMode));
185  checkError("cuTexRefSetFilterMode", cuTexRefSetFilterMode(texRef, filterMode));
186  checkError("cuTexRefSetFlags", cuTexRefSetFlags(texRef, flags));
187 
188 #endif
189 }
190 
191 //------------------------------------------------------------------------
192 
193 void CudaModule::setTexRef(const String& name, Buffer& buf, CUarray_format format, int numComponents)
194 {
195  setTexRef(name, buf.getCudaPtr(), buf.getSize(), format, numComponents);
196 }
197 
198 //------------------------------------------------------------------------
199 
200 void CudaModule::setTexRef(const String& name, CUdeviceptr ptr, S64 size, CUarray_format format, int numComponents)
201 {
202  CUtexref texRef = getTexRef(name);
203  checkError("cuTexRefSetFormat", cuTexRefSetFormat(texRef, format, numComponents));
204  checkError("cuTexRefSetAddress", cuTexRefSetAddress(NULL, texRef, ptr, (U32)size));
205 }
206 
207 //------------------------------------------------------------------------
208 
209 void CudaModule::setTexRef(const String& name, CUarray cudaArray, bool wrap, bool bilinear, bool normalizedCoords, bool readAsInt)
210 {
211  CUtexref texRef = getTexRef(name);
212  setTexRefMode(texRef, wrap, bilinear, normalizedCoords, readAsInt);
213 
214 #if FW_USE_CUDA
215  checkError("cuTexRefSetArray", cuTexRefSetArray(texRef, cudaArray, CU_TRSA_OVERRIDE_FORMAT));
216 #else
217  FW_UNREF(cudaArray);
218 #endif
219 }
220 
221 //------------------------------------------------------------------------
222 
223 void CudaModule::setTexRef(const String& name, const Image& image, bool wrap, bool bilinear, bool normalizedCoords, bool readAsInt)
224 {
225  FW_UNREF(name);
226  FW_UNREF(image);
227  FW_UNREF(wrap);
228  FW_UNREF(bilinear);
229  FW_UNREF(normalizedCoords);
230  FW_UNREF(readAsInt);
231 
232 #if (!FW_USE_CUDA)
233 
234  fail("CudaModule::setTexRef(Image): Built without FW_USE_CUDA!");
235 
236 #elif (CUDA_VERSION < 2020)
237 
238  fail("CudaModule: setTexRef(Image) requires CUDA 2.2 or later!");
239 
240 #else
241 
242  CUDA_ARRAY_DESCRIPTOR desc;
243  ImageFormat format = image.chooseCudaFormat(&desc);
244  if (format != image.getFormat())
245  fail("CudaModule: Unsupported image format in setTexRef(Image)!");
246 
247  CUtexref texRef = getTexRef(name);
248  setTexRefMode(texRef, wrap, bilinear, normalizedCoords, readAsInt);
249  checkError("cuTexRefSetAddress2D", cuTexRefSetAddress2D(texRef, &desc, image.getBuffer().getCudaPtr(), (size_t)image.getStride()));
250 
251 #endif
252 }
253 
254 //------------------------------------------------------------------------
255 
257 {
258  CUtexref texRef = getTexRef(name);
259  checkError("cuTexRefSetAddress", cuTexRefSetAddress(NULL, texRef, NULL, 0));
260 }
261 
262 //------------------------------------------------------------------------
263 
264 void CudaModule::updateTexRefs(CUfunction kernel)
265 {
266 #if (!FW_USE_CUDA)
267 
268  FW_UNREF(kernel);
269  fail("CudaModule::updateTexRefs(): Built without FW_USE_CUDA!");
270 
271 #else
272 
273  if (getDriverVersion() >= 32)
274  return;
275 
276  for (int i = 0; i < m_texRefs.getSize(); i++)
277  checkError("cuParamSetTexRef", cuParamSetTexRef(kernel, CU_PARAM_TR_DEFAULT, m_texRefs[i]));
278 #endif
279 }
280 
281 //------------------------------------------------------------------------
282 
284 {
285 #if (CUDA_VERSION >= 3010)
286  CUsurfref surfRef;
287  checkError("cuModuleGetSurfRef", cuModuleGetSurfRef(&surfRef, m_module, name.getPtr()));
288  return surfRef;
289 #else
290  FW_UNREF(name);
291  fail("CudaModule: getSurfRef() requires CUDA 3.1 or later!");
292  return NULL;
293 #endif
294 }
295 
296 //------------------------------------------------------------------------
297 
298 void CudaModule::setSurfRef(const String& name, CUarray cudaArray)
299 {
300 #if (CUDA_VERSION >= 3010)
301  checkError("cuSurfRefSetArray", cuSurfRefSetArray(getSurfRef(name), cudaArray, 0));
302 #else
303  FW_UNREF(name);
304  FW_UNREF(cudaArray);
305  fail("CudaModule: setSurfRef() requires CUDA 3.1 or later!");
306 #endif
307 }
308 
309 //------------------------------------------------------------------------
310 
312 {
313  if (s_inited)
314  return;
315  s_inited = true;
316  s_available = false;
317 
318  if (!isAvailable_cuInit())
319  return;
320 
321  CUresult res = cuInit(0);
322  if (res != CUDA_SUCCESS)
323  {
324 #if FW_USE_CUDA
325  if (res != CUDA_ERROR_NO_DEVICE)
326  checkError("cuInit", res);
327 #endif
328  return;
329  }
330 
331  s_available = true;
332  s_device = selectDevice();
333  printDeviceInfo(s_device);
334 
335  U32 flags = 0;
336 #if FW_USE_CUDA
337  flags |= CU_CTX_SCHED_SPIN; // use sync() if you want to yield
338 #endif
339 #if (CUDA_VERSION >= 2030)
340  if (getDriverVersion() >= 23)
341  flags |= CU_CTX_LMEM_RESIZE_TO_MAX; // reduce launch overhead with large localmem
342 #endif
343 
344  if (!isAvailable_cuGLCtxCreate())
345  checkError("cuCtxCreate", cuCtxCreate(&s_context, flags, s_device));
346  else
347  {
349  checkError("cuGLCtxCreate", cuGLCtxCreate(&s_context, flags, s_device));
350  }
351 
352  if (isAvailable_cuEventCreate())
353  {
354  checkError("cuEventCreate", cuEventCreate(&s_startEvent, 0));
355  checkError("cuEventCreate", cuEventCreate(&s_endEvent, 0));
356  }
357 }
358 
359 //------------------------------------------------------------------------
360 
362 {
363  if (!s_inited)
364  return;
365  s_inited = false;
366 
367  if (s_startEvent)
368  checkError("cuEventDestroy", cuEventDestroy(s_startEvent));
369  s_startEvent = NULL;
370 
371  if (s_endEvent)
372  checkError("cuEventDestroy", cuEventDestroy(s_endEvent));
373  s_endEvent = NULL;
374 
375  if (s_context)
376  checkError("cuCtxDestroy", cuCtxDestroy(s_context));
377  s_context = NULL;
378 
379  s_device = NULL;
380 }
381 
382 //------------------------------------------------------------------------
383 
385 {
386  staticInit();
387  if (!s_available)
388  return 0;
389 
390  CUsize_t free = 0;
391  CUsize_t total = 0;
392  cuMemGetInfo(&free, &total);
393  return total - free;
394 }
395 
396 //------------------------------------------------------------------------
397 
398 void CudaModule::sync(bool yield)
399 {
400  if (!s_inited)
401  return;
402 
403  if (!yield || !s_endEvent)
404  {
405  checkError("cuCtxSynchronize", cuCtxSynchronize());
406  return;
407  }
408 
409 #if FW_USE_CUDA
410  checkError("cuEventRecord", cuEventRecord(s_endEvent, NULL));
411  for (;;)
412  {
413  CUresult res = cuEventQuery(s_endEvent);
414  if (res != CUDA_ERROR_NOT_READY)
415  {
416  checkError("cuEventQuery", res);
417  break;
418  }
419  Thread::yield();
420  }
421 #endif
422 }
423 
424 //------------------------------------------------------------------------
425 
426 const char* CudaModule::decodeError(CUresult res)
427 {
428  const char* error;
429  switch (res)
430  {
431  default: error = "Unknown CUresult"; break;
432  case CUDA_SUCCESS: error = "No error"; break;
433 
434 #if FW_USE_CUDA
435  case CUDA_ERROR_INVALID_VALUE: error = "Invalid value"; break;
436  case CUDA_ERROR_OUT_OF_MEMORY: error = "Out of memory"; break;
437  case CUDA_ERROR_NOT_INITIALIZED: error = "Not initialized"; break;
438  case CUDA_ERROR_DEINITIALIZED: error = "Deinitialized"; break;
439  case CUDA_ERROR_NO_DEVICE: error = "No device"; break;
440  case CUDA_ERROR_INVALID_DEVICE: error = "Invalid device"; break;
441  case CUDA_ERROR_INVALID_IMAGE: error = "Invalid image"; break;
442  case CUDA_ERROR_INVALID_CONTEXT: error = "Invalid context"; break;
443  case CUDA_ERROR_CONTEXT_ALREADY_CURRENT: error = "Context already current"; break;
444  case CUDA_ERROR_MAP_FAILED: error = "Map failed"; break;
445  case CUDA_ERROR_UNMAP_FAILED: error = "Unmap failed"; break;
446  case CUDA_ERROR_ARRAY_IS_MAPPED: error = "Array is mapped"; break;
447  case CUDA_ERROR_ALREADY_MAPPED: error = "Already mapped"; break;
448  case CUDA_ERROR_NO_BINARY_FOR_GPU: error = "No binary for GPU"; break;
449  case CUDA_ERROR_ALREADY_ACQUIRED: error = "Already acquired"; break;
450  case CUDA_ERROR_NOT_MAPPED: error = "Not mapped"; break;
451  case CUDA_ERROR_INVALID_SOURCE: error = "Invalid source"; break;
452  case CUDA_ERROR_FILE_NOT_FOUND: error = "File not found"; break;
453  case CUDA_ERROR_INVALID_HANDLE: error = "Invalid handle"; break;
454  case CUDA_ERROR_NOT_FOUND: error = "Not found"; break;
455  case CUDA_ERROR_NOT_READY: error = "Not ready"; break;
456  case CUDA_ERROR_LAUNCH_FAILED: error = "Launch failed"; break;
457  case CUDA_ERROR_LAUNCH_OUT_OF_RESOURCES: error = "Launch out of resources"; break;
458  case CUDA_ERROR_LAUNCH_TIMEOUT: error = "Launch timeout"; break;
459  case CUDA_ERROR_LAUNCH_INCOMPATIBLE_TEXTURING: error = "Launch incompatible texturing"; break;
460  case CUDA_ERROR_UNKNOWN: error = "Unknown error"; break;
461 #endif
462 
463 #if (CUDA_VERSION >= 4000) // TODO: Some of these may exist in earlier versions, too.
464  case CUDA_ERROR_PROFILER_DISABLED: error = "Profiler disabled"; break;
465  case CUDA_ERROR_PROFILER_NOT_INITIALIZED: error = "Profiler not initialized"; break;
466  case CUDA_ERROR_PROFILER_ALREADY_STARTED: error = "Profiler already started"; break;
467  case CUDA_ERROR_PROFILER_ALREADY_STOPPED: error = "Profiler already stopped"; break;
468  case CUDA_ERROR_NOT_MAPPED_AS_ARRAY: error = "Not mapped as array"; break;
469  case CUDA_ERROR_NOT_MAPPED_AS_POINTER: error = "Not mapped as pointer"; break;
470  case CUDA_ERROR_ECC_UNCORRECTABLE: error = "ECC uncorrectable"; break;
471  case CUDA_ERROR_UNSUPPORTED_LIMIT: error = "Unsupported limit"; break;
472  case CUDA_ERROR_CONTEXT_ALREADY_IN_USE: error = "Context already in use"; break;
473  case CUDA_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND: error = "Shared object symbol not found"; break;
474  case CUDA_ERROR_SHARED_OBJECT_INIT_FAILED: error = "Shared object init failed"; break;
475  case CUDA_ERROR_OPERATING_SYSTEM: error = "Operating system error"; break;
476  case CUDA_ERROR_PEER_ACCESS_ALREADY_ENABLED: error = "Peer access already enabled"; break;
477  case CUDA_ERROR_PEER_ACCESS_NOT_ENABLED: error = "Peer access not enabled"; break;
478  case CUDA_ERROR_PRIMARY_CONTEXT_ACTIVE: error = "Primary context active"; break;
479  case CUDA_ERROR_CONTEXT_IS_DESTROYED: error = "Context is destroyed"; break;
480 #endif
481  }
482  return error;
483 }
484 
485 //------------------------------------------------------------------------
486 
487 void CudaModule::checkError(const char* funcName, CUresult res)
488 {
489  if (res != CUDA_SUCCESS)
490  fail("%s() failed: %s!", funcName, decodeError(res));
491 }
492 
493 //------------------------------------------------------------------------
494 
496 {
497  int version = 2010;
498 #if (CUDA_VERSION >= 2020)
499  if (isAvailable_cuDriverGetVersion())
500  cuDriverGetVersion(&version);
501 #endif
502  version /= 10;
503  return version / 10 + version % 10;
504 }
505 
506 //------------------------------------------------------------------------
507 
509 {
510  staticInit();
511  if (!s_available)
512  return 10;
513 
514  int major;
515  int minor;
516  checkError("cuDeviceComputeCapability", cuDeviceComputeCapability(&major, &minor, s_device));
517  return major * 10 + minor;
518 }
519 
520 //------------------------------------------------------------------------
521 
522 int CudaModule::getDeviceAttribute(CUdevice_attribute attrib)
523 {
524  staticInit();
525  if (!s_available)
526  return 0;
527 
528  int value;
529  checkError("cuDeviceGetAttribute", cuDeviceGetAttribute(&value, attrib, s_device));
530  return value;
531 }
532 
533 //------------------------------------------------------------------------
534 
535 CUdevice CudaModule::selectDevice(void)
536 {
537 #if (!FW_USE_CUDA)
538 
539  fail("Image::chooseCudaFormat(): Built without FW_USE_CUDA!");
540  return 0;
541 
542 #else
543 
544  int numDevices;
545  CUdevice device = NULL;
546  S32 bestScore = FW_S32_MIN;
547  checkError("cuDeviceGetCount", cuDeviceGetCount(&numDevices));
548 
549  for (int i = 0; i < numDevices; i++)
550  {
551  CUdevice dev;
552  checkError("cuDeviceGet", cuDeviceGet(&dev, i)); // TODO: Use cuGLGetDevices() on CUDA 4.1+.
553 
554  int clockRate;
555  int numProcessors;
556  checkError("cuDeviceGetAttribute", cuDeviceGetAttribute(&clockRate, CU_DEVICE_ATTRIBUTE_CLOCK_RATE, dev));
557  checkError("cuDeviceGetAttribute", cuDeviceGetAttribute(&numProcessors, CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT, dev));
558 
559  S32 score = clockRate * numProcessors;
560  if (score > bestScore)
561  {
562  device = dev;
563  bestScore = score;
564  }
565  }
566 
567  if (bestScore == FW_S32_MIN)
568  fail("CudaModule: No appropriate CUDA device found!");
569  return device;
570 
571 #endif
572 }
573 
574 //------------------------------------------------------------------------
575 
576 void CudaModule::printDeviceInfo(CUdevice device)
577 {
578 #if (!FW_USE_CUDA)
579 
580  FW_UNREF(device);
581  fail("CudaModule::printDeviceInfo(): Built without FW_USE_CUDA!");
582 
583 #else
584 
585  static const struct
586  {
587  CUdevice_attribute attrib;
588  const char* name;
589  } attribs[] =
590  {
591 #define A21(ENUM, NAME) { CU_DEVICE_ATTRIBUTE_ ## ENUM, NAME },
592 #if (CUDA_VERSION >= 4000)
593 # define A40(ENUM, NAME) A21(ENUM, NAME)
594 #else
595 # define A40(ENUM, NAME) // TODO: Some of these may exist in earlier versions, too.
596 #endif
597 
598  A21(CLOCK_RATE, "Clock rate")
599  A40(MEMORY_CLOCK_RATE, "Memory clock rate")
600  A21(MULTIPROCESSOR_COUNT, "Number of SMs")
601 // A40(GLOBAL_MEMORY_BUS_WIDTH, "DRAM bus width")
602 // A40(L2_CACHE_SIZE, "L2 cache size")
603 
604  A21(MAX_THREADS_PER_BLOCK, "Max threads per block")
605  A40(MAX_THREADS_PER_MULTIPROCESSOR, "Max threads per SM")
606  A21(REGISTERS_PER_BLOCK, "Max registers per block")
607  A21(SHARED_MEMORY_PER_BLOCK, "Max shared mem per block")
608  A21(TOTAL_CONSTANT_MEMORY, "Constant memory")
609 // A21(WARP_SIZE, "Warp size")
610 
611  A21(MAX_BLOCK_DIM_X, "Max blockDim.x")
612 // A21(MAX_BLOCK_DIM_Y, "Max blockDim.y")
613 // A21(MAX_BLOCK_DIM_Z, "Max blockDim.z")
614  A21(MAX_GRID_DIM_X, "Max gridDim.x")
615 // A21(MAX_GRID_DIM_Y, "Max gridDim.y")
616 // A21(MAX_GRID_DIM_Z, "Max gridDim.z")
617 // A40(MAXIMUM_TEXTURE1D_WIDTH, "Max tex1D.x")
618 // A40(MAXIMUM_TEXTURE2D_WIDTH, "Max tex2D.x")
619 // A40(MAXIMUM_TEXTURE2D_HEIGHT, "Max tex2D.y")
620 // A40(MAXIMUM_TEXTURE3D_WIDTH, "Max tex3D.x")
621 // A40(MAXIMUM_TEXTURE3D_HEIGHT, "Max tex3D.y")
622 // A40(MAXIMUM_TEXTURE3D_DEPTH, "Max tex3D.z")
623 // A40(MAXIMUM_TEXTURE1D_LAYERED_WIDTH, "Max layerTex1D.x")
624 // A40(MAXIMUM_TEXTURE1D_LAYERED_LAYERS, "Max layerTex1D.y")
625 // A40(MAXIMUM_TEXTURE2D_LAYERED_WIDTH, "Max layerTex2D.x")
626 // A40(MAXIMUM_TEXTURE2D_LAYERED_HEIGHT, "Max layerTex2D.y")
627 // A40(MAXIMUM_TEXTURE2D_LAYERED_LAYERS, "Max layerTex2D.z")
628 // A40(MAXIMUM_TEXTURE2D_ARRAY_WIDTH, "Max array.x")
629 // A40(MAXIMUM_TEXTURE2D_ARRAY_HEIGHT, "Max array.y")
630 // A40(MAXIMUM_TEXTURE2D_ARRAY_NUMSLICES, "Max array.z")
631 
632 // A21(MAX_PITCH, "Max memcopy pitch")
633 // A21(TEXTURE_ALIGNMENT, "Texture alignment")
634 // A40(SURFACE_ALIGNMENT, "Surface alignment")
635 
636  A40(CONCURRENT_KERNELS, "Concurrent launches supported")
637  A21(GPU_OVERLAP, "Concurrent memcopy supported")
638  A40(ASYNC_ENGINE_COUNT, "Max concurrent memcopies")
639 // A40(KERNEL_EXEC_TIMEOUT, "Kernel launch time limited")
640 // A40(INTEGRATED, "Integrated with host memory")
641  A40(UNIFIED_ADDRESSING, "Unified addressing supported")
642  A40(CAN_MAP_HOST_MEMORY, "Can map host memory")
643  A40(ECC_ENABLED, "ECC enabled")
644 
645 // A40(TCC_DRIVER, "Driver is TCC")
646 // A40(COMPUTE_MODE, "Compute exclusivity mode")
647 
648 // A40(PCI_BUS_ID, "PCI bus ID")
649 // A40(PCI_DEVICE_ID, "PCI device ID")
650 // A40(PCI_DOMAIN_ID, "PCI domain ID")
651 
652 #undef A21
653 #undef A40
654  };
655 
656  char name[256];
657  int major;
658  int minor;
659  CUsize_t memory;
660 
661  checkError("cuDeviceGetName", cuDeviceGetName(name, FW_ARRAY_SIZE(name) - 1, device));
662  checkError("cuDeviceComputeCapability", cuDeviceComputeCapability(&major, &minor, device));
663  checkError("cuDeviceTotalMem", cuDeviceTotalMem(&memory, device));
664  name[FW_ARRAY_SIZE(name) - 1] = '\0';
665 
666  printf("\n");
667  printf("%-32s%s\n", sprintf("CUDA device %d", (int)device).getPtr(), name);
668  printf("%-32s%s\n", "---", "---");
669  printf("%-32s%d.%d\n", "Compute capability", major, minor);
670  printf("%-32s%.0f megs\n", "Total memory", (F32)memory * exp2(-20));
671 
672  for (int i = 0; i < (int)FW_ARRAY_SIZE(attribs); i++)
673  {
674  int value;
675  if (cuDeviceGetAttribute(&value, attribs[i].attrib, device) == CUDA_SUCCESS)
676  printf("%-32s%d\n", attribs[i].name, value);
677  }
678  printf("\n");
679 
680 #endif
681 }
682 
683 //------------------------------------------------------------------------
684 
685 Vec2i CudaModule::selectGridSize(int numBlocks)
686 {
687  int maxWidth = 65536;
688 #if FW_USE_CUDA
689  checkError("cuDeviceGetAttribute", cuDeviceGetAttribute(&maxWidth, CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_X, s_device));
690 #endif
691 
692  Vec2i size(numBlocks, 1);
693  while (size.x > maxWidth)
694  {
695  size.x = (size.x + 1) >> 1;
696  size.y <<= 1;
697  }
698  return size;
699 }
700 
701 //------------------------------------------------------------------------
702 
703 CUfunction CudaModule::findKernel(const String& name)
704 {
705  // Search from hash.
706 
707  CUfunction* found = m_kernels.search(name);
708  if (found)
709  return *found;
710 
711  // Search from module.
712 
713  CUfunction kernel = NULL;
714  cuModuleGetFunction(&kernel, m_module, name.getPtr());
715  if (!kernel)
716  cuModuleGetFunction(&kernel, m_module, (String("__globfunc_") + name).getPtr());
717  if (!kernel)
718  return NULL;
719 
720  // Add to hash.
721 
722  m_kernels.add(name, kernel);
723  return kernel;
724 }
725 
726 //------------------------------------------------------------------------
#define FW_UNREF(X)
Definition: Defs.hpp:78
CUdevice int ordinal char int CUdevice dev CUdevprop CUdevice dev CUcontext ctx CUcontext ctx CUcontext pctx cuModuleLoadData
Definition: DLLImports.inl:60
CUdevice int ordinal char int CUdevice dev CUdevprop CUdevice dev CUcontext ctx CUcontext ctx CUcontext pctx CUmodule const void image CUmodule const void fatCubin CUfunction CUmodule const char name void p CUfunction unsigned int bytes CUtexref pTexRef CUtexref CUarray unsigned int Flags CUtexref int CUaddress_mode am CUtexref unsigned int Flags CUaddress_mode CUtexref int dim CUarray_format int CUtexref hTexRef CUfunction unsigned int numbytes CUfunction int offset
Definition: DLLImports.inl:84
#define NULL
Definition: Defs.hpp:39
static S64 getMemoryUsed(void)
Definition: CudaModule.cpp:384
const char * getPtr(void) const
Definition: String.hpp:51
static int getDriverVersion(void)
Definition: CudaModule.cpp:495
CUdevice int ordinal char int CUdevice dev CUdevprop CUdevice dev CUcontext ctx CUcontext ctx CUcontext pctx CUmodule const void image CUmodule const void fatCubin CUfunction CUmodule const char name void p CUfunction unsigned int bytes CUtexref pTexRef CUtexref CUarray unsigned int Flags CUtexref int CUaddress_mode am CUtexref unsigned int Flags CUaddress_mode CUtexref int dim CUarray_format int CUtexref hTexRef CUfunction unsigned int numbytes cuParamSetf
Definition: DLLImports.inl:84
static void staticDeinit(void)
Definition: CudaModule.cpp:361
#define FW_S32_MIN
Definition: Defs.hpp:112
CUdevice int ordinal char int CUdevice dev CUdevprop CUdevice dev CUcontext ctx CUcontext ctx CUcontext pctx CUmodule const void image CUmodule const void fatCubin CUfunction CUmodule const char name void p CUfunction unsigned int bytes CUtexref pTexRef CUtexref CUarray unsigned int Flags CUtexref int CUaddress_mode am CUtexref unsigned int Flags CUaddress_mode CUtexref int dim CUarray_format int CUtexref hTexRef CUfunction unsigned int numbytes CUfunction int float value CUfunction int CUtexref hTexRef CUfunction int int grid_height CUevent unsigned int Flags CUevent hEvent CUevent hEvent CUstream unsigned int Flags CUstream hStream GLuint bufferobj unsigned int CUdevice dev CUdeviceptr unsigned int CUmodule const char name CUdeviceptr unsigned int bytesize CUdeviceptr dptr void unsigned int bytesize void CUdeviceptr unsigned int ByteCount CUarray unsigned int CUdeviceptr unsigned int ByteCount CUarray unsigned int const void unsigned int ByteCount CUarray unsigned int CUarray unsigned int unsigned int ByteCount void CUarray unsigned int unsigned int CUstream hStream const CUDA_MEMCPY2D pCopy CUdeviceptr const void unsigned int CUstream hStream const CUDA_MEMCPY2D CUstream hStream CUdeviceptr unsigned char unsigned int N CUdeviceptr unsigned int unsigned int N CUdeviceptr unsigned int unsigned short unsigned int unsigned int Height CUarray const CUDA_ARRAY_DESCRIPTOR pAllocateArray CUarray const CUDA_ARRAY3D_DESCRIPTOR pAllocateArray unsigned int CUtexref CUdeviceptr unsigned int bytes CUcontext unsigned int CUdevice device GLenum texture GLenum GLuint buffer
Definition: DLLImports.inl:315
const char * name
Definition: DLLImports.cpp:42
void ** ptr
Definition: DLLImports.cpp:74
void * CUsurfref
Definition: DLLImports.hpp:117
CUdevice int ordinal char int CUdevice dev
Definition: DLLImports.inl:48
void wrapCuda(CUdeviceptr cudaPtr, S64 size)
Definition: Buffer.cpp:70
CUdeviceptr getCudaPtr(S64 ofs=0)
Definition: Buffer.hpp:108
CudaModule(const void *cubin)
Definition: CudaModule.cpp:47
CudaKernel getKernel(const String &name)
Definition: CudaModule.cpp:80
int setParami(CUfunction kernel, int offset, S32 value)
Definition: CudaModule.cpp:90
S64 getSize(void) const
Definition: Buffer.hpp:69
unsigned int CUsize_t
Definition: DLLImports.hpp:121
CUdevice int ordinal char int CUdevice dev CUdevprop CUdevice dev CUcontext ctx CUcontext ctx CUcontext pctx cuCtxSynchronize
Definition: DLLImports.inl:58
CUdevice int ordinal char int CUdevice dev CUdevprop CUdevice dev CUcontext ctx CUcontext ctx CUcontext pctx CUmodule const void image CUmodule const void fatCubin CUfunction CUmodule const char name void p CUfunction unsigned int bytes CUtexref pTexRef CUtexref CUarray unsigned int Flags CUtexref int CUaddress_mode am CUtexref unsigned int Flags CUaddress_mode CUtexref int dim CUarray_format int CUtexref hTexRef CUfunction unsigned int numbytes CUfunction int float value CUfunction int CUtexref hTexRef CUfunction int int grid_height CUevent unsigned int Flags CUevent hEvent CUevent hEvent CUstream unsigned int Flags CUstream hStream GLuint bufferobj unsigned int CUdevice dev CUdeviceptr unsigned int CUmodule const char name CUdeviceptr unsigned int bytesize CUdeviceptr dptr void unsigned int bytesize void CUdeviceptr unsigned int ByteCount CUarray unsigned int CUdeviceptr unsigned int ByteCount CUarray unsigned int const void unsigned int ByteCount CUarray unsigned int CUarray unsigned int unsigned int ByteCount void CUarray unsigned int unsigned int CUstream hStream const CUDA_MEMCPY2D pCopy CUdeviceptr const void unsigned int CUstream hStream const CUDA_MEMCPY2D CUstream hStream CUdeviceptr unsigned char unsigned int N CUdeviceptr unsigned int unsigned int N CUdeviceptr unsigned int unsigned short unsigned int unsigned int Height CUarray const CUDA_ARRAY_DESCRIPTOR pAllocateArray CUarray const CUDA_ARRAY3D_DESCRIPTOR pAllocateArray unsigned int CUtexref CUdeviceptr unsigned int bytes CUcontext unsigned int CUdevice device GLenum texture GLenum GLuint buffer GLenum GLuint renderbuffer GLenum GLsizeiptr const GLvoid GLenum usage GLuint shader GLenum type GLsizei const GLuint framebuffers GLsizei const GLuint renderbuffers GLuint v GLuint v GLenum GLenum GLenum GLuint GLint level GLsizei GLuint framebuffers GLuint const GLchar name GLenum GLintptr GLsizeiptr GLvoid data GLuint GLenum GLint param GLuint GLenum GLint param GLhandleARB programObj GLenum GLenum GLsizei GLsizei height GLenum GLint GLint GLsizei GLsizei GLsizei GLint GLenum GLenum const GLvoid pixels GLint GLsizei const GLfloat value GLint GLfloat GLfloat v1 GLint GLfloat GLfloat GLfloat v2 GLint GLsizei const GLfloat value GLint GLsizei GLboolean const GLfloat value GLuint program GLuint GLfloat x
Definition: DLLImports.inl:363
CUdevice int ordinal char int CUdevice dev CUdevprop CUdevice dev cuCtxDestroy
Definition: DLLImports.inl:52
CUtexref getTexRef(const String &name)
Definition: CudaModule.cpp:145
CUdevice int ordinal char int CUdevice dev CUdevprop CUdevice dev CUcontext ctx CUcontext ctx CUcontext pctx CUmodule const void image CUmodule const void fatCubin CUfunction CUmodule const char name void p CUfunction unsigned int bytes CUtexref pTexRef CUtexref CUarray unsigned int Flags CUtexref int CUaddress_mode am CUtexref unsigned int Flags CUaddress_mode CUtexref int dim CUarray_format int CUtexref hTexRef CUfunction unsigned int numbytes CUfunction int float value CUfunction int CUtexref hTexRef CUfunction int int grid_height CUevent unsigned int Flags CUevent hEvent CUevent hEvent CUstream unsigned int Flags CUstream hStream GLuint bufferobj cuDeviceTotalMem
Definition: DLLImports.inl:106
void setTexRef(const String &name, Buffer &buf, CUarray_format format, int numComponents)
Definition: CudaModule.cpp:193
void setSurfRef(const String &name, CUarray cudaArray)
Definition: CudaModule.cpp:298
static void checkError(const char *funcName, CUresult res)
Definition: CudaModule.cpp:487
static int getComputeCapability(void)
Definition: CudaModule.cpp:508
CUdevice int ordinal char int CUdevice dev CUdevprop CUdevice dev CUcontext ctx CUcontext ctx CUcontext pctx CUmodule const void image CUmodule const void fatCubin cuModuleGetFunction
Definition: DLLImports.inl:64
float F32
Definition: Defs.hpp:89
int setParamf(CUfunction kernel, int offset, F32 value)
Definition: CudaModule.cpp:99
void free(void *ptr)
Definition: Defs.cpp:164
CUdevice int ordinal cuDeviceGetName
Definition: DLLImports.inl:48
CUdevice int ordinal char int CUdevice dev CUdevprop CUdevice dev CUcontext ctx CUcontext ctx CUcontext pctx CUmodule const void image CUmodule const void fatCubin CUfunction CUmodule const char name void p CUfunction unsigned int bytes CUtexref pTexRef CUtexref CUarray unsigned int Flags CUtexref int CUaddress_mode am CUtexref unsigned int Flags CUaddress_mode CUtexref int dim CUarray_format int CUtexref hTexRef CUfunction unsigned int numbytes CUfunction int float value CUfunction int CUtexref hTexRef CUfunction int int grid_height CUevent unsigned int Flags CUevent hEvent CUevent hEvent CUstream unsigned int Flags CUstream hStream GLuint bufferobj unsigned int CUdevice dev CUdeviceptr unsigned int CUmodule const char name CUdeviceptr unsigned int bytesize CUdeviceptr dptr void unsigned int bytesize void CUdeviceptr unsigned int ByteCount CUarray unsigned int CUdeviceptr unsigned int ByteCount CUarray unsigned int const void unsigned int ByteCount CUarray unsigned int CUarray unsigned int unsigned int ByteCount void CUarray unsigned int unsigned int CUstream hStream const CUDA_MEMCPY2D pCopy CUdeviceptr const void unsigned int CUstream hStream const CUDA_MEMCPY2D CUstream hStream CUdeviceptr unsigned char unsigned int N CUdeviceptr unsigned int unsigned int N CUdeviceptr unsigned int unsigned short unsigned int unsigned int Height CUarray const CUDA_ARRAY_DESCRIPTOR pAllocateArray CUarray const CUDA_ARRAY3D_DESCRIPTOR pAllocateArray unsigned int CUtexref CUdeviceptr unsigned int bytes cuGLCtxCreate
Definition: DLLImports.inl:144
CUdevice int ordinal char int CUdevice dev CUdevprop CUdevice dev CUcontext ctx CUcontext ctx CUcontext pctx CUmodule const void image CUmodule const void fatCubin CUfunction CUmodule const char name void p CUfunction unsigned int bytes CUtexref pTexRef CUtexref CUarray unsigned int Flags CUtexref int CUaddress_mode am cuTexRefSetFlags
Definition: DLLImports.inl:76
signed int S32
Definition: Defs.hpp:88
T & add(void)
Definition: Array.hpp:384
String sprintf(const char *fmt,...)
Definition: Defs.cpp:241
CUdevice int ordinal char int CUdevice dev CUdevprop CUdevice dev CUcontext ctx CUcontext ctx CUcontext pctx CUmodule const void image CUmodule const void fatCubin CUfunction CUmodule const char name void p CUfunction unsigned int bytes CUtexref pTexRef CUtexref CUarray unsigned int Flags CUtexref int dim
Definition: DLLImports.inl:74
signed __int64 S64
Definition: Defs.hpp:98
F32 exp2(F32 a)
Definition: Math.hpp:87
unsigned int U32
Definition: Defs.hpp:85
ImageFormat chooseCudaFormat(CUDA_ARRAY_DESCRIPTOR *desc=NULL, ImageFormat::ID desiredFormat=ImageFormat::ID_Max) const
Definition: Image.cpp:531
cuDeviceGet
Definition: DLLImports.inl:46
Buffer & getBuffer(void) const
Definition: Image.hpp:148
Buffer & getGlobal(const String &name)
Definition: CudaModule.cpp:117
void setTexRefMode(CUtexref texRef, bool wrap=true, bool bilinear=true, bool normalizedCoords=true, bool readAsInt=false)
Definition: CudaModule.cpp:161
CUdevice int ordinal char int CUdevice dev CUdevprop CUdevice dev CUcontext ctx CUcontext ctx CUcontext pctx CUmodule const void image CUmodule const void fatCubin CUfunction CUmodule const char name void p CUfunction unsigned int bytes CUtexref pTexRef CUtexref CUarray unsigned int Flags CUtexref int CUaddress_mode am CUtexref unsigned int Flags CUaddress_mode CUtexref int dim CUarray_format int CUtexref hTexRef CUfunction unsigned int numbytes CUfunction int float value
Definition: DLLImports.inl:84
void unsetTexRef(const String &name)
Definition: CudaModule.cpp:256
const ImageFormat & getFormat(void) const
Definition: Image.hpp:144
S64 getStride(void) const
Definition: Image.hpp:146
CUdevice int ordinal char int CUdevice dev CUdevprop CUdevice dev CUcontext ctx CUcontext ctx CUcontext pctx CUmodule const void image CUmodule const void fatCubin CUfunction CUmodule const char name void p CUfunction unsigned int bytes CUtexref pTexRef CUtexref CUarray unsigned int Flags CUtexref int CUaddress_mode am CUtexref unsigned int Flags CUaddress_mode CUtexref int dim CUarray_format int CUtexref hTexRef CUfunction unsigned int numbytes CUfunction int float value CUfunction int CUtexref hTexRef CUfunction int int grid_height CUevent unsigned int Flags CUevent hEvent cuEventDestroy
Definition: DLLImports.inl:94
CUdevice int ordinal char int CUdevice dev CUdevprop CUdevice dev CUcontext ctx CUcontext ctx CUcontext pctx CUmodule const void image CUmodule const void fatCubin CUfunction CUmodule const char name void p CUfunction unsigned int bytes CUtexref pTexRef CUtexref CUarray unsigned int Flags CUtexref int CUaddress_mode am CUtexref unsigned int Flags CUaddress_mode CUtexref int dim CUarray_format int CUtexref hTexRef CUfunction unsigned int numbytes CUfunction int float value CUfunction int CUtexref hTexRef CUfunction int int grid_height cuEventCreate
Definition: DLLImports.inl:90
void printf(const char *fmt,...)
Definition: Defs.cpp:225
CUdevice int ordinal char int CUdevice dev CUdevprop CUdevice dev CUcontext ctx CUcontext ctx CUcontext pctx CUmodule const void image CUmodule const void fatCubin CUfunction CUmodule const char name void p CUfunction unsigned int bytes CUtexref pTexRef CUtexref CUarray unsigned int Flags CUtexref int CUaddress_mode am CUtexref unsigned int Flags CUaddress_mode CUtexref int dim CUarray_format int CUtexref hTexRef CUfunction unsigned int numbytes CUfunction int float value CUfunction int CUtexref hTexRef CUfunction int int grid_height CUevent unsigned int Flags cuEventQuery
Definition: DLLImports.inl:92
CUdevice int ordinal char int CUdevice dev CUdevprop CUdevice dev CUcontext ctx CUcontext ctx CUcontext pctx CUmodule const void image CUmodule const void fatCubin CUfunction CUmodule const char name void p CUfunction unsigned int bytes CUtexref pTexRef CUtexref CUarray unsigned int Flags CUtexref int CUaddress_mode am CUtexref unsigned int Flags CUaddress_mode CUtexref int dim CUarray_format int CUtexref hTexRef CUfunction unsigned int numbytes CUfunction int float value CUfunction int CUtexref hTexRef CUfunction int int grid_height CUevent unsigned int Flags CUevent hEvent CUevent hEvent CUstream unsigned int Flags CUstream hStream GLuint bufferobj unsigned int CUdevice dev CUdeviceptr unsigned int CUmodule const char name CUdeviceptr unsigned int bytesize CUdeviceptr dptr void unsigned int bytesize void CUdeviceptr unsigned int ByteCount CUarray unsigned int CUdeviceptr unsigned int ByteCount CUarray unsigned int const void unsigned int ByteCount CUarray unsigned int CUarray unsigned int unsigned int ByteCount void CUarray unsigned int unsigned int CUstream hStream const CUDA_MEMCPY2D pCopy CUdeviceptr const void unsigned int CUstream hStream const CUDA_MEMCPY2D CUstream hStream CUdeviceptr unsigned char unsigned int N CUdeviceptr unsigned int unsigned int N CUdeviceptr unsigned int unsigned short unsigned int unsigned int Height CUarray const CUDA_ARRAY_DESCRIPTOR pAllocateArray CUarray const CUDA_ARRAY3D_DESCRIPTOR pAllocateArray unsigned int CUtexref CUdeviceptr unsigned int bytes CUcontext unsigned int CUdevice device GLenum texture GLenum GLuint buffer GLenum GLuint renderbuffer GLenum GLsizeiptr const GLvoid GLenum usage GLuint shader GLenum type GLsizei const GLuint framebuffers GLsizei const GLuint renderbuffers GLuint v GLuint v GLenum GLenum GLenum GLuint GLint level GLsizei GLuint framebuffers GLuint const GLchar name GLenum GLintptr GLsizeiptr GLvoid data GLuint GLenum GLint param GLuint GLenum GLint param GLhandleARB programObj GLenum GLenum GLsizei GLsizei height GLenum GLint GLint GLsizei GLsizei GLsizei GLint GLenum format
Definition: DLLImports.inl:349
#define FW_ARRAY_SIZE(X)
Definition: Defs.hpp:79
void updateGlobals(bool async=false, CUstream stream=NULL)
Definition: CudaModule.cpp:137
static void sync(bool yield=true)
Definition: CudaModule.cpp:398
bool hasKernel(const String &name)
Definition: CudaModule.cpp:73
static int getDeviceAttribute(CUdevice_attribute attrib)
Definition: CudaModule.cpp:522
CUsurfref getSurfRef(const String &name)
Definition: CudaModule.cpp:283
int setParamPtr(CUfunction kernel, int offset, CUdeviceptr value)
Definition: CudaModule.cpp:108
static const char * decodeError(CUresult res)
Definition: CudaModule.cpp:426
CUdevice int ordinal char int CUdevice dev CUdevprop CUdevice dev CUcontext ctx CUcontext ctx CUcontext pctx CUmodule const void image CUmodule const void fatCubin CUfunction CUmodule const char name void p CUfunction unsigned int bytes CUtexref pTexRef cuTexRefSetArray
Definition: DLLImports.inl:72
CUdevice int ordinal char int CUdevice dev CUdevprop CUdevice dev CUcontext ctx CUcontext ctx CUcontext pctx CUmodule const void image CUmodule const void fatCubin CUfunction CUmodule const char name void p CUfunction unsigned int bytes CUtexref pTexRef CUtexref CUarray unsigned int Flags CUtexref int CUaddress_mode am CUtexref unsigned int Flags CUaddress_mode CUtexref int dim CUarray_format int CUtexref hTexRef CUfunction unsigned int numbytes CUfunction int float value CUfunction int CUtexref hTexRef CUfunction int int grid_height CUevent unsigned int Flags CUevent hEvent CUevent hEvent CUstream unsigned int Flags CUstream hStream GLuint bufferobj unsigned int CUdevice dev CUdeviceptr unsigned int CUmodule const char name CUdeviceptr unsigned int bytesize CUdeviceptr dptr void unsigned int bytesize void CUdeviceptr unsigned int ByteCount CUarray unsigned int CUdeviceptr unsigned int ByteCount CUarray unsigned int const void unsigned int ByteCount CUarray unsigned int CUarray unsigned int unsigned int ByteCount void CUarray unsigned int unsigned int CUstream hStream const CUDA_MEMCPY2D pCopy CUdeviceptr const void unsigned int CUstream hStream const CUDA_MEMCPY2D CUstream hStream CUdeviceptr unsigned char unsigned int N CUdeviceptr unsigned int unsigned int N CUdeviceptr unsigned int unsigned short unsigned int unsigned int Height CUarray const CUDA_ARRAY_DESCRIPTOR pAllocateArray CUarray const CUDA_ARRAY3D_DESCRIPTOR pAllocateArray cuTexRefSetAddress
Definition: DLLImports.inl:142
CUdevice int ordinal char int CUdevice dev CUdevprop CUdevice dev CUcontext ctx CUcontext ctx CUcontext pctx CUmodule const void image CUmodule const void fatCubin CUfunction CUmodule const char name void p CUfunction unsigned int bytes CUtexref pTexRef CUtexref CUarray unsigned int Flags cuTexRefSetAddressMode
Definition: DLLImports.inl:74
static void staticInit(void)
Definition: CudaModule.cpp:311
void fail(const char *fmt,...)
Definition: Defs.cpp:304
static void yield(void)
Definition: Thread.cpp:338
CUdevice int ordinal char int CUdevice dev CUdevprop CUdevice dev CUcontext ctx CUcontext ctx CUcontext pctx CUmodule const void * image
Definition: DLLImports.inl:60
CUdevice int ordinal char int CUdevice dev CUdevprop CUdevice dev CUcontext ctx CUcontext ctx CUcontext pctx CUmodule const void image CUmodule const void fatCubin CUfunction CUmodule const char name void p CUfunction unsigned int bytes CUtexref pTexRef CUtexref CUarray unsigned int Flags CUtexref int CUaddress_mode am CUtexref unsigned int Flags CUaddress_mode CUtexref int dim CUarray_format int CUtexref hTexRef CUfunction unsigned int numbytes CUfunction int float value CUfunction int CUtexref hTexRef CUfunction int int grid_height CUevent unsigned int Flags CUevent hEvent CUevent hEvent CUstream unsigned int Flags CUstream hStream GLuint bufferobj unsigned int CUdevice dev CUdeviceptr unsigned int CUmodule const char name CUdeviceptr unsigned int bytesize CUdeviceptr dptr void unsigned int bytesize void CUdeviceptr unsigned int ByteCount CUarray unsigned int CUdeviceptr unsigned int ByteCount CUarray unsigned int const void unsigned int ByteCount CUarray unsigned int CUarray unsigned int unsigned int ByteCount void CUarray unsigned int unsigned int CUstream hStream const CUDA_MEMCPY2D pCopy CUdeviceptr const void unsigned int CUstream hStream const CUDA_MEMCPY2D CUstream hStream CUdeviceptr unsigned char unsigned int N CUdeviceptr unsigned int unsigned int N CUdeviceptr unsigned int unsigned short unsigned int unsigned int Height CUarray const CUDA_ARRAY_DESCRIPTOR pAllocateArray CUarray const CUDA_ARRAY3D_DESCRIPTOR pAllocateArray unsigned int CUtexref CUdeviceptr unsigned int bytes CUcontext unsigned int CUdevice device GLenum texture GLenum GLuint buffer GLenum GLuint renderbuffer GLenum GLsizeiptr size
Definition: DLLImports.inl:319
static void staticInit(void)
Definition: GLContext.cpp:894
S getSize(void) const
Definition: Array.hpp:188
CUdevice * device
Definition: DLLImports.inl:46
CUdevice int ordinal char int CUdevice dev CUdevprop CUdevice dev CUcontext ctx CUcontext ctx CUcontext pctx CUmodule const void image CUmodule const void fatCubin CUfunction CUmodule const char name void p CUfunction unsigned int bytes CUtexref pTexRef CUtexref CUarray unsigned int Flags CUtexref int CUaddress_mode am CUtexref unsigned int Flags CUaddress_mode CUtexref int dim CUarray_format int CUtexref hTexRef CUfunction unsigned int numbytes CUfunction int float value cuParamSetTexRef
Definition: DLLImports.inl:86
CUdevice int ordinal char int CUdevice dev CUdevprop CUdevice dev CUcontext ctx CUcontext ctx CUcontext pctx CUmodule const void image CUmodule const void fatCubin CUfunction CUmodule const char name void p CUfunction unsigned int bytes CUtexref pTexRef CUtexref CUarray unsigned int Flags CUtexref int CUaddress_mode am CUtexref unsigned int Flags CUaddress_mode CUtexref int dim CUarray_format int CUtexref hTexRef CUfunction unsigned int numbytes CUfunction int float value CUfunction int CUtexref hTexRef CUfunction int int grid_height CUevent unsigned int Flags CUevent hEvent CUevent hEvent CUstream unsigned int Flags CUstream hStream GLuint bufferobj unsigned int CUdevice dev cuModuleGetGlobal
Definition: DLLImports.inl:108
void updateTexRefs(CUfunction kernel)
Definition: CudaModule.cpp:264