NTrace
GPU ray tracing framework
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros
CudaKernel.hpp
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2009-2011, NVIDIA Corporation
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions are met:
7  * * Redistributions of source code must retain the above copyright
8  * notice, this list of conditions and the following disclaimer.
9  * * Redistributions in binary form must reproduce the above copyright
10  * notice, this list of conditions and the following disclaimer in the
11  * documentation and/or other materials provided with the distribution.
12  * * Neither the name of NVIDIA Corporation nor the
13  * names of its contributors may be used to endorse or promote products
14  * derived from this software without specific prior written permission.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19  * DISCLAIMED. IN NO EVENT SHALL <COPYRIGHT HOLDER> BE LIABLE FOR ANY
20  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
22  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
23  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26  */
27 
28 #pragma once
29 #include "gui/Image.hpp"
30 
31 namespace FW
32 {
33 //------------------------------------------------------------------------
34 
35 class CudaModule;
36 
37 //------------------------------------------------------------------------
38 // Automatic translation of kernel parameters:
39 //
40 // MyType* => mutable CUdeviceptr, valid for ONE element
41 // CudaKernel::Param(MyType*, N) => mutable CUdeviceptr, valid for N elements
42 // Array<MyType> => mutable CUdeviceptr, valid for all elements
43 // Buffer => mutable CUdeviceptr, valid for the whole buffer
44 // Image => mutable CUdeviceptr, valid for all pixels
45 // MyType => MyType, passed by value (int, float, Vec4f, etc.)
46 //------------------------------------------------------------------------
47 
48 class CudaKernel
49 {
50 public:
51  struct Param // Wrapper for converting kernel parameters to CUDA-compatible types.
52  {
55  const void* value;
56  CUdeviceptr cudaPtr;
58 
59  template <class T> Param (const T& v) { size = sizeof(T); align = __alignof(T); value = &v; } // Pass-by-value parameter: records sizeof/alignof of T and the address of v; v itself is not copied here.
60  template <class T> Param (const T* ptr, int num = 1) { buffer.wrapCPU((void*)ptr, num * sizeof(T)); setCudaPtr(buffer.getCudaPtr()); } // Wraps num elements at ptr in buffer and passes buffer.getCudaPtr(). wrapCPU() takes void*, so the const pointer must be cast explicitly.
61  template <class T> Param (T* ptr, int num = 1) { buffer.wrapCPU(ptr, num * sizeof(T)); setCudaPtr(buffer.getMutableCudaPtr()); } // Wraps num elements at ptr in buffer and passes buffer.getMutableCudaPtr().
62  template <class T> Param (const Array<T>& v) { buffer.wrapCPU((void*)v.getPtr(), v.getNumBytes()); setCudaPtr(buffer.getCudaPtr()); } // Wraps all bytes of the array in buffer and passes buffer.getCudaPtr(). getPtr() on a const array yields const T*, so it is cast for wrapCPU(void*, ...).
64  template <class T> Param (const Array64<T>& v) { buffer.wrapCPU((void*)v.getPtr(), v.getNumBytes()); setCudaPtr(buffer.getCudaPtr()); } // Same as the const Array<T> overload, for Array64<T>; the cast is needed because wrapCPU() takes void*.
67  Param (const Image& v) { setCudaPtr(v.getBuffer().getCudaPtr()); } // Passes getCudaPtr() of the Buffer returned by v.getBuffer().
69  void setCudaPtr (CUdeviceptr ptr) { size = sizeof(CUdeviceptr); align = __alignof(CUdeviceptr); value = &cudaPtr; cudaPtr = ptr; } // Stores ptr in cudaPtr and points value at that member, so the parameter has the size/alignment of a CUdeviceptr.
70  };
71 
72  typedef const Param& P; // To reduce the amount of code in setParams() overloads.
73 
74 public:
75  CudaKernel (CudaModule* module = NULL, CUfunction function = NULL);
76  CudaKernel (const CudaKernel& other) { operator=(other); } // Copy-constructs by delegating to operator=. No member initializers run first. NOTE(review): confirm operator= (CudaKernel.cpp) never reads the old member values.
77  ~CudaKernel (void);
78 
79  CudaModule* getModule (void) const { return m_module; } // Returns the stored module pointer (m_module).
80  CUfunction getHandle (void) const { return m_function; } // Returns the stored CUfunction handle (m_function).
81  int getAttribute (CUfunction_attribute attrib) const;
82 
83  CudaKernel& setParams (const void* ptr, int size) { m_params.set((const U8*)ptr, size); return *this; } // Hands (ptr, size) to m_params.set() as raw bytes and returns *this for chaining, like every other setter in this class.
84  CudaKernel& setParams (const Param* const* params, int numParams);
85 
86  CudaKernel& setParams (void) { return setParams((const Param* const*)NULL, 0); } // No parameters: forwards a NULL array with count 0.
87  CudaKernel& setParams (P p0) { const Param* p[] = { &p0 }; return setParams(p, 1); } // This and the following overloads (1..16 arguments) collect the addresses of their Params in a local array and forward to setParams(const Param* const*, int).
88  CudaKernel& setParams (P p0, P p1) { const Param* p[] = { &p0, &p1 }; return setParams(p, 2); }
89  CudaKernel& setParams (P p0, P p1, P p2) { const Param* p[] = { &p0, &p1, &p2 }; return setParams(p, 3); }
90  CudaKernel& setParams (P p0, P p1, P p2, P p3) { const Param* p[] = { &p0, &p1, &p2, &p3 }; return setParams(p, 4); }
91  CudaKernel& setParams (P p0, P p1, P p2, P p3, P p4) { const Param* p[] = { &p0, &p1, &p2, &p3, &p4 }; return setParams(p, 5); }
92  CudaKernel& setParams (P p0, P p1, P p2, P p3, P p4, P p5) { const Param* p[] = { &p0, &p1, &p2, &p3, &p4, &p5 }; return setParams(p, 6); }
93  CudaKernel& setParams (P p0, P p1, P p2, P p3, P p4, P p5, P p6) { const Param* p[] = { &p0, &p1, &p2, &p3, &p4, &p5, &p6 }; return setParams(p, 7); }
94  CudaKernel& setParams (P p0, P p1, P p2, P p3, P p4, P p5, P p6, P p7) { const Param* p[] = { &p0, &p1, &p2, &p3, &p4, &p5, &p6, &p7 }; return setParams(p, 8); }
95  CudaKernel& setParams (P p0, P p1, P p2, P p3, P p4, P p5, P p6, P p7, P p8) { const Param* p[] = { &p0, &p1, &p2, &p3, &p4, &p5, &p6, &p7, &p8 }; return setParams(p, 9); }
96  CudaKernel& setParams (P p0, P p1, P p2, P p3, P p4, P p5, P p6, P p7, P p8, P p9) { const Param* p[] = { &p0, &p1, &p2, &p3, &p4, &p5, &p6, &p7, &p8, &p9 }; return setParams(p, 10); }
97  CudaKernel& setParams (P p0, P p1, P p2, P p3, P p4, P p5, P p6, P p7, P p8, P p9, P p10) { const Param* p[] = { &p0, &p1, &p2, &p3, &p4, &p5, &p6, &p7, &p8, &p9, &p10 }; return setParams(p, 11); }
98  CudaKernel& setParams (P p0, P p1, P p2, P p3, P p4, P p5, P p6, P p7, P p8, P p9, P p10, P p11) { const Param* p[] = { &p0, &p1, &p2, &p3, &p4, &p5, &p6, &p7, &p8, &p9, &p10, &p11 }; return setParams(p, 12); }
99  CudaKernel& setParams (P p0, P p1, P p2, P p3, P p4, P p5, P p6, P p7, P p8, P p9, P p10, P p11, P p12) { const Param* p[] = { &p0, &p1, &p2, &p3, &p4, &p5, &p6, &p7, &p8, &p9, &p10, &p11, &p12 }; return setParams(p, 13); }
100  CudaKernel& setParams (P p0, P p1, P p2, P p3, P p4, P p5, P p6, P p7, P p8, P p9, P p10, P p11, P p12, P p13) { const Param* p[] = { &p0, &p1, &p2, &p3, &p4, &p5, &p6, &p7, &p8, &p9, &p10, &p11, &p12, &p13 }; return setParams(p, 14); }
101  CudaKernel& setParams (P p0, P p1, P p2, P p3, P p4, P p5, P p6, P p7, P p8, P p9, P p10, P p11, P p12, P p13, P p14) { const Param* p[] = { &p0, &p1, &p2, &p3, &p4, &p5, &p6, &p7, &p8, &p9, &p10, &p11, &p12, &p13, &p14 }; return setParams(p, 15); }
102  CudaKernel& setParams (P p0, P p1, P p2, P p3, P p4, P p5, P p6, P p7, P p8, P p9, P p10, P p11, P p12, P p13, P p14, P p15) { const Param* p[] = { &p0, &p1, &p2, &p3, &p4, &p5, &p6, &p7, &p8, &p9, &p10, &p11, &p12, &p13, &p14, &p15 }; return setParams(p, 16); }
103 
104  CudaKernel& preferL1 (void) { m_preferL1 = true; return *this; } // Sets the m_preferL1 flag; how the flag is applied at launch is not visible in this header.
105  CudaKernel& preferShared (void) { m_preferL1 = false; return *this; } // Clears the m_preferL1 flag (the opposite of preferL1()).
106  CudaKernel& setSharedBankSize (int bytes) { FW_ASSERT(bytes == 4 || bytes == 8); m_sharedBankSize = bytes; return *this; } // Asserts via FW_ASSERT that bytes is 4 or 8, then stores it in m_sharedBankSize.
107 
108  CudaKernel& setAsync (CUstream stream = NULL) { m_async = true; m_stream = stream; return *this; } // Turns on m_async and stores stream (NULL by default) in m_stream.
109  CudaKernel& cancelAsync (void) { m_async = false; return *this; } // Turns off m_async; m_stream is left unchanged.
110 
111  Vec2i getDefaultBlockSize (void) const; // Smallest block that reaches maximal occupancy.
112  CudaKernel& setGrid (int numThreads, const Vec2i& blockSize = 0); // Generates at least numThreads.
113  CudaKernel& setGrid (const Vec2i& sizeThreads, const Vec2i& blockSize = 0); // Generates at least sizeThreads in both X and Y.
114  CudaKernel& setGridExact (const Vec2i& blockSize, const Vec2i& gridSize) { m_blockSize = blockSize; m_gridSize = gridSize; return *this; } // Set exact blockSize and gridSize; both values are stored as given, with no adjustment.
115 
116  CudaKernel& launch (void);
117  CudaKernel& launch (int numThreads, const Vec2i& blockSize = 0) { setGrid(numThreads, blockSize); return launch(); } // Convenience: setGrid(numThreads, blockSize) followed by launch().
118  CudaKernel& launch (const Vec2i& sizeThreads, const Vec2i& blockSize = 0) { setGrid(sizeThreads, blockSize); return launch(); } // Convenience: setGrid(sizeThreads, blockSize) followed by launch().
119  F32 launchTimed (bool yield = true); // Returns GPU time in seconds.
120  F32 launchTimed (int numThreads, const Vec2i& blockSize = 0, bool yield = true) { setGrid(numThreads, blockSize); return launchTimed(yield); } // Convenience: setGrid(numThreads, blockSize) followed by launchTimed(yield).
121  F32 launchTimed (const Vec2i& sizeThreads, const Vec2i& blockSize = 0, bool yield = true) { setGrid(sizeThreads, blockSize); return launchTimed(yield); } // Convenience: setGrid(sizeThreads, blockSize) followed by launchTimed(yield).
122 
123  CudaKernel& sync (bool yield = true); // False = low latency but keeps the CPU busy. True = long latency but relieves the CPU.
124 
125  CudaKernel& operator= (const CudaKernel& other);
126 
127 private:
128  bool prepareLaunch (void);
129  void performLaunch (void);
130 
131 private:
132  CudaModule* m_module;
133  CUfunction m_function;
134 
135  Array<U8> m_params;
136  bool m_preferL1;
137  S32 m_sharedBankSize;
138  bool m_async;
139  CUstream m_stream;
140  Vec2i m_gridSize;
141  Vec2i m_blockSize;
142 };
143 
144 //------------------------------------------------------------------------
145 }
CudaKernel & preferShared(void)
Definition: CudaKernel.hpp:105
#define NULL
Definition: Defs.hpp:39
CudaModule * getModule(void) const
Definition: CudaKernel.hpp:79
CudaKernel & setParams(P p0, P p1, P p2, P p3, P p4, P p5, P p6, P p7, P p8, P p9, P p10, P p11)
Definition: CudaKernel.hpp:98
CudaKernel & preferL1(void)
Definition: CudaKernel.hpp:104
CudaKernel & setParams(P p0, P p1, P p2, P p3, P p4, P p5, P p6, P p7)
Definition: CudaKernel.hpp:94
CUdevice int ordinal char int CUdevice dev CUdevprop CUdevice dev CUcontext ctx CUcontext ctx CUcontext pctx CUmodule * module
Definition: DLLImports.inl:60
CudaKernel & launch(const Vec2i &sizeThreads, const Vec2i &blockSize=0)
Definition: CudaKernel.hpp:118
void ** ptr
Definition: DLLImports.cpp:74
CUdevice int ordinal char int CUdevice dev CUdevprop CUdevice dev CUcontext ctx CUcontext ctx CUcontext pctx CUmodule const void image CUmodule const void fatCubin CUfunction CUmodule const char name void p CUfunction unsigned int bytes CUtexref pTexRef CUtexref CUarray unsigned int Flags CUtexref int CUaddress_mode am CUtexref unsigned int Flags CUaddress_mode CUtexref int dim CUarray_format int CUtexref hTexRef CUfunction unsigned int numbytes CUfunction int float value CUfunction int CUtexref hTexRef CUfunction int int grid_height CUevent unsigned int Flags CUevent hEvent CUevent hEvent CUstream unsigned int Flags CUstream hStream GLuint bufferobj unsigned int CUdevice dev CUdeviceptr unsigned int CUmodule const char name CUdeviceptr unsigned int bytesize CUdeviceptr dptr void unsigned int bytesize void CUdeviceptr unsigned int ByteCount CUarray unsigned int CUdeviceptr unsigned int ByteCount CUarray unsigned int const void unsigned int ByteCount CUarray unsigned int CUarray unsigned int unsigned int ByteCount void CUarray unsigned int unsigned int CUstream hStream const CUDA_MEMCPY2D pCopy CUdeviceptr const void unsigned int CUstream hStream const CUDA_MEMCPY2D CUstream hStream CUdeviceptr unsigned char unsigned int N CUdeviceptr unsigned int unsigned int N CUdeviceptr unsigned int unsigned short unsigned int unsigned int Height CUarray const CUDA_ARRAY_DESCRIPTOR pAllocateArray CUarray const CUDA_ARRAY3D_DESCRIPTOR pAllocateArray unsigned int CUtexref CUdeviceptr unsigned int bytes CUcontext unsigned int CUdevice device GLenum texture GLenum GLuint buffer GLenum GLuint renderbuffer GLenum GLsizeiptr const GLvoid GLenum usage GLuint shader GLenum type GLsizei const GLuint framebuffers GLsizei const GLuint renderbuffers GLuint v GLuint v GLenum GLenum GLenum GLuint GLint level GLsizei GLuint framebuffers GLuint const GLchar name GLenum GLintptr GLsizeiptr GLvoid data GLuint GLenum GLint param GLuint GLenum GLint param GLhandleARB programObj GLenum GLenum GLsizei 
GLsizei height GLenum GLint GLint GLsizei GLsizei GLsizei GLint GLenum GLenum const GLvoid pixels GLint GLsizei const GLfloat value GLint GLfloat GLfloat v1 GLint GLfloat GLfloat GLfloat v2 GLint GLsizei const GLfloat value GLint GLsizei GLboolean const GLfloat value GLuint program GLuint GLfloat GLfloat GLfloat z GLuint GLint GLenum GLboolean GLsizei const GLvoid pointer GLuint GLuint const GLchar name GLenum GLsizei GLenum GLsizei GLsizei height GLenum GLuint renderbuffer GLenum GLenum GLint * params
Definition: DLLImports.inl:373
CUdeviceptr getCudaPtr(S64 ofs=0)
Definition: Buffer.hpp:108
CudaKernel & setParams(P p0, P p1, P p2, P p3, P p4, P p5, P p6, P p7, P p8, P p9)
Definition: CudaKernel.hpp:96
F32 launchTimed(const Vec2i &sizeThreads, const Vec2i &blockSize=0, bool yield=true)
Definition: CudaKernel.hpp:121
CudaKernel & sync(bool yield=true)
Definition: CudaKernel.cpp:225
CudaKernel & setParams(P p0, P p1, P p2)
Definition: CudaKernel.hpp:89
CudaKernel & cancelAsync(void)
Definition: CudaKernel.hpp:109
CudaKernel & setAsync(CUstream stream=NULL)
Definition: CudaKernel.hpp:108
S32 getNumBytes(void) const
void wrapCPU(void *cpuPtr, S64 size)
Definition: Buffer.cpp:39
CudaKernel & setParams(P p0, P p1, P p2, P p3, P p4, P p5, P p6, P p7, P p8, P p9, P p10)
Definition: CudaKernel.hpp:97
Param(Array64< T > &v)
Definition: CudaKernel.hpp:65
CudaKernel & setParams(P p0, P p1, P p2, P p3)
Definition: CudaKernel.hpp:90
float F32
Definition: Defs.hpp:89
CudaKernel(const CudaKernel &other)
Definition: CudaKernel.hpp:76
F32 launchTimed(int numThreads, const Vec2i &blockSize=0, bool yield=true)
Definition: CudaKernel.hpp:120
CudaKernel & setParams(P p0, P p1, P p2, P p3, P p4, P p5, P p6, P p7, P p8, P p9, P p10, P p11, P p12, P p13)
Definition: CudaKernel.hpp:100
CudaKernel(CudaModule *module=NULL, CUfunction function=NULL)
Definition: CudaKernel.cpp:36
CudaKernel & setGridExact(const Vec2i &blockSize, const Vec2i &gridSize)
Definition: CudaKernel.hpp:114
CUdeviceptr getMutableCudaPtr(S64 ofs=0)
Definition: Buffer.hpp:112
CUdevice int ordinal char int CUdevice dev CUdevprop CUdevice dev CUcontext ctx CUcontext ctx CUcontext pctx CUmodule const void image CUmodule const void fatCubin CUfunction CUmodule const char name void p CUfunction unsigned int bytes CUtexref pTexRef CUtexref CUarray unsigned int Flags CUtexref int CUaddress_mode am CUtexref unsigned int Flags CUaddress_mode CUtexref int dim CUarray_format int CUtexref hTexRef CUfunction unsigned int numbytes CUfunction int float value CUfunction int CUtexref hTexRef CUfunction int int grid_height CUevent unsigned int Flags CUevent hEvent CUevent hEvent CUstream unsigned int Flags CUstream hStream GLuint bufferobj unsigned int CUdevice dev CUdeviceptr unsigned int CUmodule const char name CUdeviceptr unsigned int bytesize CUdeviceptr dptr void unsigned int bytesize void CUdeviceptr unsigned int ByteCount CUarray unsigned int CUdeviceptr unsigned int ByteCount CUarray unsigned int const void unsigned int ByteCount CUarray unsigned int CUarray unsigned int unsigned int ByteCount void CUarray unsigned int unsigned int CUstream hStream const CUDA_MEMCPY2D pCopy CUdeviceptr const void unsigned int CUstream hStream const CUDA_MEMCPY2D CUstream hStream CUdeviceptr unsigned char unsigned int N CUdeviceptr unsigned int unsigned int N CUdeviceptr unsigned int unsigned short unsigned int unsigned int Height CUarray const CUDA_ARRAY_DESCRIPTOR pAllocateArray CUarray const CUDA_ARRAY3D_DESCRIPTOR pAllocateArray unsigned int CUtexref CUdeviceptr unsigned int bytes CUcontext unsigned int CUdevice device GLenum texture GLenum GLuint buffer GLenum GLuint renderbuffer GLenum GLsizeiptr const GLvoid GLenum usage GLuint shader GLenum type GLsizei const GLuint framebuffers GLsizei const GLuint renderbuffers GLuint v
Definition: DLLImports.inl:329
CUdevice int ordinal char int CUdevice dev CUdevprop CUdevice dev CUcontext ctx CUcontext ctx CUcontext pctx CUmodule const void image CUmodule const void fatCubin CUfunction CUmodule const char name void p CUfunction unsigned int bytes
Definition: DLLImports.inl:68
CudaKernel & setParams(P p0, P p1, P p2, P p3, P p4, P p5, P p6, P p7, P p8)
Definition: CudaKernel.hpp:95
#define FW_ASSERT(X)
Definition: Defs.hpp:67
const void * value
Definition: CudaKernel.hpp:55
CudaKernel & setParams(P p0, P p1, P p2, P p3, P p4, P p5, P p6, P p7, P p8, P p9, P p10, P p11, P p12, P p13, P p14)
Definition: CudaKernel.hpp:101
signed int S32
Definition: Defs.hpp:88
Param(const Array< T > &v)
Definition: CudaKernel.hpp:62
CudaKernel & setParams(P p0, P p1, P p2, P p3, P p4, P p5)
Definition: CudaKernel.hpp:92
Param(const T *ptr, int num=1)
Definition: CudaKernel.hpp:60
Param(const Image &v)
Definition: CudaKernel.hpp:67
void setCudaPtr(CUdeviceptr ptr)
Definition: CudaKernel.hpp:69
Param(Array< T > &v)
Definition: CudaKernel.hpp:63
CudaKernel & operator=(const CudaKernel &other)
Definition: CudaKernel.cpp:283
Buffer & getBuffer(void) const
Definition: Image.hpp:148
CudaKernel & setParams(P p0, P p1, P p2, P p3, P p4, P p5, P p6, P p7, P p8, P p9, P p10, P p11, P p12, P p13, P p14, P p15)
Definition: CudaKernel.hpp:102
Param(const Array64< T > &v)
Definition: CudaKernel.hpp:64
const Param & P
Definition: CudaKernel.hpp:72
Vec2i getDefaultBlockSize(void) const
Definition: CudaKernel.cpp:95
CUfunction getHandle(void) const
Definition: CudaKernel.hpp:80
CudaKernel & setParams(P p0, P p1, P p2, P p3, P p4, P p5, P p6, P p7, P p8, P p9, P p10, P p11, P p12)
Definition: CudaKernel.hpp:99
CudaKernel & setParams(void)
Definition: CudaKernel.hpp:86
CudaKernel & launch(int numThreads, const Vec2i &blockSize=0)
Definition: CudaKernel.hpp:117
CudaKernel & setParams(P p0)
Definition: CudaKernel.hpp:87
CudaKernel & setSharedBankSize(int bytes)
Definition: CudaKernel.hpp:106
CudaKernel & setParams(const void *ptr, int size)
Definition: CudaKernel.hpp:83
CudaKernel & setParams(P p0, P p1, P p2, P p3, P p4)
Definition: CudaKernel.hpp:91
unsigned char U8
Definition: Defs.hpp:83
CudaKernel & setParams(P p0, P p1)
Definition: CudaKernel.hpp:88
CudaKernel & launch(void)
Definition: CudaKernel.cpp:179
int getAttribute(CUfunction_attribute attrib) const
Definition: CudaKernel.cpp:56
T set(S idx, const T &item)
Definition: Array.hpp:248
CUdevice int ordinal char int CUdevice dev CUdevprop CUdevice dev CUcontext ctx CUcontext ctx CUcontext pctx CUmodule const void image CUmodule const void fatCubin CUfunction CUmodule const char name void * p
Definition: DLLImports.inl:66
F32 launchTimed(bool yield=true)
Definition: CudaKernel.cpp:188
const T * getPtr(S32 idx=0) const
CudaKernel & setGrid(int numThreads, const Vec2i &blockSize=0)
Definition: CudaKernel.cpp:144
CudaKernel & setParams(P p0, P p1, P p2, P p3, P p4, P p5, P p6)
Definition: CudaKernel.hpp:93
CUdevice int ordinal char int CUdevice dev CUdevprop CUdevice dev CUcontext ctx CUcontext ctx CUcontext pctx CUmodule const void image CUmodule const void fatCubin CUfunction CUmodule const char name void p CUfunction unsigned int bytes CUtexref pTexRef CUtexref CUarray unsigned int Flags CUtexref int CUaddress_mode am CUtexref unsigned int Flags CUaddress_mode CUtexref int dim CUarray_format int CUtexref hTexRef CUfunction unsigned int numbytes CUfunction int float value CUfunction int CUtexref hTexRef CUfunction int int grid_height CUevent unsigned int Flags CUevent hEvent CUevent hEvent CUstream unsigned int Flags CUstream hStream GLuint bufferobj unsigned int CUdevice dev CUdeviceptr unsigned int CUmodule const char name CUdeviceptr unsigned int bytesize CUdeviceptr dptr void unsigned int bytesize void CUdeviceptr unsigned int ByteCount CUarray unsigned int CUdeviceptr unsigned int ByteCount CUarray unsigned int const void unsigned int ByteCount CUarray unsigned int CUarray unsigned int unsigned int ByteCount void CUarray unsigned int unsigned int CUstream hStream const CUDA_MEMCPY2D pCopy CUdeviceptr const void unsigned int CUstream hStream const CUDA_MEMCPY2D CUstream hStream CUdeviceptr unsigned char unsigned int N CUdeviceptr unsigned int unsigned int N CUdeviceptr unsigned int unsigned short unsigned int unsigned int Height CUarray const CUDA_ARRAY_DESCRIPTOR pAllocateArray CUarray const CUDA_ARRAY3D_DESCRIPTOR pAllocateArray unsigned int CUtexref CUdeviceptr unsigned int bytes CUcontext unsigned int CUdevice device GLenum texture GLenum GLuint buffer GLenum GLuint renderbuffer GLenum GLsizeiptr size
Definition: DLLImports.inl:319
Param(T *ptr, int num=1)
Definition: CudaKernel.hpp:61