80 CUfunction
getHandle (
void)
const {
return m_function; }
92 CudaKernel&
setParams (
P p0,
P p1,
P p2,
P p3,
P p4,
P p5) {
const Param*
p[] = { &p0, &p1, &p2, &p3, &p4, &p5 };
return setParams(p, 6); }
93 CudaKernel&
setParams (
P p0,
P p1,
P p2,
P p3,
P p4,
P p5,
P p6) {
const Param*
p[] = { &p0, &p1, &p2, &p3, &p4, &p5, &p6 };
return setParams(p, 7); }
94 CudaKernel&
setParams (
P p0,
P p1,
P p2,
P p3,
P p4,
P p5,
P p6,
P p7) {
const Param*
p[] = { &p0, &p1, &p2, &p3, &p4, &p5, &p6, &p7 };
return setParams(p, 8); }
95 CudaKernel&
setParams (
P p0,
P p1,
P p2,
P p3,
P p4,
P p5,
P p6,
P p7,
P p8) {
const Param*
p[] = { &p0, &p1, &p2, &p3, &p4, &p5, &p6, &p7, &p8 };
return setParams(p, 9); }
96 CudaKernel&
setParams (
P p0,
P p1,
P p2,
P p3,
P p4,
P p5,
P p6,
P p7,
P p8,
P p9) {
const Param*
p[] = { &p0, &p1, &p2, &p3, &p4, &p5, &p6, &p7, &p8, &p9 };
return setParams(p, 10); }
97 CudaKernel&
setParams (
P p0,
P p1,
P p2,
P p3,
P p4,
P p5,
P p6,
P p7,
P p8,
P p9,
P p10) {
const Param*
p[] = { &p0, &p1, &p2, &p3, &p4, &p5, &p6, &p7, &p8, &p9, &p10 };
return setParams(p, 11); }
98 CudaKernel&
setParams (
P p0,
P p1,
P p2,
P p3,
P p4,
P p5,
P p6,
P p7,
P p8,
P p9,
P p10,
P p11) {
const Param*
p[] = { &p0, &p1, &p2, &p3, &p4, &p5, &p6, &p7, &p8, &p9, &p10, &p11 };
return setParams(p, 12); }
99 CudaKernel&
setParams (
P p0,
P p1,
P p2,
P p3,
P p4,
P p5,
P p6,
P p7,
P p8,
P p9,
P p10,
P p11,
P p12) {
const Param*
p[] = { &p0, &p1, &p2, &p3, &p4, &p5, &p6, &p7, &p8, &p9, &p10, &p11, &p12 };
return setParams(p, 13); }
100 CudaKernel&
setParams (
P p0,
P p1,
P p2,
P p3,
P p4,
P p5,
P p6,
P p7,
P p8,
P p9,
P p10,
P p11,
P p12,
P p13) {
const Param*
p[] = { &p0, &p1, &p2, &p3, &p4, &p5, &p6, &p7, &p8, &p9, &p10, &p11, &p12, &p13 };
return setParams(p, 14); }
101 CudaKernel&
setParams (
P p0,
P p1,
P p2,
P p3,
P p4,
P p5,
P p6,
P p7,
P p8,
P p9,
P p10,
P p11,
P p12,
P p13,
P p14) {
const Param*
p[] = { &p0, &p1, &p2, &p3, &p4, &p5, &p6, &p7, &p8, &p9, &p10, &p11, &p12, &p13, &p14 };
return setParams(p, 15); }
102 CudaKernel&
setParams (
P p0,
P p1,
P p2,
P p3,
P p4,
P p5,
P p6,
P p7,
P p8,
P p9,
P p10,
P p11,
P p12,
P p13,
P p14,
P p15) {
const Param*
p[] = { &p0, &p1, &p2, &p3, &p4, &p5, &p6, &p7, &p8, &p9, &p10, &p11, &p12, &p13, &p14, &p15 };
return setParams(p, 16); }
// Launch helpers: only the declarations are visible here; the definitions
// live outside this excerpt.
// NOTE(review): presumably prepareLaunch() reports via its bool result whether
// the launch can proceed and performLaunch() issues it -- confirm against the
// definitions.
128 bool prepareLaunch (
void);
129 void performLaunch (
void);
// Handle returned by getHandle().
133 CUfunction m_function;
// NOTE(review): presumably the value configured through setSharedBankSize() -- confirm.
137 S32 m_sharedBankSize;
CudaKernel & preferShared(void)
CudaModule * getModule(void) const
CudaKernel & setParams(P p0, P p1, P p2, P p3, P p4, P p5, P p6, P p7, P p8, P p9, P p10, P p11)
CudaKernel & preferL1(void)
CudaKernel & setParams(P p0, P p1, P p2, P p3, P p4, P p5, P p6, P p7)
CUdevice int ordinal char int CUdevice dev CUdevprop CUdevice dev CUcontext ctx CUcontext ctx CUcontext pctx CUmodule * module
CudaKernel & launch(const Vec2i &sizeThreads, const Vec2i &blockSize=0)
CUdevice int ordinal char int CUdevice dev CUdevprop CUdevice dev CUcontext ctx CUcontext ctx CUcontext pctx CUmodule const void image CUmodule const void fatCubin CUfunction CUmodule const char name void p CUfunction unsigned int bytes CUtexref pTexRef CUtexref CUarray unsigned int Flags CUtexref int CUaddress_mode am CUtexref unsigned int Flags CUaddress_mode CUtexref int dim CUarray_format int CUtexref hTexRef CUfunction unsigned int numbytes CUfunction int float value CUfunction int CUtexref hTexRef CUfunction int int grid_height CUevent unsigned int Flags CUevent hEvent CUevent hEvent CUstream unsigned int Flags CUstream hStream GLuint bufferobj unsigned int CUdevice dev CUdeviceptr unsigned int CUmodule const char name CUdeviceptr unsigned int bytesize CUdeviceptr dptr void unsigned int bytesize void CUdeviceptr unsigned int ByteCount CUarray unsigned int CUdeviceptr unsigned int ByteCount CUarray unsigned int const void unsigned int ByteCount CUarray unsigned int CUarray unsigned int unsigned int ByteCount void CUarray unsigned int unsigned int CUstream hStream const CUDA_MEMCPY2D pCopy CUdeviceptr const void unsigned int CUstream hStream const CUDA_MEMCPY2D CUstream hStream CUdeviceptr unsigned char unsigned int N CUdeviceptr unsigned int unsigned int N CUdeviceptr unsigned int unsigned short unsigned int unsigned int Height CUarray const CUDA_ARRAY_DESCRIPTOR pAllocateArray CUarray const CUDA_ARRAY3D_DESCRIPTOR pAllocateArray unsigned int CUtexref CUdeviceptr unsigned int bytes CUcontext unsigned int CUdevice device GLenum texture GLenum GLuint buffer GLenum GLuint renderbuffer GLenum GLsizeiptr const GLvoid GLenum usage GLuint shader GLenum type GLsizei const GLuint framebuffers GLsizei const GLuint renderbuffers GLuint v GLuint v GLenum GLenum GLenum GLuint GLint level GLsizei GLuint framebuffers GLuint const GLchar name GLenum GLintptr GLsizeiptr GLvoid data GLuint GLenum GLint param GLuint GLenum GLint param GLhandleARB programObj GLenum GLenum GLsizei 
GLsizei height GLenum GLint GLint GLsizei GLsizei GLsizei GLint GLenum GLenum const GLvoid pixels GLint GLsizei const GLfloat value GLint GLfloat GLfloat v1 GLint GLfloat GLfloat GLfloat v2 GLint GLsizei const GLfloat value GLint GLsizei GLboolean const GLfloat value GLuint program GLuint GLfloat GLfloat GLfloat z GLuint GLint GLenum GLboolean GLsizei const GLvoid pointer GLuint GLuint const GLchar name GLenum GLsizei GLenum GLsizei GLsizei height GLenum GLuint renderbuffer GLenum GLenum GLint * params
CUdeviceptr getCudaPtr(S64 ofs=0)
CudaKernel & setParams(P p0, P p1, P p2, P p3, P p4, P p5, P p6, P p7, P p8, P p9)
F32 launchTimed(const Vec2i &sizeThreads, const Vec2i &blockSize=0, bool yield=true)
CudaKernel & sync(bool yield=true)
CudaKernel & setParams(P p0, P p1, P p2)
CudaKernel & cancelAsync(void)
CudaKernel & setAsync(CUstream stream=NULL)
S32 getNumBytes(void) const
void wrapCPU(void *cpuPtr, S64 size)
CudaKernel & setParams(P p0, P p1, P p2, P p3, P p4, P p5, P p6, P p7, P p8, P p9, P p10)
CudaKernel & setParams(P p0, P p1, P p2, P p3)
CudaKernel(const CudaKernel &other)
F32 launchTimed(int numThreads, const Vec2i &blockSize=0, bool yield=true)
CudaKernel & setParams(P p0, P p1, P p2, P p3, P p4, P p5, P p6, P p7, P p8, P p9, P p10, P p11, P p12, P p13)
CudaKernel(CudaModule *module=NULL, CUfunction function=NULL)
CudaKernel & setGridExact(const Vec2i &blockSize, const Vec2i &gridSize)
CUdeviceptr getMutableCudaPtr(S64 ofs=0)
CUdevice int ordinal char int CUdevice dev CUdevprop CUdevice dev CUcontext ctx CUcontext ctx CUcontext pctx CUmodule const void image CUmodule const void fatCubin CUfunction CUmodule const char name void p CUfunction unsigned int bytes CUtexref pTexRef CUtexref CUarray unsigned int Flags CUtexref int CUaddress_mode am CUtexref unsigned int Flags CUaddress_mode CUtexref int dim CUarray_format int CUtexref hTexRef CUfunction unsigned int numbytes CUfunction int float value CUfunction int CUtexref hTexRef CUfunction int int grid_height CUevent unsigned int Flags CUevent hEvent CUevent hEvent CUstream unsigned int Flags CUstream hStream GLuint bufferobj unsigned int CUdevice dev CUdeviceptr unsigned int CUmodule const char name CUdeviceptr unsigned int bytesize CUdeviceptr dptr void unsigned int bytesize void CUdeviceptr unsigned int ByteCount CUarray unsigned int CUdeviceptr unsigned int ByteCount CUarray unsigned int const void unsigned int ByteCount CUarray unsigned int CUarray unsigned int unsigned int ByteCount void CUarray unsigned int unsigned int CUstream hStream const CUDA_MEMCPY2D pCopy CUdeviceptr const void unsigned int CUstream hStream const CUDA_MEMCPY2D CUstream hStream CUdeviceptr unsigned char unsigned int N CUdeviceptr unsigned int unsigned int N CUdeviceptr unsigned int unsigned short unsigned int unsigned int Height CUarray const CUDA_ARRAY_DESCRIPTOR pAllocateArray CUarray const CUDA_ARRAY3D_DESCRIPTOR pAllocateArray unsigned int CUtexref CUdeviceptr unsigned int bytes CUcontext unsigned int CUdevice device GLenum texture GLenum GLuint buffer GLenum GLuint renderbuffer GLenum GLsizeiptr const GLvoid GLenum usage GLuint shader GLenum type GLsizei const GLuint framebuffers GLsizei const GLuint renderbuffers GLuint v
CUdevice int ordinal char int CUdevice dev CUdevprop CUdevice dev CUcontext ctx CUcontext ctx CUcontext pctx CUmodule const void image CUmodule const void fatCubin CUfunction CUmodule const char name void p CUfunction unsigned int bytes
CudaKernel & setParams(P p0, P p1, P p2, P p3, P p4, P p5, P p6, P p7, P p8)
CudaKernel & setParams(P p0, P p1, P p2, P p3, P p4, P p5, P p6, P p7, P p8, P p9, P p10, P p11, P p12, P p13, P p14)
Param(const Array< T > &v)
CudaKernel & setParams(P p0, P p1, P p2, P p3, P p4, P p5)
Param(const T *ptr, int num=1)
void setCudaPtr(CUdeviceptr ptr)
CudaKernel & operator=(const CudaKernel &other)
Buffer & getBuffer(void) const
CudaKernel & setParams(P p0, P p1, P p2, P p3, P p4, P p5, P p6, P p7, P p8, P p9, P p10, P p11, P p12, P p13, P p14, P p15)
Param(const Array64< T > &v)
Vec2i getDefaultBlockSize(void) const
CUfunction getHandle(void) const
CudaKernel & setParams(P p0, P p1, P p2, P p3, P p4, P p5, P p6, P p7, P p8, P p9, P p10, P p11, P p12)
CudaKernel & setParams(void)
CudaKernel & launch(int numThreads, const Vec2i &blockSize=0)
CudaKernel & setParams(P p0)
CudaKernel & setSharedBankSize(int bytes)
CudaKernel & setParams(const void *ptr, int size)
CudaKernel & setParams(P p0, P p1, P p2, P p3, P p4)
CudaKernel & setParams(P p0, P p1)
CudaKernel & launch(void)
int getAttribute(CUfunction_attribute attrib) const
T set(S idx, const T &item)
CUdevice int ordinal char int CUdevice dev CUdevprop CUdevice dev CUcontext ctx CUcontext ctx CUcontext pctx CUmodule const void image CUmodule const void fatCubin CUfunction CUmodule const char name void * p
F32 launchTimed(bool yield=true)
const T * getPtr(S32idx=0) const
CudaKernel & setGrid(int numThreads, const Vec2i &blockSize=0)
CudaKernel & setParams(P p0, P p1, P p2, P p3, P p4, P p5, P p6)
CUdevice int ordinal char int CUdevice dev CUdevprop CUdevice dev CUcontext ctx CUcontext ctx CUcontext pctx CUmodule const void image CUmodule const void fatCubin CUfunction CUmodule const char name void p CUfunction unsigned int bytes CUtexref pTexRef CUtexref CUarray unsigned int Flags CUtexref int CUaddress_mode am CUtexref unsigned int Flags CUaddress_mode CUtexref int dim CUarray_format int CUtexref hTexRef CUfunction unsigned int numbytes CUfunction int float value CUfunction int CUtexref hTexRef CUfunction int int grid_height CUevent unsigned int Flags CUevent hEvent CUevent hEvent CUstream unsigned int Flags CUstream hStream GLuint bufferobj unsigned int CUdevice dev CUdeviceptr unsigned int CUmodule const char name CUdeviceptr unsigned int bytesize CUdeviceptr dptr void unsigned int bytesize void CUdeviceptr unsigned int ByteCount CUarray unsigned int CUdeviceptr unsigned int ByteCount CUarray unsigned int const void unsigned int ByteCount CUarray unsigned int CUarray unsigned int unsigned int ByteCount void CUarray unsigned int unsigned int CUstream hStream const CUDA_MEMCPY2D pCopy CUdeviceptr const void unsigned int CUstream hStream const CUDA_MEMCPY2D CUstream hStream CUdeviceptr unsigned char unsigned int N CUdeviceptr unsigned int unsigned int N CUdeviceptr unsigned int unsigned short unsigned int unsigned int Height CUarray const CUDA_ARRAY_DESCRIPTOR pAllocateArray CUarray const CUDA_ARRAY3D_DESCRIPTOR pAllocateArray unsigned int CUtexref CUdeviceptr unsigned int bytes CUcontext unsigned int CUdevice device GLenum texture GLenum GLuint buffer GLenum GLuint renderbuffer GLenum GLsizeiptr size