42 #define KDTREE_MASK 0xF0000000 // Binary flag mask
43 #define KDTREE_UNMASK 0x0FFFFFFF // Mask for removing the flags
44 #define KDTREE_LEAF 0xC0000000 // Leaf binary flag
45 #define KDTREE_EMPTYLEAF 0x80000000 // Leaf binary flag
46 #define KDTREE_DIMPOS 28
81 #define TRACE_FUNC_KDTREE \
82 extern "C" __global__ void trace_kdtree( \
99 #define TRACE_FUNC_BVH \
100 extern "C" __global__ void trace_bvh( \
169 texture<float4, 1> t_rays;
170 texture<float4, 1> t_nodesA;
171 texture<float4, 1> t_nodesB;
172 texture<float4, 1> t_nodesC;
173 texture<float4, 1> t_nodesD;
174 texture<float4, 1> t_trisA;
175 texture<float4, 1> t_trisB;
176 texture<float4, 1> t_trisC;
177 texture<int, 1> t_triIndices;
179 __global__
void queryConfig(
void);
185 __global__
void otrace_kernel(
void);
186 texture<float4, 2> t_textureAtlas;
195 #define FETCH_GLOBAL(NAME, IDX, TYPE) ((const TYPE*)NAME)[IDX]
196 #define FETCH_TEXTURE(NAME, IDX, TYPE) tex1Dfetch(t_ ## NAME, IDX)
198 #define STORE_RESULT(RAY, TRI, T, U, V) results[RAY] = make_int4(TRI, __float_as_int(T), __float_as_int(U), __float_as_int(V))
204 template <
class T> __device__ __inline__
void swap(T& a,T& b)
211 __device__ __inline__
float min4(
float a,
float b,
float c,
float d)
213 return fminf(fminf(fminf(a, b), c), d);
216 __device__ __inline__
float max4(
float a,
float b,
float c,
float d)
218 return fmaxf(fmaxf(fmaxf(a, b), c), d);
221 __device__ __inline__
float min3(
float a,
float b,
float c)
223 return fminf(fminf(a, b), c);
226 __device__ __inline__
float max3(
float a,
float b,
float c)
228 return fmaxf(fmaxf(a, b), c);
232 __inline__ __device__
float fminf2(
float a,
float b)
234 int a2 = __float_as_int(a);
235 int b2 = __float_as_int(b);
236 return __int_as_float( a2<b2 ? a2 : b2 );
239 __inline__ __device__
float fmaxf2(
float a,
float b)
241 int a2 = __float_as_int(a);
242 int b2 = __float_as_int(b);
243 return __int_as_float( a2>b2 ? a2 : b2 );
247 __device__ __inline__
int min_min (
int a,
int b,
int c) {
int v;
asm(
"vmin.s32.s32.s32.min %0, %1, %2, %3;" :
"=r"(
v) :
"r"(a),
"r"(b),
"r"(c));
return v; }
248 __device__ __inline__
int min_max (
int a,
int b,
int c) {
int v;
asm(
"vmin.s32.s32.s32.max %0, %1, %2, %3;" :
"=r"(
v) :
"r"(a),
"r"(b),
"r"(c));
return v; }
249 __device__ __inline__
int max_min (
int a,
int b,
int c) {
int v;
asm(
"vmax.s32.s32.s32.min %0, %1, %2, %3;" :
"=r"(
v) :
"r"(a),
"r"(b),
"r"(c));
return v; }
250 __device__ __inline__
int max_max (
int a,
int b,
int c) {
int v;
asm(
"vmax.s32.s32.s32.max %0, %1, %2, %3;" :
"=r"(
v) :
"r"(a),
"r"(b),
"r"(c));
return v; }
251 __device__ __inline__
float fmin_fmin (
float a,
float b,
float c) {
return __int_as_float(min_min(__float_as_int(a), __float_as_int(b), __float_as_int(c))); }
252 __device__ __inline__
float fmin_fmax (
float a,
float b,
float c) {
return __int_as_float(min_max(__float_as_int(a), __float_as_int(b), __float_as_int(c))); }
253 __device__ __inline__
float fmax_fmin (
float a,
float b,
float c) {
return __int_as_float(max_min(__float_as_int(a), __float_as_int(b), __float_as_int(c))); }
254 __device__ __inline__
float fmax_fmax (
float a,
float b,
float c) {
return __int_as_float(max_max(__float_as_int(a), __float_as_int(b), __float_as_int(c))); }
257 __device__ __inline__
float magic_max7(
float a0,
float a1,
float b0,
float b1,
float c0,
float c1,
float d)
259 float t1 = fmin_fmax(a0, a1, d);
260 float t2 = fmin_fmax(b0, b1, t1);
261 float t3 = fmin_fmax(c0, c1, t2);
265 __device__ __inline__
float magic_min7(
float a0,
float a1,
float b0,
float b1,
float c0,
float c1,
float d)
267 float t1 = fmax_fmin(a0, a1, d);
268 float t2 = fmax_fmin(b0, b1, t1);
269 float t3 = fmax_fmin(c0, c1, t2);
274 __device__ __inline__
float spanBeginKepler(
float a0,
float a1,
float b0,
float b1,
float c0,
float c1,
float d){
return fmax_fmax( fminf(a0,a1), fminf(b0,b1), fmin_fmax(c0, c1, d)); }
275 __device__ __inline__
float spanEndKepler(
float a0,
float a1,
float b0,
float b1,
float c0,
float c1,
float d) {
return fmin_fmin( fmaxf(a0,a1), fmaxf(b0,b1), fmax_fmin(c0, c1, d)); }
278 __device__ __inline__
float spanBeginFermi(
float a0,
float a1,
float b0,
float b1,
float c0,
float c1,
float d) {
return magic_max7(a0, a1, b0, b1, c0, c1, d); }
279 __device__ __inline__
float spanEndFermi(
float a0,
float a1,
float b0,
float b1,
float c0,
float c1,
float d) {
return magic_min7(a0, a1, b0, b1, c0, c1, d); }
#define TRACE_FUNC_KDTREE
CUdevice int ordinal char int CUdevice dev CUdevprop CUdevice dev CUcontext ctx CUcontext ctx CUcontext pctx CUmodule const void image CUmodule const void fatCubin CUfunction CUmodule const char name void p CUfunction unsigned int bytes CUtexref pTexRef CUtexref CUarray unsigned int Flags CUtexref int CUaddress_mode am CUtexref unsigned int Flags CUaddress_mode CUtexref int dim CUarray_format int CUtexref hTexRef CUfunction unsigned int numbytes CUfunction int float value CUfunction int CUtexref hTexRef CUfunction int int grid_height CUevent unsigned int Flags CUevent hEvent CUevent hEvent CUstream unsigned int Flags CUstream hStream GLuint bufferobj unsigned int CUdevice dev CUdeviceptr unsigned int CUmodule const char name CUdeviceptr unsigned int bytesize CUdeviceptr dptr void unsigned int bytesize void CUdeviceptr unsigned int ByteCount CUarray unsigned int CUdeviceptr unsigned int ByteCount CUarray unsigned int const void unsigned int ByteCount CUarray unsigned int CUarray unsigned int unsigned int ByteCount void CUarray unsigned int unsigned int CUstream hStream const CUDA_MEMCPY2D pCopy CUdeviceptr const void unsigned int CUstream hStream const CUDA_MEMCPY2D CUstream hStream CUdeviceptr unsigned char unsigned int N CUdeviceptr unsigned int unsigned int N CUdeviceptr unsigned int unsigned short unsigned int unsigned int Height CUarray const CUDA_ARRAY_DESCRIPTOR pAllocateArray CUarray const CUDA_ARRAY3D_DESCRIPTOR pAllocateArray unsigned int CUtexref CUdeviceptr unsigned int bytes CUcontext unsigned int CUdevice device GLenum texture GLenum GLuint buffer GLenum GLuint renderbuffer GLenum GLsizeiptr const GLvoid GLenum usage GLuint shader GLenum type GLsizei const GLuint framebuffers GLsizei const GLuint renderbuffers GLuint v
FW_CUDA_FUNC void swap(T &a, T &b)