NTrace
GPU ray tracing framework
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros
CudaKDTreeTracer.cpp
Go to the documentation of this file.
1 
2 #include "CudaKDTreeTracer.hpp"
3 #include "gui/Window.hpp"
4 #include "io/File.hpp"
5 
6 using namespace FW;
7 //------------------------------------------------------------------------
8 
10 : m_kdtree(NULL)
11 {
13  m_compiler.addOptions("-use_fast_math");
14 }
15 
16 //------------------------------------------------------------------------
17 
19 {
20 }
21 
22 //------------------------------------------------------------------------
23 
// Selects the traversal kernel, compiles it, and reads back the launch
// configuration that the kernel module reports for itself.
//
// kernelName: requested kernel name.
// NOTE(review): kernelName is currently ignored -- the early-out and the
// assignment from it are commented out below and the name is hard-coded.
// NOTE(review): this listing jumps from line 38 to 40 and from 41 to 43, so
// statements inside the config-initialisation block may be missing from this
// extract -- confirm against the original file before editing.
24 void CudaKDTreeTracer::setKernel(const String& kernelName)
25 {
26  //if (m_kernelName == kernelName)
27  // return;
28  //m_kernelName = kernelName;
29 
// Fixed kernel name; compileKernel() turns it into "src/rt/kernels/<name>.cu".
30  m_kernelName = String("fermi_kdtree_while_while_leafRef");
31 
32  // Compile kernel.
33 
34  CudaModule* module = compileKernel();
35 
36  // Initialize config with default values.
37  {
// Writes go through the mutable pointer of the module global "g_config",
// reinterpreted as a KernelConfig.
38  KernelConfig& c = *(KernelConfig*)module->getGlobal("g_config").getMutablePtr();
40  c.blockWidth = 0;
41  c.blockHeight = 0;
43  }
44 
45  // Query config.
46 
// Launches the module's "queryConfig" kernel with arguments (1, 1), then copies
// the contents of "g_config" into m_kernelConfig.
// Presumably queryConfig overwrites the zeroed block dimensions -- traceBatch
// divides by a value derived from them; TODO confirm.
47  module->getKernel("queryConfig").launch(1, 1);
48  m_kernelConfig = *(const KernelConfig*)module->getGlobal("g_config").getPtr();
49 }
50 
51 //------------------------------------------------------------------------
52 
// Body of traceBatch. The signature line (listing line 53) is missing from
// this extract; the index further down this page lists it as
// "F32 traceBatch(RayBuffer &rays)".
//
// Binds the k-d tree buffers and the ray/result buffers as kernel parameters,
// picks a launch size from m_kernelConfig, and returns the value produced by
// kernel.launchTimed(). Returns 0.0f immediately when the ray buffer is empty.
54 {
55  // No rays => done.
56 
57  int numRays = rays.getSize();
58  if (!numRays)
59  return 0.0f;
60 
61  //KernelInput& in = *((KernelInput*)module->getGlobal("c_in").getMutablePtr());
62  // Start the timer
// NOTE(review): m_timer is restarted here but never read or stopped in this
// function; the returned time comes from launchTimed() instead.
63  m_timer.unstart();
64  m_timer.start();
65 
// NOTE(review): m_kdtree is dereferenced without a null check, while the
// constructor initialises it to NULL -- presumably it is assigned elsewhere
// before tracing; confirm.
// The .x component of both offset vectors is 0 and the .y component holds the
// buffer size; only .x is used below.
66  CUdeviceptr nodePtr = m_kdtree->getNodeBuffer().getCudaPtr();
67  Vec2i nodeOfsA = Vec2i(0, (S32)m_kdtree->getNodeBuffer().getSize());
68 
69  CUdeviceptr triPtr = m_kdtree->getTriWoopBuffer().getCudaPtr();
70  Vec2i triOfsA = Vec2i(0, (S32)m_kdtree->getTriWoopBuffer().getSize());
71  Buffer& indexBuf = m_kdtree->getTriIndexBuffer();
72 
// NOTE(review): compileKernel() is invoked on every batch; whether the
// compiler caches the result is not visible here -- confirm.
73  CudaModule* module = compileKernel();
74  CudaKernel kernel = module->getKernel("trace_kdtree");
75 
// Scene bounding-box corners wrapped as kernel parameters; the 3 is presumably
// the number of floats per corner -- TODO confirm against CudaKernel::Param.
76  CudaKernel::Param bmin(m_bbox.min().getPtr(), 3);
77  CudaKernel::Param bmax(m_bbox.max().getPtr(), 3);
78 
79  // Set input.
80  // The new (this?) version has it via parameters, not const memory
81  kernel.setParams(
82  rays.getSize(),
83  rays.getNeedClosestHit() == false,
84  bmin,
85  bmax,
// NOTE(review): this scale factor is built from (max + min), not the extent
// (max - min) -- confirm that the sum is intended.
86  (m_bbox.max() + m_bbox.min()).length() * 0.000001f,
87  rays.getRayBuffer().getCudaPtr(), // rays
88  rays.getResultBuffer().getMutableCudaPtr(), // results
// The same node pointer is passed four times and the same triangle pointer
// three times (every offset is .x == 0).
89  nodePtr + nodeOfsA.x,
90  nodePtr + nodeOfsA.x,
91  nodePtr + nodeOfsA.x,
92  nodePtr + nodeOfsA.x,
93  triPtr + triOfsA.x,
94  triPtr + triOfsA.x,
95  triPtr + triOfsA.x,
96  indexBuf.getCudaPtr()
97  );
98 
99  // Set texture references.
100  module->setTexRef("t_rays", rays.getRayBuffer(), CU_AD_FORMAT_FLOAT, 4);
101  //m_module->setTexRef("t_nodesI", nodePtr + nodeOfsA.x, nodeOfsA.y, CU_AD_FORMAT_FLOAT, 4);
102  //m_module->setTexRef("t_trisA", triPtr + triOfsA.x, triOfsA.y, CU_AD_FORMAT_FLOAT, 4);
103  //m_module->setTexRef("t_triIndices", indexBuf, CU_AD_FORMAT_SIGNED_INT32, 1);
104 
105  // Determine block and grid sizes.
// One warp per 32 rays (rounded up), unless the kernel config asks for
// persistent threads, in which case the warp counter global is reset and a
// fixed warp count is used.
106  int desiredWarps = (rays.getSize() + 31) / 32;
107  if (m_kernelConfig.usePersistentThreads != 0)
108  {
109  *(S32*)module->getGlobal("g_warpCounter").getMutablePtr() = 0;
110  desiredWarps = 720; // Tesla: 30 SMs * 24 warps, Fermi: 15 SMs * 48 warps
111  }
112 
// NOTE(review): if blockWidth * blockHeight is 0 (the value setKernel writes
// before launching queryConfig), blockWarps is 0 and the division below is a
// division by zero -- this relies on the queried config being non-zero.
113  Vec2i blockSize(m_kernelConfig.blockWidth, m_kernelConfig.blockHeight);
114  int blockWarps = (blockSize.x * blockSize.y + 31) / 32;
115  int numBlocks = (desiredWarps + blockWarps - 1) / blockWarps;
116 
117  // Launch.
// First argument is numBlocks times the threads per block, presumably the
// total thread count expected by launchTimed -- TODO confirm.
118  return kernel.launchTimed(numBlocks * blockSize.x * blockSize.y, blockSize);
119  //return module->launchKernelTimed(kernel, blockSize, gridSize);
120 }
121 
122 //------------------------------------------------------------------------
123 
124 CudaModule* CudaKDTreeTracer::compileKernel(void)
125 {
126  m_compiler.setSourceFile(FW::sprintf("src/rt/kernels/%s.cu", m_kernelName.getPtr()));
127  m_compiler.clearDefines();
128  CudaModule* module = m_compiler.compile();
129  return module;
130 }
131 
132 //------------------------------------------------------------------------
CudaKDTreeTracer(void)
Constructor.
FW_CUDA_FUNC T length(const VectorBase< T, L, S > &v)
Definition: Math.hpp:459
S32 getSize() const
Gets size of the buffer (number of rays).
Definition: RayBuffer.hpp:52
#define NULL
Definition: Defs.hpp:39
const char * getPtr(void) const
Definition: String.hpp:51
Buffer & getTriIndexBuffer(void)
Returns triangle index buffer.
Definition: CudaKDTree.hpp:58
FW_CUDA_FUNC const Vec3f & max(void) const
Definition: Util.hpp:49
CudaModule * compile(bool enablePrints=true, bool autoFail=true)
CUdevice int ordinal char int CUdevice dev CUdevprop CUdevice dev CUcontext ctx CUcontext ctx CUcontext pctx CUmodule * module
Definition: DLLImports.inl:60
void unstart(void)
Definition: Timer.hpp:43
CUdeviceptr getCudaPtr(S64 ofs=0)
Definition: Buffer.hpp:108
void start(void)
Definition: Timer.hpp:42
CudaKernel getKernel(const String &name)
Definition: CudaModule.cpp:80
S64 getSize(void) const
Definition: Buffer.hpp:69
Buffer & getTriWoopBuffer(void)
Returns buffer of woopified triangles.
Definition: CudaKDTree.hpp:65
const U8 * getPtr(S64 ofs=0)
Definition: Buffer.hpp:106
Buffer & getRayBuffer()
Gets ray buffer.
Definition: RayBuffer.hpp:167
bool getNeedClosestHit() const
Returns whether the closest hit is needed.
Definition: RayBuffer.hpp:150
void setTexRef(const String &name, Buffer &buf, CUarray_format format, int numComponents)
Definition: CudaModule.cpp:193
float F32
Definition: Defs.hpp:89
CUdeviceptr getMutableCudaPtr(S64 ofs=0)
Definition: Buffer.hpp:112
Buffer & getResultBuffer()
Gets ray result buffer.
Definition: RayBuffer.hpp:173
Ray buffer class. Stores rays.
Definition: RayBuffer.hpp:38
U8 * getMutablePtr(S64 ofs=0)
Definition: Buffer.hpp:110
signed int S32
Definition: Defs.hpp:88
FW_CUDA_FUNC const Vec3f & min(void) const
Definition: Util.hpp:48
String sprintf(const char *fmt,...)
Definition: Defs.cpp:241
Definitions for the Cuda KDTree Tracer.
FW_CUDA_FUNC const F32 * getPtr(void) const
Definition: Math.hpp:333
Buffer & getGlobal(const String &name)
Definition: CudaModule.cpp:117
CudaKernel & setParams(const void *ptr, int size)
Definition: CudaKernel.hpp:83
Buffer & getNodeBuffer(void)
Returns node buffer.
Definition: CudaKDTree.hpp:52
void setKernel(const String &kernelName)
Sets the kernel that performs the actual traversal of the k-d tree on the GPU.
CudaKernel & launch(void)
Definition: CudaKernel.cpp:179
F32 launchTimed(bool yield=true)
Definition: CudaKernel.cpp:188
static void staticInit(void)
Definition: CudaModule.cpp:311
F32 traceBatch(RayBuffer &rays)
Traces given batch of rays.
void addOptions(const String &options)
void clearDefines(void)
void setSourceFile(const String &path)
~CudaKDTreeTracer(void)
Destructor.