NTrace
GPU ray tracing framework
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros
CudaBVHTracer.cpp
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2009-2011, NVIDIA Corporation
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions are met:
7  * * Redistributions of source code must retain the above copyright
8  * notice, this list of conditions and the following disclaimer.
9  * * Redistributions in binary form must reproduce the above copyright
10  * notice, this list of conditions and the following disclaimer in the
11  * documentation and/or other materials provided with the distribution.
12  * * Neither the name of NVIDIA Corporation nor the
13  * names of its contributors may be used to endorse or promote products
14  * derived from this software without specific prior written permission.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19  * DISCLAIMED. IN NO EVENT SHALL <COPYRIGHT HOLDER> BE LIABLE FOR ANY
20  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
22  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
23  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26  */
27 
28 #include "cuda/CudaBVHTracer.hpp"
29 #include "gui/Window.hpp"
30 #include "io/File.hpp"
31 
32 using namespace FW;
33 
34 //------------------------------------------------------------------------
35 
37 : m_bvh(NULL)
38 {
    // Constructor body: the tracer starts with no BVH attached (m_bvh is NULL above).
    // Every kernel build done through m_compiler gets the "-use_fast_math" option.
40  m_compiler.addOptions("-use_fast_math");
    // Start on the plain "trace_bvh" launch path; setKernel() turns this on when the
    // kernel name contains "otracer".
41  m_otrace = false;
42 }
43 
44 //------------------------------------------------------------------------
45 
47 {
48 }
49 
50 //------------------------------------------------------------------------
51 
52 void CudaBVHTracer::setKernel(const String& kernelName)
53 {
54  // Not changed => done.
55 
56  if (m_kernelName == kernelName)
57  return;
58  m_kernelName = kernelName;
59 
60  // Compile kernel.
61 
62  CudaModule* module = compileKernel();
63 
64  // Initialize config with default values.
65  {
66  KernelConfig& c = *(KernelConfig*)module->getGlobal("g_config").getMutablePtr();
68  c.blockWidth = 0;
69  c.blockHeight = 0;
71  }
72 
73  // Query config.
74 
75  module->getKernel("queryConfig").launch(1, 1);
76  m_kernelConfig = *(const KernelConfig*)module->getGlobal("g_config").getPtr();
77 
78  // Detect whether this is one of the otrace kernels (otrace kernels compute the final shading
79  // color inside the kernel
80  if(strstr(kernelName.getPtr(), "otracer"))
81  {
82  m_otrace = true;
83  }
84 }
85 
86 //------------------------------------------------------------------------
87 
89 {
    // Body of the batch-trace routine. The signature line is not part of this listing;
    // presumably this is F32 CudaBVHTracer::traceBatch(RayBuffer& rays) - confirm.
    // Returns 0.0f for an empty ray buffer, otherwise whatever launchTimed() returns.
    // NOTE(review): the two branches below duplicate the buffer lookup, texture
    // binding and launch sizing code; only the kernel name and the way inputs are
    // handed over differ.
90  // No rays => done.
91 
92  int numRays = rays.getSize();
93  if (!numRays)
94  return 0.0f;
95 
96  // Check BVH consistency.
97 
98  if (!m_bvh)
99  fail("CudaBVHTracer: No BVH!");
100  if (m_bvh->getLayout() != getDesiredBVHLayout())
101  fail("CudaBVHTracer: Incorrect BVH layout!");
102 
103 
    // Plain path: inputs are passed as kernel parameters to "trace_bvh".
104  if(!m_otrace)
105  {
106  // Get BVH buffers.
107 
    // Each sub-array is a Vec2i: .x is added to the base device pointer and .y is
    // handed to setTexRef() right after the address - presumably offset and size
    // in bytes; confirm against CudaBVH.
108  CUdeviceptr nodePtr = m_bvh->getNodeBuffer().getCudaPtr();
109  CUdeviceptr triPtr = m_bvh->getTriWoopBuffer().getCudaPtr();
110  Buffer& indexBuf = m_bvh->getTriIndexBuffer();
111  Vec2i nodeOfsA = m_bvh->getNodeSubArray(0);
112  Vec2i nodeOfsB = m_bvh->getNodeSubArray(1);
113  Vec2i nodeOfsC = m_bvh->getNodeSubArray(2);
114  Vec2i nodeOfsD = m_bvh->getNodeSubArray(3);
115  Vec2i triOfsA = m_bvh->getTriWoopSubArray(0);
116  Vec2i triOfsB = m_bvh->getTriWoopSubArray(1);
117  Vec2i triOfsC = m_bvh->getTriWoopSubArray(2);
118 
119  // Compile kernel.
120 
121  CudaModule* module = compileKernel();
122  CudaKernel kernel = module->getKernel("trace_bvh");
123 
124  // Set parameters.
125 
    // anyHit is 1 exactly when the ray buffer does NOT need the closest hit.
126  kernel.setParams(
127  numRays, // numRays
128  (rays.getNeedClosestHit()) ? 0 : 1, // anyHit
129  rays.getRayBuffer().getCudaPtr(), // rays
130  rays.getResultBuffer().getMutableCudaPtr(), // results
131  nodePtr + nodeOfsA.x, // nodesA
132  nodePtr + nodeOfsB.x, // nodesB
133  nodePtr + nodeOfsC.x, // nodesC
134  nodePtr + nodeOfsD.x, // nodesD
135  triPtr + triOfsA.x, // trisA
136  triPtr + triOfsB.x, // trisB
137  triPtr + triOfsC.x, // trisC
138  indexBuf.getCudaPtr()); // triIndices
139 
140  // Set texture references.
141 
    // The same data that is passed as parameters above is also bound to textures.
142  module->setTexRef("t_rays", rays.getRayBuffer(), CU_AD_FORMAT_FLOAT, 4);
143  module->setTexRef("t_nodesA", nodePtr + nodeOfsA.x, nodeOfsA.y, CU_AD_FORMAT_FLOAT, 4);
144  module->setTexRef("t_nodesB", nodePtr + nodeOfsB.x, nodeOfsB.y, CU_AD_FORMAT_FLOAT, 4);
145  module->setTexRef("t_nodesC", nodePtr + nodeOfsC.x, nodeOfsC.y, CU_AD_FORMAT_FLOAT, 4);
146  module->setTexRef("t_nodesD", nodePtr + nodeOfsD.x, nodeOfsD.y, CU_AD_FORMAT_FLOAT, 4);
147  module->setTexRef("t_trisA", triPtr + triOfsA.x, triOfsA.y, CU_AD_FORMAT_FLOAT, 4);
148  module->setTexRef("t_trisB", triPtr + triOfsB.x, triOfsB.y, CU_AD_FORMAT_FLOAT, 4);
149  module->setTexRef("t_trisC", triPtr + triOfsC.x, triOfsC.y, CU_AD_FORMAT_FLOAT, 4);
150  module->setTexRef("t_triIndices", indexBuf, CU_AD_FORMAT_SIGNED_INT32, 1);
151 
152  // Determine block and grid sizes.
153 
    // Default: one warp per 32 rays, rounded up. With persistent threads the
    // module's g_warpCounter global is reset and a fixed warp count is used instead.
154  int desiredWarps = (numRays + 31) / 32;
155  if (m_kernelConfig.usePersistentThreads != 0)
156  {
157  *(S32*)module->getGlobal("g_warpCounter").getMutablePtr() = 0;
158  desiredWarps = 720; // Tesla: 30 SMs * 24 warps, Fermi: 15 SMs * 48 warps
159  }
160 
    // NOTE(review): blockWarps is 0 when blockWidth * blockHeight is 0 (setKernel()
    // seeds both with 0 before queryConfig runs), which makes the division below
    // divide by zero - confirm every kernel's queryConfig sets a non-zero block size.
161  Vec2i blockSize(m_kernelConfig.blockWidth, m_kernelConfig.blockHeight);
162  int blockWarps = (blockSize.x * blockSize.y + 31) / 32;
163  int numBlocks = (desiredWarps + blockWarps - 1) / blockWarps;
164 
165  // Launch.
166 
    // First argument is the total thread count (blocks * threads per block).
167  return kernel.launchTimed(numBlocks * blockSize.x * blockSize.y, blockSize);
168  }
169  else
170  {
    // Otrace path: inputs are written into the module's "c_OtraceInput" global
    // and "otrace_kernel" is launched without explicit parameters.
171  // Get BVH buffers.
172  CUdeviceptr nodePtr = m_bvh->getNodeBuffer().getCudaPtr();
173  CUdeviceptr triPtr = m_bvh->getTriWoopBuffer().getCudaPtr();
174  Buffer& indexBuf = m_bvh->getTriIndexBuffer();
175  Vec2i nodeOfsA = m_bvh->getNodeSubArray(0);
176  Vec2i nodeOfsB = m_bvh->getNodeSubArray(1);
177  Vec2i nodeOfsC = m_bvh->getNodeSubArray(2);
178  Vec2i nodeOfsD = m_bvh->getNodeSubArray(3);
179  Vec2i triOfsA = m_bvh->getTriWoopSubArray(0);
180  Vec2i triOfsB = m_bvh->getTriWoopSubArray(1);
181  Vec2i triOfsC = m_bvh->getTriWoopSubArray(2);
182 
183  // Compile kernel.
184  CudaModule* module = compileKernel();
185  CudaKernel kernel = module->getKernel("otrace_kernel");
186 
187  // Setup input struct.
    // NOTE(review): this listing skips source line 192 and lines 202-214, so some
    // OtraceInput fields are assigned in code that is not visible here.
188  OtraceInput& in = *(OtraceInput*)module->getGlobal("c_OtraceInput").getMutablePtr();
189  in.numRays = numRays;
190  in.anyHit = (rays.getNeedClosestHit()) ? 0 : 1;
191  in.rays = rays.getRayBuffer().getMutableCudaPtr();
193  in.nodesA = nodePtr + nodeOfsA.x;
194  in.nodesB = nodePtr + nodeOfsB.x;
195  in.nodesC = nodePtr + nodeOfsC.x;
196  in.nodesD = nodePtr + nodeOfsD.x;
197  in.trisA = triPtr + triOfsA.x;
198  in.trisB = triPtr + triOfsB.x;
199  in.trisC = triPtr + triOfsC.x;
200  in.triIndices = indexBuf.getCudaPtr();
201  if(m_scene)
202  {
    // rand() is not seeded anywhere in the visible code.
215  in.randomSeed = rand();
216  }
217 
218  // Set texture references.
219 
    // NOTE(review): m_scene is null-checked just above but dereferenced
    // unconditionally on the next line; a tracer without a scene would crash here.
220  module->setTexRef("t_textureAtlas", *m_scene->getTextureAtlas()->getAtlasTexture().getImage(), true, true, true, false);
221  module->setTexRef("t_rays", rays.getRayBuffer(), CU_AD_FORMAT_FLOAT, 4);
222  module->setTexRef("t_nodesA", nodePtr + nodeOfsA.x, nodeOfsA.y, CU_AD_FORMAT_FLOAT, 4);
223  module->setTexRef("t_nodesB", nodePtr + nodeOfsB.x, nodeOfsB.y, CU_AD_FORMAT_FLOAT, 4);
224  module->setTexRef("t_nodesC", nodePtr + nodeOfsC.x, nodeOfsC.y, CU_AD_FORMAT_FLOAT, 4);
225  module->setTexRef("t_nodesD", nodePtr + nodeOfsD.x, nodeOfsD.y, CU_AD_FORMAT_FLOAT, 4);
226  module->setTexRef("t_trisA", triPtr + triOfsA.x, triOfsA.y, CU_AD_FORMAT_FLOAT, 4);
227  module->setTexRef("t_trisB", triPtr + triOfsB.x, triOfsB.y, CU_AD_FORMAT_FLOAT, 4);
228  module->setTexRef("t_trisC", triPtr + triOfsC.x, triOfsC.y, CU_AD_FORMAT_FLOAT, 4);
229  module->setTexRef("t_triIndices", indexBuf, CU_AD_FORMAT_SIGNED_INT32, 1);
230 
231  // Determine block and grid sizes.
232 
    // Same sizing logic as the plain path, including the same division-by-zero
    // hazard when the configured block size is 0.
233  int desiredWarps = (numRays + 31) / 32;
234  if (m_kernelConfig.usePersistentThreads != 0)
235  {
236  *(S32*)module->getGlobal("g_warpCounter").getMutablePtr() = 0;
237  desiredWarps = 720; // Tesla: 30 SMs * 24 warps, Fermi: 15 SMs * 48 warps
238  }
239 
240  Vec2i blockSize(m_kernelConfig.blockWidth, m_kernelConfig.blockHeight);
241  int blockWarps = (blockSize.x * blockSize.y + 31) / 32;
242  int numBlocks = (desiredWarps + blockWarps - 1) / blockWarps;
243 
244  // Launch.
245 
246  return kernel.launchTimed(numBlocks * blockSize.x * blockSize.y, blockSize);
247  }
248 }
249 
250 //------------------------------------------------------------------------
251 
252 CudaModule* CudaBVHTracer::compileKernel(void)
253 {
254  m_compiler.setSourceFile(sprintf("src/rt/kernels/%s.cu", m_kernelName.getPtr()));
255  m_compiler.clearDefines();
256  CudaModule* module = m_compiler.compile();
257  return module;
258 }
259 
260 //------------------------------------------------------------------------
S32 getSize() const
Gets size of the buffer (number of rays).
Definition: RayBuffer.hpp:52
#define NULL
Definition: Defs.hpp:39
const char * getPtr(void) const
Definition: String.hpp:51
Buffer & getTriVtxIndexBuffer(void)
Returns buffer of triangles' vertex indices.
Definition: Scene.hpp:75
CUdeviceptr normals
CUdeviceptr trisC
Vec2i getNodeSubArray(int idx) const
Returns node subarray.
Definition: CudaBVH.cpp:380
CUdeviceptr matInfo
CUdeviceptr nodesD
CudaModule * compile(bool enablePrints=true, bool autoFail=true)
Buffer & getMaterialIds(void)
Returns material id buffer.
Definition: Scene.hpp:131
CUdevice int ordinal char int CUdevice dev CUdevprop CUdevice dev CUcontext ctx CUcontext ctx CUcontext pctx CUmodule * module
Definition: DLLImports.inl:60
CUdeviceptr triVertIndex
const Image * getImage(void) const
Definition: Texture.hpp:64
BVHLayout getDesiredBVHLayout(void) const
F32 traceBatch(RayBuffer &rays)
Traces given batch of rays.
const Texture & getAtlasTexture(void)
CUdeviceptr trisA
CUdeviceptr nodesB
Buffer & getVtxPosBuffer(void)
Returns vertex position buffer.
Definition: Scene.hpp:103
CUdeviceptr nodesC
CUdeviceptr triIndices
CUdeviceptr getCudaPtr(S64 ofs=0)
Definition: Buffer.hpp:108
CudaBVHTracer(void)
Constructor.
TextureAtlas * getTextureAtlas(void)
Returns texture atlas holding scene's textures.
Definition: Scene.hpp:151
CudaKernel getKernel(const String &name)
Definition: CudaModule.cpp:80
Definitions for the BVH CUDA Tracer.
~CudaBVHTracer(void)
Destructor.
void setKernel(const String &kernelName)
Sets the kernel that should perform the actual traversal of the BVH on the GPU.
const U8 * getPtr(S64 ofs=0)
Definition: Buffer.hpp:106
Buffer & getRayBuffer()
Gets ray buffer.
Definition: RayBuffer.hpp:167
bool getNeedClosestHit() const
Returns whether the closest hit is needed.
Definition: RayBuffer.hpp:150
void setTexRef(const String &name, Buffer &buf, CUarray_format format, int numComponents)
Definition: CudaModule.cpp:193
int getNumVertices(void) const
Definition: Scene.hpp:66
float F32
Definition: Defs.hpp:89
Buffer & getTextureAtlasInfo(void)
Returns texture atlas information buffer.
Definition: Scene.hpp:124
CUdeviceptr getMutableCudaPtr(S64 ofs=0)
Definition: Buffer.hpp:112
Buffer & getResultBuffer()
Gets ray result buffer.
Definition: RayBuffer.hpp:173
BVHLayout getLayout(void) const
Definition: CudaBVH.hpp:135
Buffer & getTriWoopBuffer(void)
Definition: CudaBVH.hpp:145
Ray buffer class. Stores rays.
Definition: RayBuffer.hpp:38
CUdeviceptr rays
U8 * getMutablePtr(S64 ofs=0)
Definition: Buffer.hpp:110
CUdeviceptr matId
int getNumTriangles(void) const
Definition: Scene.hpp:61
CUdeviceptr tris
signed int S32
Definition: Defs.hpp:88
CUdeviceptr emissive
String sprintf(const char *fmt,...)
Definition: Defs.cpp:241
Buffer & getMaterialInfo(void)
Returns material info buffer.
Definition: Scene.hpp:145
Buffer & getGlobal(const String &name)
Definition: CudaModule.cpp:117
CUdeviceptr verts
Buffer & getTriIndexBuffer(void)
Definition: CudaBVH.hpp:150
CudaKernel & setParams(const void *ptr, int size)
Definition: CudaKernel.hpp:83
CUdeviceptr results
int getNumEmissive(void) const
Definition: Scene.hpp:68
Buffer & getVtxNormalBuffer(void)
Returns vertex normal buffer.
Definition: Scene.hpp:110
CUdeviceptr nodesA
CUdeviceptr trisB
CudaKernel & launch(void)
Definition: CudaKernel.cpp:179
CUdeviceptr atlasInfo
Buffer & getEmissiveTris(void)
Returns buffer of emissive triangles.
Definition: Scene.hpp:138
Vec2i getTriWoopSubArray(int idx) const
Returns woop triangle subarray.
Definition: CudaBVH.cpp:392
CUdeviceptr texCoords
Buffer & getNodeBuffer(void)
Definition: CudaBVH.hpp:140
F32 launchTimed(bool yield=true)
Definition: CudaKernel.cpp:188
static void staticInit(void)
Definition: CudaModule.cpp:311
void fail(const char *fmt,...)
Definition: Defs.cpp:304
Buffer & getVtxTexCoordBuffer(void)
Returns vertex texture coordinate buffer.
Definition: Scene.hpp:117
void addOptions(const String &options)
void clearDefines(void)
void setSourceFile(const String &path)