NTrace
GPU ray tracing framework
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros
CudaBVH.hpp
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2009-2011, NVIDIA Corporation
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions are met:
7  * * Redistributions of source code must retain the above copyright
8  * notice, this list of conditions and the following disclaimer.
9  * * Redistributions in binary form must reproduce the above copyright
10  * notice, this list of conditions and the following disclaimer in the
11  * documentation and/or other materials provided with the distribution.
12  * * Neither the name of NVIDIA Corporation nor the
13  * names of its contributors may be used to endorse or promote products
14  * derived from this software without specific prior written permission.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19  * DISCLAIMED. IN NO EVENT SHALL <COPYRIGHT HOLDER> BE LIABLE FOR ANY
20  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
22  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
23  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26  */
27 
33 #pragma once
34 #include "gpu/Buffer.hpp"
35 #include "io/Stream.hpp"
36 #include "bvh/BVH.hpp"
37 #include "CudaAS.hpp"
39 #include "gpu/CudaModule.hpp"
40 
41 namespace FW
42 {
43 //------------------------------------------------------------------------
44 // Nodes / BVHLayout_Compact
45 // nodes[innerOfs + 0 ] = Vec4f(c0.lo.x, c0.hi.x, c0.lo.y, c0.hi.y)
46 // nodes[innerOfs + 16] = Vec4f(c1.lo.x, c1.hi.x, c1.lo.y, c1.hi.y)
47 // nodes[innerOfs + 32] = Vec4f(c0.lo.z, c0.hi.z, c1.lo.z, c1.hi.z)
48 // nodes[innerOfs + 48] = Vec4i(c0.innerOfs or ~c0.triOfs, c1.innerOfs or ~c1.triOfs, 0, 0)
49 //
50 // TriWoop / BVHLayout_Compact
51 // triWoop[triOfs*16 + 0 ] = Vec4f(woopZ)
52 // triWoop[triOfs*16 + 16] = Vec4f(woopU)
53 // triWoop[triOfs*16 + 32] = Vec4f(woopV)
54 // triWoop[endOfs*16 + 0 ] = Vec4f(-0.0f, -0.0f, -0.0f, -0.0f)
55 //
56 // TriIndex / BVHLayout_Compact
57 // triIndex[triOfs*4] = origIdx
58 //
59 //------------------------------------------------------------------------
60 //
61 // Nodes / BVHLayout_AOS_AOS, BVHLayout_AOS_SOA
62 // nodes[node*64 + 0 ] = Vec4f(c0.lo.x, c0.hi.x, c0.lo.y, c0.hi.y)
63 // nodes[node*64 + 16] = Vec4f(c1.lo.x, c1.hi.x, c1.lo.y, c1.hi.y)
64 // nodes[node*64 + 32] = Vec4f(c0.lo.z, c0.hi.z, c1.lo.z, c1.hi.z)
65 // nodes[inner*64 + 48] = Vec4f(c0.inner or ~c0.leaf, c1.inner or ~c1.leaf, 0, 0)
66 // nodes[leaf*64 + 48] = Vec4i(triStart, triEnd, 0, 0)
67 //
68 // Nodes / BVHLayout_SOA_AOS, BVHLayout_SOA_SOA
69 // nodes[node*16 + size*0/4] = Vec4f(c0.lo.x, c0.hi.x, c0.lo.y, c0.hi.y)
70 // nodes[node*16 + size*1/4] = Vec4f(c1.lo.x, c1.hi.x, c1.lo.y, c1.hi.y)
71 // nodes[node*16 + size*2/4] = Vec4f(c0.lo.z, c0.hi.z, c1.lo.z, c1.hi.z)
72 // nodes[inner*16 + size*3/4] = Vec4f(c0.inner or ~c0.leaf, c1.inner or ~c1.leaf, 0, 0)
73 // nodes[leaf*16 + size*3/4] = Vec4i(triStart, triEnd, 0, 0)
74 //
75 // TriWoop / BVHLayout_AOS_AOS, BVHLayout_SOA_AOS
76 // triWoop[tri*64 + 0 ] = Vec4f(woopZ)
77 // triWoop[tri*64 + 16] = Vec4f(woopU)
78 // triWoop[tri*64 + 32] = Vec4f(woopV)
79 //
80 // TriWoop / BVHLayout_AOS_SOA, BVHLayout_SOA_SOA
81 // triWoop[tri*16 + size*0/4] = Vec4f(woopZ)
82 // triWoop[tri*16 + size*1/4] = Vec4f(woopU)
83 // triWoop[tri*16 + size*2/4] = Vec4f(woopV)
84 //
85 // TriIndex / BVHLayout_AOS_AOS, BVHLayout_AOS_SOA, BVHLayout_SOA_AOS, BVHLayout_SOA_SOA
86 // triIndex[tri*4] = origIdx
87 //------------------------------------------------------------------------
88 
// Cuda BVH class: holds a BVH as three Buffers (nodes, Woop-transformed triangles,
// triangle indices) arranged according to a BVHLayout, and derives from the CudaAS
// acceleration-structure interface. The byte-level layouts are described in the comment
// block preceding this class.
// NOTE(review): the leading number on each line is the line number from the generated
// source listing, not part of the C++ code.
93 class CudaBVH : public CudaAS
94 {
95 public:
96  enum
97  {
 // Presumably the alignment (in bytes) used when packing the buffers -- TODO confirm in CudaBVH.cpp.
98  Align = 4096
99  };
100 
101 public:
 // Constructor taking a source BVH and the layout to use. Defined in CudaBVH.cpp.
107  explicit CudaBVH (const BVH& bvh, BVHLayout layout);
108 
 // Constructor that only records the layout.
 // NOTE(review): only m_layout appears in the initializer list; m_platform, m_scene,
 // m_needClosestHit, m_references and m_offset are not initialized here.
113  explicit CudaBVH (BVHLayout layout) : m_layout(layout) { ; }
114 
 // Copy constructor. Delegates all work to operator=.
 // NOTE(review): takes a non-const reference, matching operator= below.
119  CudaBVH (CudaBVH& other) { operator=(other); }
120 
 // Constructor taking an input stream; presumably the counterpart of serialize() -- TODO confirm.
125  explicit CudaBVH (InputStream& in);
126 
 // Destructor. Defined in CudaBVH.cpp.
130  ~CudaBVH (void);
131 
 // Returns the layout stored in m_layout.
135  BVHLayout getLayout (void) const { return m_layout; }
136 
 // Returns a mutable reference to the node buffer (m_nodes).
140  Buffer& getNodeBuffer (void) { return m_nodes; }
141 
 // Returns a mutable reference to the Woop triangle buffer (m_triWoop).
145  Buffer& getTriWoopBuffer (void) { return m_triWoop; }
146 
 // Returns a mutable reference to the triangle index buffer (m_triIndex).
150  Buffer& getTriIndexBuffer (void) { return m_triIndex; }
151 
 // Returns the node subarray selected by idx as an (offset, size) pair.
157  Vec2i getNodeSubArray (int idx) const; // (ofs, size)
158 
 // Returns the Woop triangle subarray selected by idx as an (offset, size) pair.
164  Vec2i getTriWoopSubArray (int idx) const; // (ofs, size)
165 
 // Assignment operator. Defined in CudaBVH.cpp; note the non-const argument.
171  CudaBVH& operator= (CudaBVH& other);
172 
 // Writes the Cuda BVH to the output stream.
177  void serialize (OutputStream& out);
178 
 // Stores the platform and scene pointers used by the trace routines.
 // Both pointers are asserted to be non-null via FW_ASSERT before being stored.
179  void setTraceParams (Platform* platform, Scene* scene) { FW_ASSERT(platform && scene); m_platform = platform; m_scene = scene; }
 // Defined in CudaBVH.cpp. Presumably traces `rays` and records hit triangles through
 // `references` starting at `offset` -- TODO confirm against the implementation.
180  void findVisibleTriangles(RayBuffer& rays, S32* references, S32 offset);
 // Trace entry points, defined in CudaBVH.cpp. `stats` is optional and defaults to NULL.
181  void trace (RayBuffer& rays, Buffer& visibility, bool twoTrees, RayStats* stats = NULL);
182  //void trace (RayBuffer& rays, CudaBVH& emptyBVH, RayStats* stats = NULL);
183  void trace (RayBuffer& rays, Buffer& visibility, Array<AABB>& emptyBVH, RayStats* stats = NULL);
184 
 // A node address is treated as a leaf when it is negative; the layout comments
 // preceding the class describe leaf references as being stored complemented (~ofs).
185  bool isLeaf (S32 node) {return node < 0;}
 // Outputs the two child boxes and child addresses of `node`. Defined in CudaBVH.cpp.
 // NOTE(review): how `splitInfo` is used is not visible from this header.
186  void getNode (S32 node, SplitInfo *splitInfo, AABB &child0, AABB &child1, S32 &child0Addr, S32 &child1Addr);
 // Collects triangle indices for `node` into `indices`. Defined in CudaBVH.cpp.
187  void getTriangleIndices (S32 node, Array<S32>& indices);
188 
 // Returns the scene pointer last stored by setTraceParams().
189  Scene* getScene () {return m_scene;}
190 
191 private:
 // Buffer construction helpers; implementations are not visible in this header.
 // Names suggest one builder per buffer for the non-compact layouts and a combined
 // builder for the compact layout -- TODO confirm.
192  void createNodeBasic (const BVH& bvh);
193  void createTriWoopBasic (const BVH& bvh);
194  void createTriIndexBasic (const BVH& bvh);
195  void createCompact (const BVH& bvh, int nodeOffsetSizeDiv);
196 
 // Presumably computes the Woop representation of triangle `idx` into m_woop -- TODO confirm.
197  void woopifyTri (const BVH& bvh, int idx);
198 
 // Per-ray helpers, templated on the BVH layout so the layout is a compile-time constant.
199  template <BVHLayout LAYOUT>
200  void trace (S32 node, Ray& ray, RayResult& result);
201  template <BVHLayout LAYOUT>
202  //void trace (S32 node, Ray& ray, RayResult& result, CudaBVH& emptyBVH);
203  void trace (S32 node, Ray& ray, RayResult& result, Array<AABB>& emptyBVH);
204  template <BVHLayout LAYOUT>
205  bool intersectTriangles (S32 node, Ray& ray, RayResult& result);
 // Layout-specific counterpart of getNode() (same parameter list).
206  template <BVHLayout LAYOUT>
207  void getNodeTemplate (S32 node, SplitInfo *splitInfo, AABB &child0, AABB &child1, S32 &child0Addr, S32 &child1Addr);
208  bool updateHit (Ray& ray, RayResult& result, float t, S32 index);
209 
210 private:
 // Layout the buffers below are arranged in.
211  BVHLayout m_layout;
 // Node, Woop triangle and triangle index buffers (see the layout comments before the class).
212  Buffer m_nodes;
213  Buffer m_triWoop;
214  Buffer m_triIndex;
 // Three Vec4f values; presumably scratch output of woopifyTri() (woopZ/woopU/woopV) -- TODO confirm.
215  Vec4f m_woop[3];
216 
 // Set by setTraceParams(); not owned as far as this header shows.
217  Platform* m_platform;
218  Scene* m_scene;
219 
 // Trace-time state; the names mirror the parameters of findVisibleTriangles() -- TODO confirm usage.
220  bool m_needClosestHit;
221  S32* m_references;
222  S32 m_offset;
223 
 // NOTE(review): this section is empty in the listing, but the page's member index
 // reports `RayStats * m_stats` at CudaBVH.hpp:225, i.e. between the two lines below.
224 public:
226 };
227 
228 //------------------------------------------------------------------------
229 }
CUdevice int ordinal char int CUdevice dev CUdevprop CUdevice dev CUcontext ctx CUcontext ctx CUcontext pctx CUmodule const void image CUmodule const void fatCubin CUfunction CUmodule const char name void p CUfunction unsigned int bytes CUtexref pTexRef CUtexref CUarray unsigned int Flags CUtexref int CUaddress_mode am CUtexref unsigned int Flags CUaddress_mode CUtexref int dim CUarray_format int CUtexref hTexRef CUfunction unsigned int numbytes CUfunction int offset
Definition: DLLImports.inl:84
Definitions for acceleration structure interface.
RayStats * m_stats
Definition: CudaBVH.hpp:225
#define NULL
Definition: Defs.hpp:39
Cuda BVH class.
Definition: CudaBVH.hpp:93
Vec2i getNodeSubArray(int idx) const
Returns node subarray.
Definition: CudaBVH.cpp:380
void trace(RayBuffer &rays, Buffer &visibility, bool twoTrees, RayStats *stats=NULL)
Definition: CudaBVH.cpp:178
Scene * getScene()
Definition: CudaBVH.hpp:189
void findVisibleTriangles(RayBuffer &rays, S32 *references, S32 offset)
Definition: CudaBVH.cpp:94
Definition: Util.hpp:62
Structure holding ray statistics. Also provides print to the console. These statistics are used in a ...
Definition: BVH.hpp:45
CudaBVH & operator=(CudaBVH &other)
Assignment operator.
Definition: CudaBVH.cpp:404
~CudaBVH(void)
Destructor.
Definition: CudaBVH.cpp:81
Declarations for the BVH acceleration structure.
BVHLayout getLayout(void) const
Definition: CudaBVH.hpp:135
Buffer & getTriWoopBuffer(void)
Definition: CudaBVH.hpp:145
Ray buffer class. Stores rays.
Definition: RayBuffer.hpp:38
BVH acceleration structure class.
Definition: BVH.hpp:74
#define FW_ASSERT(X)
Definition: Defs.hpp:67
signed int S32
Definition: Defs.hpp:88
CudaBVH(CudaBVH &other)
Copy constructor.
Definition: CudaBVH.hpp:119
CudaBVH(const BVH &bvh, BVHLayout layout)
Constructor.
Definition: CudaBVH.cpp:49
void serialize(OutputStream &out)
Writes Cuda BVH to the output stream.
Definition: CudaBVH.cpp:87
CUdevice int ordinal char int CUdevice dev CUdevprop CUdevice dev CUcontext ctx CUcontext ctx CUcontext pctx CUmodule const void image CUmodule const void fatCubin CUfunction CUmodule const char name void p CUfunction unsigned int bytes CUtexref pTexRef CUtexref CUarray unsigned int Flags CUtexref int CUaddress_mode am CUtexref unsigned int Flags CUaddress_mode CUtexref int dim CUarray_format int CUtexref hTexRef CUfunction unsigned int numbytes CUfunction int float value CUfunction int CUtexref hTexRef CUfunction int int grid_height CUevent unsigned int Flags CUevent hEvent CUevent hEvent CUstream unsigned int Flags CUstream hStream GLuint bufferobj unsigned int CUdevice dev CUdeviceptr unsigned int CUmodule const char name CUdeviceptr unsigned int bytesize CUdeviceptr dptr void unsigned int bytesize void CUdeviceptr unsigned int ByteCount CUarray unsigned int CUdeviceptr unsigned int ByteCount CUarray unsigned int const void unsigned int ByteCount CUarray unsigned int CUarray unsigned int unsigned int ByteCount void CUarray unsigned int unsigned int CUstream hStream const CUDA_MEMCPY2D pCopy CUdeviceptr const void unsigned int CUstream hStream const CUDA_MEMCPY2D CUstream hStream CUdeviceptr unsigned char unsigned int N CUdeviceptr unsigned int unsigned int N CUdeviceptr unsigned int unsigned short unsigned int unsigned int Height CUarray const CUDA_ARRAY_DESCRIPTOR pAllocateArray CUarray const CUDA_ARRAY3D_DESCRIPTOR pAllocateArray unsigned int CUtexref CUdeviceptr unsigned int bytes CUcontext unsigned int CUdevice device GLenum texture GLenum GLuint buffer GLenum GLuint renderbuffer GLenum GLsizeiptr const GLvoid GLenum usage GLuint shader GLenum type GLsizei const GLuint framebuffers GLsizei const GLuint renderbuffers GLuint v GLuint v GLenum GLenum GLenum GLuint GLint level GLsizei GLuint framebuffers GLuint const GLchar name GLenum GLintptr GLsizeiptr GLvoid data GLuint GLenum GLint param GLuint GLenum GLint param GLhandleARB programObj GLenum GLenum GLsizei 
GLsizei height GLenum GLint GLint GLsizei GLsizei GLsizei GLint GLenum GLenum const GLvoid pixels GLint GLsizei const GLfloat value GLint GLfloat GLfloat v1 GLint GLfloat GLfloat GLfloat v2 GLint GLsizei const GLfloat value GLint GLsizei GLboolean const GLfloat value GLuint program GLuint index
Definition: DLLImports.inl:363
Class holding information about a split of a BVH node.
Definition: BVHNode.hpp:58
Class holding 3d scene.
Definition: Scene.hpp:44
void setTraceParams(Platform *platform, Scene *scene)
Definition: CudaBVH.hpp:179
bool isLeaf(S32 node)
Definition: CudaBVH.hpp:185
Buffer & getTriIndexBuffer(void)
Definition: CudaBVH.hpp:150
Class holding various SAH and batch processing parameters.
Definition: Platform.hpp:46
Vec2i getTriWoopSubArray(int idx) const
Returns woop triangle subarray.
Definition: CudaBVH.cpp:392
void getTriangleIndices(S32 node, Array< S32 > &indices)
Definition: CudaBVH.cpp:341
Buffer & getNodeBuffer(void)
Definition: CudaBVH.hpp:140
CudaBVH(BVHLayout layout)
Constructor.
Definition: CudaBVH.hpp:113
void getNode(S32 node, SplitInfo *splitInfo, AABB &child0, AABB &child1, S32 &child0Addr, S32 &child1Addr)
Definition: CudaBVH.cpp:323
Interface for acceleration structure.
Definition: CudaAS.hpp:19