NTrace
GPU ray tracing framework
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros
CudaBVH.cpp
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2009-2011, NVIDIA Corporation
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions are met:
7  * * Redistributions of source code must retain the above copyright
8  * notice, this list of conditions and the following disclaimer.
9  * * Redistributions in binary form must reproduce the above copyright
10  * notice, this list of conditions and the following disclaimer in the
11  * documentation and/or other materials provided with the distribution.
12  * * Neither the name of NVIDIA Corporation nor the
13  * names of its contributors may be used to endorse or promote products
14  * derived from this software without specific prior written permission.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19  * DISCLAIMED. IN NO EVENT SHALL <COPYRIGHT HOLDER> BE LIABLE FOR ANY
20  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
22  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
23  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26  */
27 
28 #include "cuda/CudaBVH.hpp"
29 #include "base/Sort.hpp"
30 
31 using namespace FW;
32 
33 // Explicit declarations of specializations: needed so it does not matter when they are first used
34 template <>
35 bool CudaBVH::intersectTriangles<BVHLayout_AOS_AOS>(S32 node, Ray& ray, RayResult& result);
36 template <>
37 bool CudaBVH::intersectTriangles<BVHLayout_Compact>(S32 node, Ray& ray, RayResult& result);
38 template <>
39 bool CudaBVH::intersectTriangles<BVHLayout_CPU>(S32 node, Ray& ray, RayResult& result);
40 //template <>
41 //void CudaBVH::getNodeTemplate<BVHLayout_AOS_AOS>(S32 node, SplitInfo *splitInfo, AABB &child0, AABB &child1, S32 &child0Addr, S32 &child1Addr);
42 template <>
43 void CudaBVH::getNodeTemplate<BVHLayout_Compact>(S32 node, SplitInfo *splitInfo, AABB &child0, AABB &child1, S32 &child0Addr, S32 &child1Addr);
44 //template <>
45 //void CudaBVH::getNodeTemplate<BVHLayout_CPU>(S32 node, SplitInfo *splitInfo, AABB &child0, AABB &child1, S32 &child0Addr, S32 &child1Addr);
46 
47 //------------------------------------------------------------------------
48 
49 CudaBVH::CudaBVH(const BVH& bvh, BVHLayout layout)
50 : m_layout (layout)
51 {
52  FW_ASSERT(layout >= 0 && layout < BVHLayout_Max);
53 
54  if (layout == BVHLayout_Compact)
55  {
56  createCompact(bvh,1);
57  return;
58  }
59 
60  if (layout == BVHLayout_Compact2)
61  {
62  createCompact(bvh,16);
63  return;
64  }
65 
66  createNodeBasic(bvh);
67  if (layout != BVHLayout_CPU)
68  createTriWoopBasic(bvh);
69  createTriIndexBasic(bvh);
70 }
71 
72 //------------------------------------------------------------------------
73 
75 {
76  in >> (S32&)m_layout >> m_nodes >> m_triWoop >> m_triIndex;
77 }
78 
79 //------------------------------------------------------------------------
80 
82 {
83 }
84 
85 //------------------------------------------------------------------------
86 
88 {
89  out << (S32)m_layout << m_nodes << m_triWoop << m_triIndex;
90 }
91 
92 //------------------------------------------------------------------------
93 
95 {
96 #if defined(VISIBLE_CUDA_TESTED) // Copy info data from the GPU
97  for(int i = 0; i < m_triFlags.getSize()>>2; i++)
98  {
99  S32 flag = *(S32*)m_triFlags.getPtr(i * 4);
100  S32 *ref = references + i*offset;
101  (*ref) = flag>0 ? 1 : 0;
102  }
103 #elif defined(VISIBLE_RAY_HITS) // Get visibility info from the hit triangles
104  for(S32 i=0;i<rays.getSize();i++)
105  {
106  const RayResult& result = rays.getResultForSlot(i);
107 
108  // Increment the triangle hit count
109  if(result.hit())
110  {
111  S32 *ref = references + result.id*offset;
112  if((*ref) == 0)
113  (*ref)++;
114  }
115  }
116 #else // Compute the visibility info by tracing the rays
117  m_needClosestHit = rays.getNeedClosestHit();
118  m_stats = NULL;
119  m_references = references;
120  m_offset = offset;
121 
122  for(S32 i=0;i<rays.getSize();i++)
123  {
124  Ray ray = rays.getRayForSlot(i); // takes a local copy
125  RayResult& result = rays.getMutableResultForSlot(i);
126 
127  result.clear();
128 
129 #ifdef VISIBLE_HIDDEN
130  m_rayHidden = 0;
131 #endif
132 
133  switch(m_layout)
134  {
135  case BVHLayout_AOS_AOS:
136  trace<BVHLayout_AOS_AOS>(0, ray, result);
137  break;
138  case BVHLayout_Compact:
139  trace<BVHLayout_Compact>(0, ray, result);
140  break;
141  case BVHLayout_CPU:
142  trace<BVHLayout_CPU>(0, ray, result);
143  break;
144  default:
145  FW_ASSERT(0);
146  }
147 
148  //if(result.hit()) // Updates the ray so that it cannot traverse further than the closest hit (Only for OSAH build)
149  //{
150  // rays.getMutableRayForSlot(i).tmax = result.t; // Works only for benchmark, in interactive this code is not run for each frame!
151  // // However, this is almost correct behaviour as for OSAH and interactive the BVH is not updated when camera moves
152  //}
153 
154 #if !defined(VISIBLE_TOUCHED) && !defined(VISIBLE_TOUCHED_TESTED) && !defined(VISIBLE_HIDDEN)
155  // Set hit triangle as visible
156  if(result.hit())
157  {
158  S32 *ref = references + result.id*offset;
159  if((*ref) == 0)
160  (*ref)++;
161  }
162 #endif
163 #ifdef VISIBLE_HIDDEN
164  if(m_references)
165  {
166  S32 *ref = m_references + result.id*(*m_offset);
167  (*ref) = m_rayHidden == 0 ? 0 : m_rayHidden-1;
168  }
169 #endif
170  }
171 
172  //rays.getRayBuffer().setDirtyExcept(Buffer::CPU); // We have updated the CPU buffer, mark the other as dirty - they will be updated later
173 #endif
174 }
175 
176 //------------------------------------------------------------------------
177 
178 void CudaBVH::trace(RayBuffer& rays, Buffer& visibility, bool twoTrees, RayStats* stats)
179 {
180  m_needClosestHit = rays.getNeedClosestHit();
181  m_stats = stats;
182  m_references = NULL;
183 
184  S32* visib = (S32*)visibility.getMutablePtr();
185 
186  /*Ray ray = rays.getRayForSlot(0); // takes a local copy
187  ray.direction = Vec3f(0.f, -1.f, 0.f);
188  RayResult& result = rays.getMutableResultForSlot(0);
189  result.clear();
190  trace<BVHLayout_Compact>(0, ray, result);*/
191 
192  if(twoTrees)
193  {
194  for(S32 i=0;i<rays.getSize();i++)
195  {
196  Ray ray = rays.getRayForSlot(i); // takes a local copy
197  RayResult& result = rays.getMutableResultForSlot(i);
198 
199  result.clear();
200  result.t = ray.tmax;
201 
202  if(stats)
203  {
204  stats->platform = *m_platform;
205  stats->numRays++;
206  }
207 
208  switch(m_layout)
209  {
210  case BVHLayout_AOS_AOS:
211  trace<BVHLayout_AOS_AOS>(1, ray, result);
212  trace<BVHLayout_AOS_AOS>(2, ray, result);
213  break;
214  case BVHLayout_Compact:
215  trace<BVHLayout_Compact>(64, ray, result);
216  trace<BVHLayout_Compact>(128, ray, result);
217  break;
218  case BVHLayout_CPU:
219  trace<BVHLayout_CPU>(1, ray, result);
220  trace<BVHLayout_CPU>(2, ray, result);
221  break;
222  default:
223  FW_ASSERT("Unspported BVH layout\n");
224  }
225 
226  // Set visibility
227  if(result.hit())
228  visib[result.id] = 1;
229  }
230  }
231  else
232  {
233  for(S32 i=0;i<rays.getSize();i++)
234  {
235  Ray ray = rays.getRayForSlot(i); // takes a local copy
236  RayResult& result = rays.getMutableResultForSlot(i);
237 
238  result.clear();
239  result.t = ray.tmax;
240 
241  if(stats)
242  {
243  stats->platform = *m_platform;
244  stats->numRays++;
245  }
246 
247  switch(m_layout)
248  {
249  case BVHLayout_AOS_AOS:
250  trace<BVHLayout_AOS_AOS>(0, ray, result);
251  break;
252  case BVHLayout_Compact:
253  trace<BVHLayout_Compact>(0, ray, result);
254  break;
255  case BVHLayout_CPU:
256  trace<BVHLayout_CPU>(0, ray, result);
257  break;
258  default:
259  FW_ASSERT("Unspported BVH layout\n");
260  }
261 
262  // Set visibility
263  if(result.hit())
264  visib[result.id] = 1;
265  }
266  }
267 }
268 
269 
270 //------------------------------------------------------------------------
271 
272 //void CudaBVH::trace(RayBuffer& rays, CudaBVH& emptyBVH, RayStats* stats)
273 void CudaBVH::trace(RayBuffer& rays, Buffer& visibility, Array<AABB>& emptyBVH, RayStats* stats)
274 {
275  m_needClosestHit = rays.getNeedClosestHit();
276  m_stats = stats;
277  m_references = NULL;
278 
279  S32* visib = (S32*)visibility.getMutablePtr();
280 
281  /*Ray ray = rays.getRayForSlot(0); // takes a local copy
282  ray.direction = Vec3f(0.f, -1.f, 0.f);
283  RayResult& result = rays.getMutableResultForSlot(0);
284  result.clear();
285  trace<BVHLayout_Compact>(0, ray, result, emptyBVH);*/
286 
287  for(S32 i=0;i<rays.getSize();i++)
288  {
289  Ray ray = rays.getRayForSlot(i); // takes a local copy
290  RayResult& result = rays.getMutableResultForSlot(i);
291 
292  result.clear();
293 
294  if(stats)
295  {
296  stats->platform = *m_platform;
297  stats->numRays++;
298  }
299 
300  switch(m_layout)
301  {
302  case BVHLayout_AOS_AOS:
303  trace<BVHLayout_AOS_AOS>(0, ray, result, emptyBVH);
304  break;
305  case BVHLayout_Compact:
306  trace<BVHLayout_Compact>(0, ray, result, emptyBVH);
307  break;
308  case BVHLayout_CPU:
309  trace<BVHLayout_CPU>(0, ray, result, emptyBVH);
310  break;
311  default:
312  FW_ASSERT("Unspported BVH layout\n");
313  }
314 
315  // Set visibility
316  if(result.hit())
317  visib[result.id] = 1;
318  }
319 }
320 
321 //------------------------------------------------------------------------
322 
323 void CudaBVH::getNode(S32 node, SplitInfo *splitInfo, AABB &child0, AABB &child1, S32 &child0Addr, S32 &child1Addr)
324 {
325  switch(m_layout)
326  {
327  case BVHLayout_Compact:
328  getNodeTemplate<BVHLayout_Compact>(node, splitInfo, child0, child1, child0Addr, child1Addr);
329  break;
330  case BVHLayout_AOS_AOS:
331  case BVHLayout_CPU:
332  getNodeTemplate<BVHLayout_CPU>(node, splitInfo, child0, child1, child0Addr, child1Addr);
333  break;
334  default:
335  FW_ASSERT("Unspported BVH layout\n");
336  }
337 }
338 
339 //------------------------------------------------------------------------
340 
342 {
343  Buffer &nodes = getNodeBuffer();
344  Buffer &tris = getTriIndexBuffer();
345  Buffer &woop = getTriWoopBuffer();
346 
347  switch(m_layout)
348  {
349  case BVHLayout_Compact:
350  {
351  for(int triAddr = (-node-1); ; triAddr += 3)
352  {
353  U32 guard = floatToBits(*(F32*)woop.getMutablePtr(triAddr * 16 + 0));
354  if(guard == 0x80000000)
355  break;
356 
357  indices.add(*(S32*)tris.getMutablePtr(triAddr*4));
358  }
359  }
360  break;
361  case BVHLayout_AOS_AOS:
362  case BVHLayout_CPU:
363  {
364  node = (-node-1);
365 
366  int lo = *(S32*)nodes.getMutablePtr(node * 64 + 48);
367  int hi = *(S32*)nodes.getMutablePtr(node * 64 + 52);
368 
369  for(int i=lo; i<hi; i++)
370  indices.add(*(S32*)tris.getMutablePtr(i*4));
371  }
372  break;
373  default:
374  FW_ASSERT("Unspported BVH layout\n");
375  }
376 }
377 
378 //------------------------------------------------------------------------
379 
381 {
382  FW_ASSERT(idx >= 0 && idx < 4);
383  S32 size = (S32)m_nodes.getSize();
384 
385  if (m_layout == BVHLayout_SOA_AOS || m_layout == BVHLayout_SOA_SOA)
386  return Vec2i((size >> 2) * idx, (size >> 2));
387  return Vec2i(0, size);
388 }
389 
390 //------------------------------------------------------------------------
391 
393 {
394  FW_ASSERT(idx >= 0 && idx < 4);
395  S32 size = (S32)m_triWoop.getSize();
396 
397  if (m_layout == BVHLayout_AOS_SOA || m_layout == BVHLayout_SOA_SOA)
398  return Vec2i((size >> 2) * idx, (size >> 2));
399  return Vec2i(0, size);
400 }
401 
402 //------------------------------------------------------------------------
403 
405 {
406  if (&other != this)
407  {
408  m_layout = other.m_layout;
409  m_nodes = other.m_nodes;
410  m_triWoop = other.m_triWoop;
411  m_triIndex = other.m_triIndex;
412  }
413  return *this;
414 }
415 
416 //------------------------------------------------------------------------
417 
418 void CudaBVH::createNodeBasic(const BVH& bvh)
419 {
420  struct StackEntry
421  {
422  const BVHNode* node;
423  S32 idx;
424 
425  StackEntry(const BVHNode* n = NULL, int i = 0) : node(n), idx(i) {}
426  int encodeIdx(void) const { return (node->isLeaf()) ? ~idx : idx; }
427  };
428 
429  const BVHNode* root = bvh.getRoot();
430  m_nodes.resizeDiscard((root->getSubtreeSize(BVH_STAT_NODE_COUNT) * 64 + Align - 1) & -Align);
431 
432  int nextNodeIdx = 0;
433  Array<StackEntry> stack(StackEntry(root, nextNodeIdx++));
434  while (stack.getSize())
435  {
436  StackEntry e = stack.removeLast();
437  const AABB* b0;
438  const AABB* b1;
439  int c0;
440  int c1;
441 
442  // Leaf?
443 
444  if (e.node->isLeaf())
445  {
446  const LeafNode* leaf = reinterpret_cast<const LeafNode*>(e.node);
447  b0 = &leaf->m_bounds;
448  b1 = &leaf->m_bounds;
449  c0 = leaf->m_lo;
450  c1 = leaf->m_hi;
451  }
452 
453  // Internal node?
454 
455  else
456  {
457  StackEntry e0 = stack.add(StackEntry(e.node->getChildNode(0), nextNodeIdx++));
458  StackEntry e1 = stack.add(StackEntry(e.node->getChildNode(1), nextNodeIdx++));
459  b0 = &e0.node->m_bounds;
460  b1 = &e1.node->m_bounds;
461  c0 = e0.encodeIdx();
462  c1 = e1.encodeIdx();
463  }
464 
465  // Write entry.
466 
467  Vec4i data[] =
468  {
469  Vec4i(floatToBits(b0->min().x), floatToBits(b0->max().x), floatToBits(b0->min().y), floatToBits(b0->max().y)),
470  Vec4i(floatToBits(b1->min().x), floatToBits(b1->max().x), floatToBits(b1->min().y), floatToBits(b1->max().y)),
471  Vec4i(floatToBits(b0->min().z), floatToBits(b0->max().z), floatToBits(b1->min().z), floatToBits(b1->max().z)),
472  Vec4i(c0, c1, 0, 0)
473  };
474 
475  switch (m_layout)
476  {
477  case BVHLayout_AOS_AOS:
478  case BVHLayout_AOS_SOA:
479  case BVHLayout_CPU:
480  memcpy(m_nodes.getMutablePtr(e.idx * 64), data, 64);
481  break;
482 
483  case BVHLayout_SOA_AOS:
484  case BVHLayout_SOA_SOA:
485  for (int i = 0; i < 4; i++)
486  memcpy(m_nodes.getMutablePtr(e.idx * 16 + (m_nodes.getSize() >> 2) * i), &data[i], 16);
487  break;
488 
489  default:
490  FW_ASSERT(false);
491  break;
492  }
493  }
494 }
495 
496 //------------------------------------------------------------------------
497 
498 void CudaBVH::createTriWoopBasic(const BVH& bvh)
499 {
500  const Array<S32>& tidx = bvh.getTriIndices();
501  m_triWoop.resizeDiscard((tidx.getSize() * 64 + Align - 1) & -Align);
502 
503  for (int i = 0; i < tidx.getSize(); i++)
504  {
505  woopifyTri(bvh, i);
506 
507  switch (m_layout)
508  {
509  case BVHLayout_AOS_AOS:
510  case BVHLayout_SOA_AOS:
511  memcpy(m_triWoop.getMutablePtr(i * 64), m_woop, 48);
512  break;
513 
514  case BVHLayout_AOS_SOA:
515  case BVHLayout_SOA_SOA:
516  for (int j = 0; j < 3; j++)
517  memcpy(m_triWoop.getMutablePtr(i * 16 + (m_triWoop.getSize() >> 2) * j), &m_woop[j], 16);
518  break;
519 
520  default:
521  FW_ASSERT(false);
522  break;
523  }
524  }
525 }
526 
527 //------------------------------------------------------------------------
528 
529 void CudaBVH::createTriIndexBasic(const BVH& bvh)
530 {
531  const Array<S32>& tidx = bvh.getTriIndices();
532  m_triIndex.resizeDiscard(tidx.getSize() * 4);
533 
534  for (int i = 0; i < tidx.getSize(); i++)
535  *(S32*)m_triIndex.getMutablePtr(i * 4) = tidx[i];
536 }
537 
538 //------------------------------------------------------------------------
539 
540 void CudaBVH::createCompact(const BVH& bvh, int nodeOffsetSizeDiv)
541 {
542  struct StackEntry
543  {
544  const BVHNode* node;
545  S32 idx;
546 
547  StackEntry(const BVHNode* n = NULL, int i = 0) : node(n), idx(i) {}
548  };
549 
550  // Construct data.
551 
552  Array<Vec4i> nodeData(NULL, 4);
553  Array<Vec4i> triWoopData;
554  Array<S32> triIndexData;
555  Array<StackEntry> stack(StackEntry(bvh.getRoot(), 0));
556 
557  while (stack.getSize())
558  {
559  StackEntry e = stack.removeLast();
560  FW_ASSERT(e.node->getNumChildNodes() == 2);
561  const AABB* cbox[2];
562  int cidx[2];
563 
564  // Process children.
565 
566  for (int i = 0; i < 2; i++)
567  {
568  // Inner node => push to stack.
569 
570  const BVHNode* child = e.node->getChildNode(i);
571  cbox[i] = &child->m_bounds;
572  if (!child->isLeaf())
573  {
574  cidx[i] = nodeData.getNumBytes() / nodeOffsetSizeDiv;
575  stack.add(StackEntry(child, nodeData.getSize()));
576  nodeData.add(NULL, 4);
577  continue;
578  }
579 
580  // Leaf => append triangles.
581 
582  const LeafNode* leaf = reinterpret_cast<const LeafNode*>(child);
583  cidx[i] = ~triWoopData.getSize();
584  for (int j = leaf->m_lo; j < leaf->m_hi; j++)
585  {
586  woopifyTri(bvh, j);
587  if (m_woop[0].x == 0.0f)
588  m_woop[0].x = 0.0f;
589  triWoopData.add((Vec4i*)m_woop, 3);
590  triIndexData.add(bvh.getTriIndices()[j]);
591  triIndexData.add(0);
592  triIndexData.add(0);
593  }
594 
595  // Terminator.
596 
597  triWoopData.add(0x80000000);
598  triIndexData.add(0);
599  }
600 
601  // Write entry.
602 
603  Vec4i* dst = nodeData.getPtr(e.idx);
604  dst[0] = Vec4i(floatToBits(cbox[0]->min().x), floatToBits(cbox[0]->max().x), floatToBits(cbox[0]->min().y), floatToBits(cbox[0]->max().y));
605  dst[1] = Vec4i(floatToBits(cbox[1]->min().x), floatToBits(cbox[1]->max().x), floatToBits(cbox[1]->min().y), floatToBits(cbox[1]->max().y));
606  dst[2] = Vec4i(floatToBits(cbox[0]->min().z), floatToBits(cbox[0]->max().z), floatToBits(cbox[1]->min().z), floatToBits(cbox[1]->max().z));
607  dst[3] = Vec4i(cidx[0], cidx[1], 0, 0);
608  }
609 
610  // Write to buffers.
611 
612  m_nodes.resizeDiscard(nodeData.getNumBytes());
613  m_nodes.set(nodeData.getPtr(), nodeData.getNumBytes());
614 
615  m_triWoop.resizeDiscard(triWoopData.getNumBytes());
616  m_triWoop.set(triWoopData.getPtr(), triWoopData.getNumBytes());
617 
618  m_triIndex.resizeDiscard(triIndexData.getNumBytes());
619  m_triIndex.set(triIndexData.getPtr(), triIndexData.getNumBytes());
620 }
621 
622 //------------------------------------------------------------------------
623 
624 void CudaBVH::woopifyTri(const BVH& bvh, int idx)
625 {
626  const Vec3i* triVtxIndex = (const Vec3i*)bvh.getScene()->getTriVtxIndexBuffer().getPtr();
627  const Vec3f* vtxPos = (const Vec3f*)bvh.getScene()->getVtxPosBuffer().getPtr();
628  const Vec3i& inds = triVtxIndex[bvh.getTriIndices()[idx]];
629  const Vec3f& v0 = vtxPos[inds.x];
630  const Vec3f& v1 = vtxPos[inds.y];
631  const Vec3f& v2 = vtxPos[inds.z];
632 
633  Mat4f mtx;
634  mtx.setCol(0, Vec4f(v0 - v2, 0.0f));
635  mtx.setCol(1, Vec4f(v1 - v2, 0.0f));
636  mtx.setCol(2, Vec4f(cross(v0 - v2, v1 - v2), 0.0f));
637  mtx.setCol(3, Vec4f(v2, 1.0f));
638  mtx = invert(mtx);
639 
640  m_woop[0] = Vec4f(mtx(2,0), mtx(2,1), mtx(2,2), -mtx(2,3));
641  m_woop[1] = mtx.getRow(0);
642  m_woop[2] = mtx.getRow(1);
643 }
644 
645 //------------------------------------------------------------------------
646 
648 {
649  return a > b ? a : b;
650 }
651 
652 //------------------------------------------------------------------------
653 
654 template <BVHLayout LAYOUT>
655 void CudaBVH::trace(S32 node, Ray& ray, RayResult& result)
656 {
657  S32 stack[100];
658  //F32 tStack[100];
659  int stackIndex = 1;
660 
661  while(stackIndex > 0)
662  {
663  for(;;)
664  {
665  if(node < 0)
666  {
667  bool end = intersectTriangles<LAYOUT>(node, ray, result);
668  if(end)
669  return;
670 
671  break;
672  }
673  else
674  {
675  const int TMIN = 0;
676  const int TMAX = 1;
677 
678  AABB child0, child1;
679  S32 child0Addr, child1Addr;
680 
681  getNodeTemplate<LAYOUT>(node, NULL, child0, child1, child0Addr, child1Addr);
682 
683  Vec2f tspan0 = Intersect::RayBox(child0, ray);
684  Vec2f tspan1 = Intersect::RayBox(child1, ray);
685 #ifdef VISIBLE_HIDDEN
686  bool intersect0, intersect1;
687  if(m_references)
688  {
689  intersect0 = (tspan0[TMIN]<=tspan0[TMAX]) && (tspan0[TMAX]>=ray.tmin);
690  intersect1 = (tspan1[TMIN]<=tspan1[TMAX]) && (tspan1[TMAX]>=ray.tmin);
691  }
692  else
693  {
694  intersect0 = (tspan0[TMIN]<=tspan0[TMAX]) && (tspan0[TMAX]>=ray.tmin) && (tspan0[TMIN]<=ray.tmax);
695  intersect1 = (tspan1[TMIN]<=tspan1[TMAX]) && (tspan1[TMAX]>=ray.tmin) && (tspan1[TMIN]<=ray.tmax);
696  }
697 #else
698  bool intersect0 = (tspan0[TMIN]<=tspan0[TMAX]) && (tspan0[TMAX]>=ray.tmin) && (tspan0[TMIN]<=ray.tmax);
699  bool intersect1 = (tspan1[TMIN]<=tspan1[TMAX]) && (tspan1[TMAX]>=ray.tmin) && (tspan1[TMIN]<=ray.tmax);
700 #endif
701 
702  if(m_stats)
703  {
704  m_stats->numNodeTests += m_platform->roundToNodeBatchSize( 2 );
705  result.padB += m_platform->roundToNodeBatchSize( 2 );
706 
707  if(intersect0 && intersect1)
708  result.padA = _max(result.padA, max(tspan0[TMIN], tspan1[TMIN]));
709  else if(intersect0)
710  result.padA = _max(result.padA, tspan0[TMIN]);
711  else if(intersect1)
712  result.padA = _max(result.padA, tspan1[TMIN]);
713  }
714 
715  if(intersect0 && intersect1)
716  {
717  if(tspan0[TMIN] > tspan1[TMIN])
718  {
719  swap(tspan0,tspan1);
720  swap(child0Addr,child1Addr);
721  }
722  node = child0Addr;
723  //tStack[stackIndex] = tspan1[TMIN];
724  stack[stackIndex++] = child1Addr;
725  }
726  else if(intersect0)
727  node = child0Addr;
728  else if(intersect1)
729  node = child1Addr;
730  else
731  break;
732  }
733  }
734  //do
735  //{
736  stackIndex--;
737  node = stack[stackIndex];
738  //} while(tStack[stackIndex] > ray.tmax);
739  }
740 }
741 
742 //------------------------------------------------------------------------
743 
744 //int eCompare(void* data, int idxA, int idxB)
745 bool eCompare(void* data, int idxA, int idxB)
746 {
747  const Vec2f* ptr = (const Vec2f*)data;
748  const F32& ma = ptr[idxA].x;
749  const F32& mb = ptr[idxB].x;
750  return (ma < mb) ? false : (ma > mb) ? true : false;
751 }
752 
753 //------------------------------------------------------------------------
754 
755 void eSwap(void* data, int idxA, int idxB)
756 {
757  Vec2f* ptr = (Vec2f*)data;
758  FW::swap(ptr[idxA], ptr[idxB]);
759 }
760 
761 
762 //------------------------------------------------------------------------
763 
764 template <BVHLayout LAYOUT>
765 //void CudaBVH::trace(S32 node, Ray& ray, RayResult& result, CudaBVH& emptyBVH)
766 void CudaBVH::trace(S32 node, Ray& ray, RayResult& result, Array<AABB>& emptyBVH)
767 {
768  S32 stack[100];
769  //Vec2f inter[100];
770  Vec2f empty[12];
771  int stackIndex = 1;
772  int emptyIndex = 1;
773 
774  // Find empty intervals
775  int hitCount = 1;
776  for(int i = 0; i < emptyBVH.getSize(); i++)
777  {
778  Vec2f tspan = Intersect::RayBox(emptyBVH[i], ray);
779  if(tspan.x<tspan.y && tspan.y>ray.tmin) // box hit
780  {
781  empty[hitCount] = tspan;
782  hitCount++;
783  }
784  }
785  //if(m_stats)
786  //{
787  // m_stats->numNodeTests += m_platform->roundToNodeBatchSize( emptyBVH.getSize() );
788  // result.padB += m_platform->roundToNodeBatchSize( emptyBVH.getSize() );
789  //}
790 
791  empty[0] = Vec2f((hitCount == 1) ? FW_F32_MAX : -FW_F32_MAX, ray.tmin); // If no empty box is hit, disable empty skips
792 
793  // Sort the empty intervals
794  //sort(1, hitCount, empty, eCompare, eSwap);
795  sort(empty, 1, hitCount, eCompare, eSwap);
796 
797  // Compact the empty intervals
798  for(int i = 1; i < hitCount;)
799  {
800  if(empty[i].x < empty[i-1].y) // Overlapping
801  {
802  empty[i-1].y = max(empty[i-1].y, empty[i].y);
803  hitCount--;
804  memmove(&empty[i], &empty[i+1], (hitCount-i)*sizeof(Vec2f));
805  }
806  else
807  {
808  i++;
809  }
810  }
811 
812  empty[hitCount] = Vec2f(FW_F32_MAX, ray.tmin); // Set the end of the array
813  ray.tmin = empty[0].y;
814 
815  // Root skip
816  /*Vec2f tspan = Intersect::RayBox(emptyBVH[0], ray);
817  if(tspan.x <= ray.tmin)
818  ray.tmin = max(ray.tmin, tspan.y);*/
819 
820  // Trace the empty BVH
821  /*Vec2f tspan0;
822  while(stackIndex > 0)
823  {
824  for(;;)
825  {
826  if(node < 0)
827  {
828  if(emptyIndex != 0 && tspan0.x < empty[emptyIndex-1].y)
829  {
830  empty[emptyIndex-1].y = max(tspan0.y, empty[emptyIndex-1].y);
831  }
832  else
833  {
834  empty[emptyIndex] = tspan0;
835  //empty[emptyIndex] = Vec2f(FW_F32_MAX, FW_F32_MAX);
836  emptyIndex++;
837  }
838 
839  break;
840  }
841  else
842  {
843  const int TMIN = 0;
844  const int TMAX = 1;
845 
846  AABB child0, child1;
847  S32 child0Addr, child1Addr;
848 
849  emptyBVH.getNodeTemplate<LAYOUT>(node, NULL, child0, child1, child0Addr, child1Addr);
850 
851  tspan0 = Intersect::RayBox(child0, ray);
852  Vec2f tspan1 = Intersect::RayBox(child1, ray);
853 
854  bool intersect0 = (tspan0[TMIN]<=tspan0[TMAX]) && (tspan0[TMAX]>=ray.tmin);
855  bool intersect1 = (tspan1[TMIN]<=tspan1[TMAX]) && (tspan1[TMAX]>=ray.tmin);
856 
857  if(m_stats)
858  {
859  m_stats->numNodeTests += m_platform->roundToNodeBatchSize( 2 );
860  result.padB += m_platform->roundToNodeBatchSize( 2 );
861  }
862 
863  if(intersect0 && intersect1)
864  {
865  if(tspan0[TMIN] > tspan1[TMIN])
866  {
867  swap(tspan0,tspan1);
868  swap(child0Addr,child1Addr);
869  }
870  node = child0Addr;
871  stack[stackIndex] = child1Addr;
872  inter[stackIndex] = tspan1;
873  stackIndex++;
874  }
875  else if(intersect0)
876  {
877  node = child0Addr;
878  }
879  else if(intersect1)
880  {
881  node = child1Addr;
882  tspan0 = tspan1;
883  }
884  else
885  break;
886  }
887  }
888 
889  stackIndex--;
890  node = stack[stackIndex];
891  tspan0 = inter[stackIndex];
892  }
893 
894  stackIndex = 1;
895  node = 0; // Start from the root.
896  empty[emptyIndex] = Vec2f(FW_F32_MAX, FW_F32_MAX); // Set the end of the array
897  emptyIndex = 0; // Rewind to the beginning*/
898  S32 eidx[100];
899  eidx[0] = 0;
900 
901  // Trace the BVH
902  while(stackIndex > 0)
903  {
904  for(;;)
905  {
906  if(node < 0)
907  {
908  bool end = intersectTriangles<LAYOUT>(node, ray, result);
909  if(end)
910  return;
911 
912  break;
913  }
914  else
915  {
916  const int TMIN = 0;
917  const int TMAX = 1;
918 
919  AABB child0, child1;
920  S32 child0Addr, child1Addr;
921 
922  getNodeTemplate<LAYOUT>(node, NULL, child0, child1, child0Addr, child1Addr);
923 
924  Vec2f tspan0 = Intersect::RayBox(child0, ray);
925  Vec2f tspan1 = Intersect::RayBox(child1, ray);
926 
927  bool intersect0 = (tspan0[TMIN]<=tspan0[TMAX]) && (tspan0[TMAX]>=ray.tmin) && (tspan0[TMIN]<=ray.tmax);
928  bool intersect1 = (tspan1[TMIN]<=tspan1[TMAX]) && (tspan1[TMAX]>=ray.tmin) && (tspan1[TMIN]<=ray.tmax);
929 
930  if(m_stats)
931  {
932  m_stats->numNodeTests += m_platform->roundToNodeBatchSize( 2 );
933  result.padB += m_platform->roundToNodeBatchSize( 2 );
934 
935  if(intersect0 && intersect1)
936  result.padA = _max(result.padA, max(tspan0[TMIN], tspan1[TMIN]));
937  else if(intersect0)
938  result.padA = _max(result.padA, tspan0[TMIN]);
939  else if(intersect1)
940  result.padA = _max(result.padA, tspan1[TMIN]);
941  }
942 
943  if(intersect0 && intersect1)
944  {
945  if(tspan0[TMIN] > tspan1[TMIN])
946  {
947  swap(tspan0,tspan1);
948  swap(child0Addr,child1Addr);
949  }
950  node = child0Addr;
951  stack[stackIndex] = child1Addr;
952  if(tspan1[TMIN] > empty[emptyIndex].x)
953  eidx[stackIndex] = emptyIndex+1;
954  else
955  eidx[stackIndex] = emptyIndex;
956  stackIndex++;
957  }
958  else if(intersect0)
959  {
960  node = child0Addr;
961  }
962  else if(intersect1)
963  {
964  node = child1Addr;
965  tspan0 = tspan1;
966  }
967  else
968  break;
969 
970  if(tspan0[TMIN] > empty[emptyIndex].x) // Pop node from empty stack and update tmin
971  {
972  ray.tmin = max(ray.tmin, empty[emptyIndex].y); // Set tmin to the empty box's cmax
973  emptyIndex++;
974  }
975  }
976  }
977 
978  stackIndex--;
979  node = stack[stackIndex];
980  emptyIndex = eidx[stackIndex];
981  ray.tmin = empty[max(emptyIndex-1, 0)].y;
982  //ray.tmin = 0.0f;
983  //emptyIndex = 0;
984  }
985 }
986 
987 //------------------------------------------------------------------------
988 
989 template <>
990 bool CudaBVH::intersectTriangles<BVHLayout_AOS_AOS>(S32 node, Ray& ray, RayResult& result)
991 {
992  Buffer &nodes = getNodeBuffer();
993  Buffer &woop = getTriWoopBuffer();
994  Buffer &tris = getTriIndexBuffer();
995 
996  node = (-node-1);
997 
998  int lo, hi;
999  lo = *(S32*)nodes.getMutablePtr(node * 64 + 48);
1000  hi = *(S32*)nodes.getMutablePtr(node * 64 + 52);
1001  if(m_stats)
1002  {
1003  m_stats->numTriangleTests += m_platform->roundToTriangleBatchSize( hi-lo );
1004  result.padB += m_platform->roundToTriangleBatchSize( hi-lo ) << 16;
1005  }
1006 
1007 #ifndef MASK_TRACE_EMPTY
1008  if(lo == hi) // empty leaf
1009  {
1010  AABB bound;
1011  bound.min() = Vec3f(bitsToFloat(*(U32*)nodes.getMutablePtr(node * 64 + 0)), bitsToFloat(*(U32*)nodes.getMutablePtr(node * 64 + 8)), bitsToFloat(*(U32*)nodes.getMutablePtr(node * 64 + 32)));
1012  bound.max() = Vec3f(bitsToFloat(*(U32*)nodes.getMutablePtr(node * 64 + 4)), bitsToFloat(*(U32*)nodes.getMutablePtr(node * 64 + 12)), bitsToFloat(*(U32*)nodes.getMutablePtr(node * 64 + 36)));
1013 
1014  Vec2f tspan = Intersect::RayBox(bound, ray);
1015  ray.tmin = tspan[1];
1016  }
1017 #endif
1018 
1019  for(int i=lo; i<hi; i++)
1020  {
1021  S32 index = *(S32*)tris.getMutablePtr(i*4);
1022 
1023  const Vec4f& zpleq = *(Vec4f*)woop.getMutablePtr(i * 64 + 0);
1024  const Vec4f& upleq = *(Vec4f*)woop.getMutablePtr(i * 64 + 16);
1025  const Vec4f& vpleq = *(Vec4f*)woop.getMutablePtr(i * 64 + 32);
1026  Vec3f bary = Intersect::RayTriangleWoop(zpleq,upleq,vpleq, ray);
1027  float t = bary[2];
1028 
1029  bool end = updateHit(ray, result, t, index);
1030  if(end)
1031  return true;
1032  }
1033 
1034  return false;
1035 }
1036 
1037 //------------------------------------------------------------------------
1038 
1039 template <>
1040 bool CudaBVH::intersectTriangles<BVHLayout_Compact>(S32 node, Ray& ray, RayResult& result)
1041 {
1042  Buffer &woop = getTriWoopBuffer();
1043  Buffer &tris = getTriIndexBuffer();
1044 
1045  for(int triAddr = (-node-1); ; triAddr += 3)
1046  {
1047  U32 guard = floatToBits(*(F32*)woop.getMutablePtr(triAddr * 16 + 0));
1048  if(guard == 0x80000000)
1049  break;
1050 #ifndef MASK_TRACE_EMPTY
1051  else if(floatToBits(*(F32*)woop.getMutablePtr(triAddr * 16 + 12)) == 0x80000000) // empty leaf
1052  {
1053  AABB bound;
1054  bound.min() = *(Vec3f*)woop.getMutablePtr(triAddr * 16 + 0);
1055  bound.max() = *(Vec3f*)woop.getMutablePtr(triAddr * 16 + 16);
1056 
1057  Vec2f tspan = Intersect::RayBox(bound, ray);
1058  ray.tmin = tspan[1];
1059  break;
1060  }
1061 #endif
1062 
1063  if(m_stats)
1064  {
1065  m_stats->numTriangleTests ++;
1066  result.padB += 1 << 16;
1067  }
1068 
1069  S32 index = *(S32*)tris.getMutablePtr(triAddr*4);
1070  const Vec4f& zpleq = *(Vec4f*)woop.getMutablePtr(triAddr * 16 + 0);
1071  const Vec4f& upleq = *(Vec4f*)woop.getMutablePtr(triAddr * 16 + 16);
1072  const Vec4f& vpleq = *(Vec4f*)woop.getMutablePtr(triAddr * 16 + 32);
1073  Vec3f bary = Intersect::RayTriangleWoop(zpleq,upleq,vpleq, ray);
1074  float t = bary[2];
1075 
1076  bool end = updateHit(ray, result, t, index);
1077  if(end)
1078  return true;
1079  }
1080 
1081  return false;
1082 }
1083 
1084 //------------------------------------------------------------------------
1085 
1086 template <>
1087 bool CudaBVH::intersectTriangles<BVHLayout_CPU>(S32 node, Ray& ray, RayResult& result)
1088 {
1089  Buffer &nodes = getNodeBuffer();
1090  Buffer &tris = getTriIndexBuffer();
1091 
1092  node = (-node-1);
1093 
1094  int lo, hi;
1095  lo = *(S32*)nodes.getMutablePtr(node * 64 + 48);
1096  hi = *(S32*)nodes.getMutablePtr(node * 64 + 52);
1097  if(m_stats)
1098  {
1099  m_stats->numTriangleTests += m_platform->roundToTriangleBatchSize( hi-lo );
1100  result.padB += m_platform->roundToTriangleBatchSize( hi-lo ) << 16;
1101  }
1102 
1103 #ifndef MASK_TRACE_EMPTY
1104  if(lo == hi) // empty leaf
1105  {
1106  AABB bound;
1107  bound.min() = Vec3f(bitsToFloat(*(U32*)nodes.getMutablePtr(node * 64 + 0)), bitsToFloat(*(U32*)nodes.getMutablePtr(node * 64 + 8)), bitsToFloat(*(U32*)nodes.getMutablePtr(node * 64 + 32)));
1108  bound.max() = Vec3f(bitsToFloat(*(U32*)nodes.getMutablePtr(node * 64 + 4)), bitsToFloat(*(U32*)nodes.getMutablePtr(node * 64 + 12)), bitsToFloat(*(U32*)nodes.getMutablePtr(node * 64 + 36)));
1109 
1110  Vec2f tspan = Intersect::RayBox(bound, ray);
1111  ray.tmin = tspan[1];
1112  }
1113 #endif
1114 
1115  for(int i=lo; i<hi; i++)
1116  {
1117  S32 index = *(S32*)tris.getMutablePtr(i*4);
1118  //const Vec3i& ind = m_scene->getTriangle(index).vertices;
1119  //const Vec3f& v0 = m_scene->getVertex(ind.x);
1120  //const Vec3f& v1 = m_scene->getVertex(ind.y);
1121  //const Vec3f& v2 = m_scene->getVertex(ind.z);
1122  const Vec3i& ind = ((Vec3i*)m_scene->getTriVtxIndexBuffer().getPtr())[index];
1123  const Vec3f& v0 = ((Vec3f*)m_scene->getVtxPosBuffer().getPtr())[ind.x];
1124  const Vec3f& v1 = ((Vec3f*)m_scene->getVtxPosBuffer().getPtr())[ind.y];
1125  const Vec3f& v2 = ((Vec3f*)m_scene->getVtxPosBuffer().getPtr())[ind.z];
1126  Vec3f bary = Intersect::RayTriangle(v0,v1,v2, ray);
1127  float t = bary[2];
1128 
1129  bool end = updateHit(ray, result, t, index);
1130  if(end)
1131  return true;
1132  }
1133 
1134  return false;
1135 }
1136 
1137 //------------------------------------------------------------------------
1138 
1139 bool CudaBVH::updateHit(Ray& ray, RayResult& result, float t, S32 index)
1140 {
1141 #ifdef VISIBLE_TOUCHED
1142  // Set close triangle as visible
1143  if(m_references)
1144  {
1145  S32 *ref = m_references + index*(m_offset);
1146  if((*ref) == 0)
1147  (*ref)++;
1148  }
1149 #endif
1150 
1151 #ifdef VISIBLE_HIDDEN
1152  if(t < FW_F32_MAX)
1153  m_rayHidden++;
1154 #endif
1155 
1156  if(t>ray.tmin && t<ray.tmax)
1157  {
1158 #ifdef VISIBLE_TOUCHED_TESTED
1159  // Set close triangle as visible
1160  if(m_references)
1161  {
1162  S32 *ref = m_references + index*(m_offset);
1163  if((*ref) == 0)
1164  (*ref)++;
1165  }
1166 #endif
1167 #ifdef VISIBLE_HIDDEN
1168  if(!m_references)
1169  ray.tmax = t;
1170 #else
1171  ray.tmax = t;
1172 #endif
1173  result.t = t;
1174  result.id = index;
1175 
1176  if(!m_needClosestHit)
1177  return true;
1178  }
1179 
1180  return false;
1181 }
1182 
1183 //------------------------------------------------------------------------
1184 
1185 template <BVHLayout LAYOUT>
1186 void CudaBVH::getNodeTemplate(S32 node, SplitInfo *splitInfo, AABB &child0, AABB &child1, S32 &child0Addr, S32 &child1Addr)
1187 {
1188  Buffer &nodes = getNodeBuffer();
1189 
1190  child0.min() = Vec3f(bitsToFloat(*(U32*)nodes.getMutablePtr(node * 64 + 0)), bitsToFloat(*(U32*)nodes.getMutablePtr(node * 64 + 8)), bitsToFloat(*(U32*)nodes.getMutablePtr(node * 64 + 32)));
1191  child0.max() = Vec3f(bitsToFloat(*(U32*)nodes.getMutablePtr(node * 64 + 4)), bitsToFloat(*(U32*)nodes.getMutablePtr(node * 64 + 12)), bitsToFloat(*(U32*)nodes.getMutablePtr(node * 64 + 36)));
1192 
1193  child1.min() = Vec3f(bitsToFloat(*(U32*)nodes.getMutablePtr(node * 64 + 16)), bitsToFloat(*(U32*)nodes.getMutablePtr(node * 64 + 24)), bitsToFloat(*(U32*)nodes.getMutablePtr(node * 64 + 40)));
1194  child1.max() = Vec3f(bitsToFloat(*(U32*)nodes.getMutablePtr(node * 64 + 20)), bitsToFloat(*(U32*)nodes.getMutablePtr(node * 64 + 28)), bitsToFloat(*(U32*)nodes.getMutablePtr(node * 64 + 44)));
1195 
1196  child0Addr = *(S32*)nodes.getMutablePtr(node * 64 + 48);
1197  child1Addr = *(S32*)nodes.getMutablePtr(node * 64 + 52);
1198 
1199  if(splitInfo != NULL)
1200  *splitInfo = SplitInfo(*(unsigned long*)nodes.getMutablePtr(node * 64 + 56));
1201 }
1202 
1203 //------------------------------------------------------------------------
1204 
1205 template <>
1206 void CudaBVH::getNodeTemplate<BVHLayout_Compact>(S32 node, SplitInfo *splitInfo, AABB &child0, AABB &child1, S32 &child0Addr, S32 &child1Addr)
1207 {
1208  Buffer &nodes = getNodeBuffer();
1209 
1210  child0.min() = Vec3f(bitsToFloat(*(U32*)nodes.getMutablePtr(node + 0)), bitsToFloat(*(U32*)nodes.getMutablePtr(node + 8)), bitsToFloat(*(U32*)nodes.getMutablePtr(node + 32)));
1211  child0.max() = Vec3f(bitsToFloat(*(U32*)nodes.getMutablePtr(node + 4)), bitsToFloat(*(U32*)nodes.getMutablePtr(node + 12)), bitsToFloat(*(U32*)nodes.getMutablePtr(node + 36)));
1212 
1213  child1.min() = Vec3f(bitsToFloat(*(U32*)nodes.getMutablePtr(node + 16)), bitsToFloat(*(U32*)nodes.getMutablePtr(node + 24)), bitsToFloat(*(U32*)nodes.getMutablePtr(node + 40)));
1214  child1.max() = Vec3f(bitsToFloat(*(U32*)nodes.getMutablePtr(node + 20)), bitsToFloat(*(U32*)nodes.getMutablePtr(node + 28)), bitsToFloat(*(U32*)nodes.getMutablePtr(node + 44)));
1215 
1216  child0Addr = *(S32*)nodes.getMutablePtr(node + 48);
1217  child1Addr = *(S32*)nodes.getMutablePtr(node + 52);
1218 
1219  if(splitInfo != NULL)
1220  *splitInfo = SplitInfo(*(unsigned long*)nodes.getMutablePtr(node + 56));
1221 }
1222 
1223 /*template <>
1224 void CudaBVH::getNodeTemplate<BVHLayout_CPU>(S32 node, SplitInfo *splitInfo, AABB &child0, AABB &child1, S32 &child0Addr, S32 &child1Addr)
1225 {
1226  Buffer &nodes = getNodeBuffer();
1227 
1228  child0.min() = Vec3f(bitsToFloat(*(U32*)nodes.getMutablePtr(node * 64 + 0)), bitsToFloat(*(U32*)nodes.getMutablePtr(node * 64 + 8)), bitsToFloat(*(U32*)nodes.getMutablePtr(node * 64 + 32)));
1229  child0.max() = Vec3f(bitsToFloat(*(U32*)nodes.getMutablePtr(node * 64 + 4)), bitsToFloat(*(U32*)nodes.getMutablePtr(node * 64 + 12)), bitsToFloat(*(U32*)nodes.getMutablePtr(node * 64 + 36)));
1230 
1231  child1.min() = Vec3f(bitsToFloat(*(U32*)nodes.getMutablePtr(node * 64 + 16)), bitsToFloat(*(U32*)nodes.getMutablePtr(node * 64 + 24)), bitsToFloat(*(U32*)nodes.getMutablePtr(node * 64 + 40)));
1232  child1.max() = Vec3f(bitsToFloat(*(U32*)nodes.getMutablePtr(node * 64 + 20)), bitsToFloat(*(U32*)nodes.getMutablePtr(node * 64 + 28)), bitsToFloat(*(U32*)nodes.getMutablePtr(node * 64 + 44)));
1233 
1234  child0Addr = *(S32*)nodes.getMutablePtr(node * 64 + 48);
1235  child1Addr = *(S32*)nodes.getMutablePtr(node * 64 + 52);
1236 
1237  if(splitInfo != NULL)
1238  *splitInfo = SplitInfo(*(unsigned long*)nodes.getMutablePtr(node * 64 + 56));
1239 }*/
1240 
1241 //------------------------------------------------------------------------
U32 floatToBits(F32 a)
Definition: Math.hpp:95
CUdevice int ordinal char int CUdevice dev CUdevprop CUdevice dev CUcontext ctx CUcontext ctx CUcontext pctx CUmodule const void image CUmodule const void fatCubin CUfunction CUmodule const char name void p CUfunction unsigned int bytes CUtexref pTexRef CUtexref CUarray unsigned int Flags CUtexref int CUaddress_mode am CUtexref unsigned int Flags CUaddress_mode CUtexref int dim CUarray_format int CUtexref hTexRef CUfunction unsigned int numbytes CUfunction int offset
Definition: DLLImports.inl:84
RayStats * m_stats
Definition: CudaBVH.hpp:225
S32 getSize() const
Gets size of the buffer (number of rays).
Definition: RayBuffer.hpp:52
#define NULL
Definition: Defs.hpp:39
S32 m_lo
Lower index to the BVH's triangle index array.
Definition: BVHNode.hpp:314
#define FW_F32_MAX
Definition: Defs.hpp:118
void set(const void *ptr)
Definition: Buffer.hpp:92
CUdevice int ordinal char int CUdevice dev CUdevprop CUdevice dev CUcontext ctx CUcontext ctx CUcontext pctx CUmodule const void image CUmodule const void fatCubin CUfunction CUmodule const char name void p CUfunction unsigned int bytes CUtexref pTexRef CUtexref CUarray unsigned int Flags CUtexref int CUaddress_mode am CUtexref unsigned int Flags CUaddress_mode CUtexref int dim CUarray_format int CUtexref hTexRef CUfunction unsigned int numbytes CUfunction int float value CUfunction int CUtexref hTexRef CUfunction int int grid_height CUevent unsigned int Flags CUevent hEvent CUevent hEvent CUstream unsigned int Flags CUstream hStream GLuint bufferobj unsigned int CUdevice dev CUdeviceptr unsigned int CUmodule const char name CUdeviceptr unsigned int bytesize CUdeviceptr dptr void unsigned int bytesize void CUdeviceptr unsigned int ByteCount CUarray unsigned int CUdeviceptr unsigned int ByteCount CUarray unsigned int const void unsigned int ByteCount CUarray unsigned int CUarray unsigned int unsigned int ByteCount void CUarray unsigned int unsigned int CUstream hStream const CUDA_MEMCPY2D pCopy CUdeviceptr const void unsigned int CUstream hStream const CUDA_MEMCPY2D CUstream hStream CUdeviceptr unsigned char unsigned int N CUdeviceptr unsigned int unsigned int N CUdeviceptr unsigned int unsigned short unsigned int unsigned int Height CUarray const CUDA_ARRAY_DESCRIPTOR pAllocateArray CUarray const CUDA_ARRAY3D_DESCRIPTOR pAllocateArray unsigned int CUtexref CUdeviceptr unsigned int bytes CUcontext unsigned int CUdevice device GLenum texture GLenum GLuint buffer GLenum GLuint renderbuffer GLenum GLsizeiptr const GLvoid GLenum usage GLuint shader GLenum type GLsizei const GLuint framebuffers GLsizei const GLuint renderbuffers GLuint v GLuint v GLenum GLenum GLenum GLuint GLint level GLsizei GLuint framebuffers GLuint const GLchar name GLenum GLintptr GLsizeiptr GLvoid data GLuint GLenum GLint param GLuint GLenum GLint param GLhandleARB programObj GLenum GLenum GLsizei 
GLsizei height GLenum GLint GLint GLsizei GLsizei GLsizei GLint GLenum GLenum const GLvoid pixels GLint GLsizei const GLfloat value GLint GLfloat GLfloat v1
Definition: DLLImports.inl:353
Cuda BVH class.
Definition: CudaBVH.hpp:93
Buffer & getTriVtxIndexBuffer(void)
Returns buffer of triangle's vertex indices.
Definition: Scene.hpp:75
Vec2i getNodeSubArray(int idx) const
Returns node subarray.
Definition: CudaBVH.cpp:380
void sort(void *data, int start, int end, SortCompareFunc compareFunc, SortSwapFunc swapFunc, bool multicore=false)
Definition: Sort.cpp:203
void trace(RayBuffer &rays, Buffer &visibility, bool twoTrees, RayStats *stats=NULL)
Definition: CudaBVH.cpp:178
bool eCompare(void *data, int idxA, int idxB)
Definition: CudaBVH.cpp:745
FW_CUDA_FUNC const Vec3f & max(void) const
Definition: Util.hpp:49
BVH leaf node.
Definition: BVHNode.hpp:275
CUdevice int ordinal char int CUdevice dev CUdevprop CUdevice dev CUcontext ctx CUcontext ctx CUcontext pctx CUmodule const void image CUmodule const void fatCubin CUfunction CUmodule const char name void p CUfunction unsigned int bytes CUtexref pTexRef CUtexref CUarray unsigned int Flags CUtexref int CUaddress_mode am CUtexref unsigned int Flags CUaddress_mode CUtexref int dim CUarray_format int CUtexref hTexRef CUfunction unsigned int numbytes CUfunction int float value CUfunction int CUtexref hTexRef CUfunction int int grid_height CUevent unsigned int Flags CUevent hEvent CUevent hEvent CUstream unsigned int Flags CUstream hStream GLuint bufferobj unsigned int CUdevice dev CUdeviceptr unsigned int CUmodule const char name CUdeviceptr unsigned int bytesize CUdeviceptr dptr void unsigned int bytesize void CUdeviceptr unsigned int ByteCount CUarray unsigned int CUdeviceptr unsigned int ByteCount CUarray unsigned int const void unsigned int ByteCount CUarray unsigned int CUarray unsigned int unsigned int ByteCount void CUarray unsigned int unsigned int CUstream hStream const CUDA_MEMCPY2D pCopy CUdeviceptr const void unsigned int CUstream hStream const CUDA_MEMCPY2D CUstream hStream CUdeviceptr unsigned char unsigned int N CUdeviceptr unsigned int unsigned int N CUdeviceptr unsigned int unsigned short unsigned int unsigned int Height CUarray const CUDA_ARRAY_DESCRIPTOR pAllocateArray CUarray const CUDA_ARRAY3D_DESCRIPTOR pAllocateArray unsigned int CUtexref CUdeviceptr unsigned int bytes CUcontext unsigned int CUdevice device GLenum texture GLenum GLuint buffer GLenum GLuint renderbuffer GLenum GLsizeiptr const GLvoid * data
Definition: DLLImports.inl:319
void ** ptr
Definition: DLLImports.cpp:74
void eSwap(void *data, int idxA, int idxB)
Definition: CudaBVH.cpp:755
Buffer & getVtxPosBuffer(void)
Returns vertex position buffer.
Definition: Scene.hpp:103
float t
Definition: Util.hpp:84
CUdevice int ordinal char int CUdevice dev CUdevprop CUdevice dev CUcontext ctx CUcontext ctx CUcontext pctx CUmodule const void image CUmodule const void fatCubin CUfunction CUmodule const char name void p CUfunction unsigned int bytes CUtexref pTexRef CUtexref CUarray unsigned int Flags CUtexref int CUaddress_mode am CUtexref unsigned int Flags CUaddress_mode CUtexref int dim CUarray_format int CUtexref hTexRef CUfunction unsigned int numbytes CUfunction int float value CUfunction int CUtexref hTexRef CUfunction int int grid_height CUevent unsigned int Flags CUevent hEvent CUevent hEvent CUstream unsigned int Flags CUstream hStream GLuint bufferobj unsigned int CUdevice dev CUdeviceptr unsigned int CUmodule const char name CUdeviceptr unsigned int bytesize CUdeviceptr dptr void unsigned int bytesize void CUdeviceptr unsigned int ByteCount CUarray unsigned int CUdeviceptr unsigned int ByteCount CUarray unsigned int const void unsigned int ByteCount CUarray unsigned int CUarray unsigned int unsigned int ByteCount void CUarray unsigned int unsigned int CUstream hStream const CUDA_MEMCPY2D pCopy CUdeviceptr const void unsigned int CUstream hStream const CUDA_MEMCPY2D CUstream hStream CUdeviceptr unsigned char unsigned int N CUdeviceptr unsigned int unsigned int N CUdeviceptr unsigned int unsigned short unsigned int unsigned int Height CUarray const CUDA_ARRAY_DESCRIPTOR pAllocateArray CUarray const CUDA_ARRAY3D_DESCRIPTOR pAllocateArray unsigned int CUtexref CUdeviceptr unsigned int bytes CUcontext unsigned int CUdevice device GLenum texture GLenum GLuint buffer GLenum GLuint renderbuffer GLenum GLsizeiptr const GLvoid GLenum usage GLuint shader GLenum type GLsizei const GLuint framebuffers GLsizei const GLuint renderbuffers GLuint v GLuint v GLenum GLenum GLenum GLuint GLint level GLsizei GLuint framebuffers GLuint const GLchar name GLenum GLintptr GLsizeiptr GLvoid data GLuint GLenum GLint param GLuint GLenum GLint param GLhandleARB programObj GLenum GLenum GLsizei 
GLsizei height GLenum GLint GLint GLsizei GLsizei GLsizei GLint GLenum GLenum const GLvoid pixels GLint GLsizei const GLfloat value GLint GLfloat GLfloat v1 GLint GLfloat GLfloat GLfloat v2
Definition: DLLImports.inl:355
FW_CUDA_FUNC bool hit(void) const
Definition: Util.hpp:80
void findVisibleTriangles(RayBuffer &rays, S32 *references, S32 offset)
Definition: CudaBVH.cpp:94
S32 numRays
Total number of rays.
Definition: BVH.hpp:62
Vec2f RayBox(const AABB &box, const Ray &ray)
Definition: Util.cpp:34
Definition: Util.hpp:62
S64 getSize(void) const
Definition: Buffer.hpp:69
Structure holding ray statistics. Also provides print to the console. These statistics are used in a ...
Definition: BVH.hpp:45
const Ray & getRayForSlot(S32 slot) const
Gets a ray assigned to a given slot.
Definition: RayBuffer.hpp:89
FW_CUDA_FUNC const S32 * getPtr(void) const
Definition: Math.hpp:287
const U8 * getPtr(S64 ofs=0)
Definition: Buffer.hpp:106
S32 getNumBytes(void) const
CUdevice int ordinal char int CUdevice dev CUdevprop CUdevice dev CUcontext ctx CUcontext ctx CUcontext pctx CUmodule const void image CUmodule const void fatCubin CUfunction CUmodule const char name void p CUfunction unsigned int bytes CUtexref pTexRef CUtexref CUarray unsigned int Flags CUtexref int CUaddress_mode am CUtexref unsigned int Flags CUaddress_mode CUtexref int dim CUarray_format int CUtexref hTexRef CUfunction unsigned int numbytes CUfunction int float value CUfunction int CUtexref hTexRef CUfunction int int grid_height CUevent unsigned int Flags CUevent hEvent CUevent hEvent CUstream unsigned int Flags CUstream hStream GLuint bufferobj unsigned int CUdevice dev CUdeviceptr unsigned int CUmodule const char name CUdeviceptr unsigned int bytesize CUdeviceptr dptr void unsigned int bytesize void CUdeviceptr unsigned int ByteCount CUarray unsigned int CUdeviceptr unsigned int ByteCount CUarray unsigned int const void unsigned int ByteCount CUarray unsigned int CUarray unsigned int unsigned int ByteCount void CUarray unsigned int unsigned int CUstream hStream const CUDA_MEMCPY2D pCopy CUdeviceptr const void unsigned int CUstream hStream const CUDA_MEMCPY2D CUstream hStream CUdeviceptr unsigned char unsigned int N CUdeviceptr unsigned int unsigned int N CUdeviceptr unsigned int unsigned short unsigned int unsigned int Height CUarray const CUDA_ARRAY_DESCRIPTOR pAllocateArray CUarray const CUDA_ARRAY3D_DESCRIPTOR pAllocateArray unsigned int CUtexref CUdeviceptr unsigned int bytes CUcontext unsigned int CUdevice device GLenum texture GLenum GLuint buffer GLenum GLuint renderbuffer GLenum GLsizeiptr const GLvoid GLenum usage GLuint shader GLenum type GLsizei const GLuint framebuffers GLsizei const GLuint renderbuffers GLuint v GLuint v GLenum GLenum GLenum GLuint GLint level GLsizei GLuint framebuffers GLuint const GLchar name GLenum GLintptr GLsizeiptr GLvoid data GLuint GLenum GLint param GLuint GLenum GLint param GLhandleARB programObj GLenum GLenum GLsizei 
GLsizei height GLenum GLint GLint GLsizei GLsizei GLsizei GLint GLenum GLenum const GLvoid pixels GLint GLsizei const GLfloat value GLint GLfloat GLfloat v1 GLint GLfloat GLfloat GLfloat v2 GLint GLsizei const GLfloat value GLint GLsizei GLboolean const GLfloat value GLuint program GLuint GLfloat x
Definition: DLLImports.inl:363
bool getNeedClosestHit() const
Returns whether the closest hit is needed.
Definition: RayBuffer.hpp:150
CudaBVH & operator=(CudaBVH &other)
Assignment operator.
Definition: CudaBVH.cpp:404
~CudaBVH(void)
Destructor.
Definition: CudaBVH.cpp:81
CUdevice int ordinal char int CUdevice dev CUdevprop CUdevice dev CUcontext ctx CUcontext ctx CUcontext pctx CUmodule const void image CUmodule const void fatCubin CUfunction CUmodule const char name void p CUfunction unsigned int bytes CUtexref pTexRef CUtexref CUarray unsigned int Flags CUtexref int CUaddress_mode am CUtexref unsigned int Flags CUaddress_mode CUtexref int dim CUarray_format int CUtexref hTexRef CUfunction unsigned int numbytes CUfunction int float value CUfunction int CUtexref hTexRef CUfunction int int grid_height CUevent unsigned int Flags CUevent hEvent CUevent hEvent CUstream unsigned int Flags CUstream hStream GLuint bufferobj unsigned int CUdevice dev CUdeviceptr unsigned int CUmodule const char name CUdeviceptr unsigned int bytesize CUdeviceptr dptr void unsigned int bytesize void CUdeviceptr unsigned int ByteCount CUarray unsigned int CUdeviceptr unsigned int ByteCount CUarray unsigned int const void unsigned int ByteCount CUarray unsigned int CUarray unsigned int unsigned int ByteCount void CUarray unsigned int unsigned int CUstream hStream const CUDA_MEMCPY2D pCopy CUdeviceptr const void unsigned int CUstream hStream const CUDA_MEMCPY2D CUstream hStream CUdeviceptr unsigned char unsigned int N CUdeviceptr unsigned int unsigned int N CUdeviceptr unsigned int unsigned short unsigned int unsigned int Height CUarray const CUDA_ARRAY_DESCRIPTOR pAllocateArray CUarray const CUDA_ARRAY3D_DESCRIPTOR pAllocateArray unsigned int CUtexref CUdeviceptr unsigned int bytes CUcontext unsigned int CUdevice device GLenum texture GLenum GLuint buffer GLenum GLuint renderbuffer GLenum GLsizeiptr const GLvoid GLenum usage GLuint shader GLenum type GLsizei const GLuint framebuffers GLsizei const GLuint renderbuffers GLuint v GLuint v GLenum GLenum GLenum GLuint GLint level GLsizei GLuint framebuffers GLuint const GLchar name GLenum GLintptr GLsizeiptr GLvoid data GLuint GLenum GLint param GLuint GLenum GLint param GLhandleARB programObj GLenum GLenum GLsizei 
GLsizei height GLenum GLint GLint GLsizei GLsizei GLsizei GLint GLenum GLenum const GLvoid pixels GLint GLsizei const GLfloat value GLint GLfloat GLfloat v1 GLint GLfloat GLfloat GLfloat v2 GLint GLsizei const GLfloat value GLint GLsizei GLboolean const GLfloat value GLuint program GLuint GLfloat GLfloat GLfloat z
Definition: DLLImports.inl:363
Array< S32 > & getTriIndices(void)
Returns an array of triangle indices to which leaf nodes are pointing. These indices point to scene's ...
Definition: BVH.hpp:180
float F32
Definition: Defs.hpp:89
CUdevice int ordinal char int CUdevice dev CUdevprop CUdevice dev CUcontext ctx CUcontext ctx CUcontext pctx CUmodule const void image CUmodule const void fatCubin CUfunction CUmodule const char name void p CUfunction unsigned int bytes CUtexref pTexRef CUtexref CUarray unsigned int Flags CUtexref int CUaddress_mode am CUtexref unsigned int Flags CUaddress_mode CUtexref int dim CUarray_format int CUtexref hTexRef CUfunction unsigned int numbytes CUfunction int float value CUfunction int CUtexref hTexRef CUfunction int int grid_height CUevent unsigned int Flags CUevent hEvent CUevent hEvent CUstream unsigned int Flags CUstream hStream GLuint bufferobj unsigned int CUdevice dev CUdeviceptr unsigned int CUmodule const char name CUdeviceptr unsigned int bytesize CUdeviceptr dptr void unsigned int bytesize void CUdeviceptr unsigned int ByteCount CUarray unsigned int CUdeviceptr unsigned int ByteCount CUarray unsigned int const void unsigned int ByteCount CUarray unsigned int CUarray unsigned int unsigned int ByteCount void CUarray unsigned int unsigned int CUstream hStream const CUDA_MEMCPY2D pCopy CUdeviceptr const void unsigned int CUstream hStream const CUDA_MEMCPY2D CUstream hStream CUdeviceptr unsigned char unsigned int N CUdeviceptr unsigned int unsigned int N CUdeviceptr unsigned int unsigned short unsigned int unsigned int Height CUarray const CUDA_ARRAY_DESCRIPTOR pAllocateArray CUarray const CUDA_ARRAY3D_DESCRIPTOR pAllocateArray unsigned int CUtexref CUdeviceptr unsigned int bytes CUcontext unsigned int CUdevice device GLenum texture GLenum GLuint buffer GLenum GLuint renderbuffer GLenum GLsizeiptr const GLvoid GLenum usage GLuint shader GLenum type GLsizei const GLuint framebuffers GLsizei const GLuint renderbuffers GLuint v GLuint v GLenum GLenum GLenum GLuint GLint level GLsizei GLuint framebuffers GLuint const GLchar name GLenum GLintptr GLsizeiptr GLvoid data GLuint GLenum GLint param GLuint GLenum GLint param GLhandleARB programObj GLenum GLenum GLsizei 
GLsizei height GLenum GLint GLint GLsizei GLsizei GLsizei GLint GLenum GLenum const GLvoid pixels GLint GLsizei const GLfloat value GLint GLfloat GLfloat v1 GLint GLfloat GLfloat GLfloat v2 GLint GLsizei const GLfloat value GLint GLsizei GLboolean const GLfloat value GLuint program GLuint GLfloat GLfloat y
Definition: DLLImports.inl:363
Vec3f RayTriangle(const Vec3f &v0, const Vec3f &v1, const Vec3f &v2, const Ray &ray)
Definition: Util.cpp:50
Scene * getScene(void) const
FW_CUDA_FUNC S invert(const MatrixBase< T, L, S > &v)
Definition: Math.hpp:784
Buffer & getTriWoopBuffer(void)
Definition: CudaBVH.hpp:145
Ray buffer class. Stores rays.
Definition: RayBuffer.hpp:38
U8 * getMutablePtr(S64 ofs=0)
Definition: Buffer.hpp:110
Vec3f RayTriangleWoop(const Vec4f &zpleq, const Vec4f &upleq, const Vec4f &vpleq, const Ray &ray)
Definition: Util.cpp:99
FW_CUDA_FUNC T min(const VectorBase< T, L, S > &v)
Definition: Math.hpp:461
FW_CUDA_FUNC T max(const VectorBase< T, L, S > &v)
Definition: Math.hpp:462
BVH acceleration structure class.
Definition: BVH.hpp:74
Declarations for the Cuda version of the BVH.
float tmax
Definition: Util.hpp:70
#define FW_ASSERT(X)
Definition: Defs.hpp:67
signed int S32
Definition: Defs.hpp:88
S32 padA
Definition: Util.hpp:85
FW_CUDA_FUNC void clear(void)
Definition: Util.hpp:81
CUdevice int ordinal char int CUdevice dev CUdevprop CUdevice dev CUcontext ctx CUcontext ctx CUcontext pctx CUmodule const void image CUmodule const void fatCubin CUfunction CUmodule const char name void p CUfunction unsigned int bytes CUtexref pTexRef CUtexref CUarray unsigned int Flags CUtexref int CUaddress_mode am CUtexref unsigned int Flags CUaddress_mode CUtexref int dim CUarray_format int CUtexref hTexRef CUfunction unsigned int numbytes CUfunction int float value CUfunction int CUtexref hTexRef CUfunction int int grid_height CUevent unsigned int Flags CUevent hEvent CUevent hEvent CUstream unsigned int Flags CUstream hStream GLuint bufferobj unsigned int CUdevice dev CUdeviceptr unsigned int CUmodule const char name CUdeviceptr unsigned int bytesize CUdeviceptr dptr void unsigned int bytesize void CUdeviceptr unsigned int ByteCount CUarray unsigned int CUdeviceptr unsigned int ByteCount CUarray unsigned int const void unsigned int ByteCount CUarray unsigned int CUarray unsigned int unsigned int ByteCount void CUarray unsigned int unsigned int CUstream hStream const CUDA_MEMCPY2D pCopy CUdeviceptr const void unsigned int CUstream hStream const CUDA_MEMCPY2D CUstream hStream CUdeviceptr unsigned char unsigned int N CUdeviceptr unsigned int unsigned int N CUdeviceptr unsigned int unsigned short unsigned int unsigned int Height CUarray const CUDA_ARRAY_DESCRIPTOR pAllocateArray CUarray const CUDA_ARRAY3D_DESCRIPTOR pAllocateArray unsigned int CUtexref CUdeviceptr unsigned int bytes CUcontext unsigned int CUdevice device GLenum texture GLenum GLuint buffer GLenum GLuint renderbuffer GLenum GLsizeiptr const GLvoid GLenum usage GLuint shader GLenum type GLsizei const GLuint framebuffers GLsizei const GLuint renderbuffers GLuint v GLuint v GLenum GLenum GLenum GLuint GLint level GLsizei GLuint framebuffers GLuint const GLchar name GLenum GLintptr GLsizeiptr GLvoid data GLuint GLenum GLint param GLuint GLenum GLint param GLhandleARB programObj GLenum GLenum GLsizei 
GLsizei height GLenum GLint GLint GLsizei GLsizei GLsizei GLint GLenum GLenum const GLvoid pixels GLint GLsizei const GLfloat value GLint GLfloat v0
Definition: DLLImports.inl:353
virtual bool isLeaf() const =0
T & add(void)
Definition: Array.hpp:384
FW_CUDA_FUNC const Vec3f & min(void) const
Definition: Util.hpp:48
float tmin
Definition: Util.hpp:68
S32 numNodeTests
Total number of ray-node tests.
Definition: BVH.hpp:64
CudaBVH(const BVH &bvh, BVHLayout layout)
Constructor.
Definition: CudaBVH.cpp:49
unsigned int U32
Definition: Defs.hpp:85
void serialize(OutputStream &out)
Writes Cuda BVH to the output stream.
Definition: CudaBVH.cpp:87
AABB m_bounds
Bounding box of the node.
Definition: BVHNode.hpp:172
CUdevice int ordinal char int CUdevice dev CUdevprop CUdevice dev CUcontext ctx CUcontext ctx CUcontext pctx CUmodule const void image CUmodule const void fatCubin CUfunction CUmodule const char name void p CUfunction unsigned int bytes CUtexref pTexRef CUtexref CUarray unsigned int Flags CUtexref int CUaddress_mode am CUtexref unsigned int Flags CUaddress_mode CUtexref int dim CUarray_format int CUtexref hTexRef CUfunction unsigned int numbytes CUfunction int float value CUfunction int CUtexref hTexRef CUfunction int int grid_height CUevent unsigned int Flags CUevent hEvent CUevent hEvent CUstream unsigned int Flags CUstream hStream GLuint bufferobj unsigned int CUdevice dev CUdeviceptr unsigned int CUmodule const char name CUdeviceptr unsigned int bytesize CUdeviceptr dptr void unsigned int bytesize void CUdeviceptr unsigned int ByteCount CUarray unsigned int CUdeviceptr unsigned int ByteCount CUarray unsigned int const void unsigned int ByteCount CUarray unsigned int CUarray unsigned int unsigned int ByteCount void CUarray unsigned int unsigned int CUstream hStream const CUDA_MEMCPY2D pCopy CUdeviceptr const void unsigned int CUstream hStream const CUDA_MEMCPY2D CUstream hStream CUdeviceptr unsigned char unsigned int N CUdeviceptr unsigned int unsigned int N CUdeviceptr unsigned int unsigned short unsigned int unsigned int Height CUarray const CUDA_ARRAY_DESCRIPTOR pAllocateArray CUarray const CUDA_ARRAY3D_DESCRIPTOR pAllocateArray unsigned int CUtexref CUdeviceptr unsigned int bytes CUcontext unsigned int CUdevice device GLenum texture GLenum GLuint buffer GLenum GLuint renderbuffer GLenum GLsizeiptr const GLvoid GLenum usage GLuint shader GLenum type GLsizei const GLuint framebuffers GLsizei const GLuint renderbuffers GLuint v GLuint v GLenum GLenum GLenum GLuint GLint level GLsizei GLuint framebuffers GLuint const GLchar name GLenum GLintptr GLsizeiptr GLvoid data GLuint GLenum GLint param GLuint GLenum GLint param GLhandleARB programObj GLenum GLenum GLsizei 
GLsizei height GLenum GLint GLint GLsizei GLsizei GLsizei GLint GLenum GLenum const GLvoid pixels GLint GLsizei const GLfloat value GLint GLfloat GLfloat v1 GLint GLfloat GLfloat GLfloat v2 GLint GLsizei const GLfloat value GLint GLsizei GLboolean const GLfloat value GLuint program GLuint index
Definition: DLLImports.inl:363
Class holding information about a split of a BVH node.
Definition: BVHNode.hpp:58
FW_CUDA_FUNC Vector< T, L > getRow(int r) const
Definition: Math.hpp:881
F32 bitsToFloat(U32 a)
Definition: Math.hpp:96
CUdevice int ordinal char int CUdevice dev CUdevprop CUdevice dev CUcontext ctx CUcontext ctx CUcontext pctx CUmodule const void image CUmodule const void fatCubin CUfunction CUmodule const char name void p CUfunction unsigned int bytes CUtexref pTexRef CUtexref CUarray unsigned int Flags CUtexref int CUaddress_mode am CUtexref unsigned int Flags CUaddress_mode CUtexref int dim CUarray_format int CUtexref hTexRef CUfunction unsigned int numbytes CUfunction int float value CUfunction int CUtexref hTexRef CUfunction f
Definition: DLLImports.inl:88
FW_CUDA_FUNC F32 cross(const Vec2f &a, const Vec2f &b)
Definition: Math.hpp:481
CUdevice int ordinal char int CUdevice dev CUdevprop CUdevice dev CUcontext ctx CUcontext ctx CUcontext pctx CUmodule const void image CUmodule const void fatCubin CUfunction CUmodule const char name void p CUfunction unsigned int bytes CUtexref pTexRef CUtexref CUarray unsigned int Flags CUtexref int CUaddress_mode am CUtexref unsigned int Flags CUaddress_mode CUtexref int dim CUarray_format int CUtexref hTexRef CUfunction unsigned int numbytes CUfunction int float value CUfunction int CUtexref hTexRef CUfunction int int grid_height CUevent unsigned int Flags CUevent hEvent CUevent hEvent CUstream unsigned int Flags CUstream hStream GLuint bufferobj unsigned int CUdevice dev CUdeviceptr unsigned int CUmodule const char name CUdeviceptr unsigned int bytesize CUdeviceptr dptr void unsigned int bytesize void CUdeviceptr unsigned int ByteCount CUarray unsigned int CUdeviceptr unsigned int ByteCount CUarray unsigned int const void unsigned int ByteCount CUarray unsigned int CUarray unsigned int unsigned int ByteCount void CUarray unsigned int unsigned int CUstream hStream const CUDA_MEMCPY2D pCopy CUdeviceptr const void unsigned int CUstream hStream const CUDA_MEMCPY2D CUstream hStream CUdeviceptr unsigned char unsigned int N CUdeviceptr unsigned int unsigned int N CUdeviceptr unsigned int unsigned short unsigned int unsigned int Height CUarray const CUDA_ARRAY_DESCRIPTOR pAllocateArray CUarray const CUDA_ARRAY3D_DESCRIPTOR pAllocateArray unsigned int CUtexref CUdeviceptr unsigned int bytes CUcontext unsigned int CUdevice device GLenum texture GLenum GLuint buffer GLenum GLuint renderbuffer GLenum GLsizeiptr const GLvoid GLenum usage GLuint shader GLenum type GLsizei n
Definition: DLLImports.inl:325
const RayResult & getResultForSlot(S32 slot) const
Gets a ray result assigned to a given slot.
Definition: RayBuffer.hpp:103
Buffer & getTriIndexBuffer(void)
Definition: CudaBVH.hpp:150
int getSubtreeSize(BVH_STAT stat=BVH_STAT_NODE_COUNT) const
Calculates various information about the node's subtree.
Definition: BVHNode.cpp:36
FW_CUDA_FUNC void swap(T &a, T &b)
Definition: Defs.hpp:183
Vec2i getTriWoopSubArray(int idx) const
Returns woop triangle subarray.
Definition: CudaBVH.cpp:392
BVH virtual node. Parent class of both a leaf node and an inner node.
Definition: BVHNode.hpp:136
void getTriangleIndices(S32 node, Array< S32 > &indices)
Definition: CudaBVH.cpp:341
Buffer & getNodeBuffer(void)
Definition: CudaBVH.hpp:140
void getNode(S32 node, SplitInfo *splitInfo, AABB &child0, AABB &child1, S32 &child0Addr, S32 &child1Addr)
Definition: CudaBVH.cpp:323
const T * getPtr(S32 idx=0) const
void resizeDiscard(S64 size)
Definition: Buffer.hpp:83
S32 _max(S32 a, F32 b)
Definition: CudaBVH.cpp:647
FW_CUDA_FUNC void setCol(int c, const VectorBase< T, L, V > &v)
Definition: Math.hpp:576
S32 padB
Definition: Util.hpp:86
BVHNode * getRoot(void) const
Returns root node of the BVH.
Definition: BVH.hpp:167
RayResult & getMutableResultForSlot(S32 slot)
Gets a mutable ray result assigned to a given slot.
Definition: RayBuffer.hpp:110
CUdevice int ordinal char int CUdevice dev CUdevprop CUdevice dev CUcontext ctx CUcontext ctx CUcontext pctx CUmodule const void image CUmodule const void fatCubin CUfunction CUmodule const char name void p CUfunction unsigned int bytes CUtexref pTexRef CUtexref CUarray unsigned int Flags CUtexref int CUaddress_mode am CUtexref unsigned int Flags CUaddress_mode CUtexref int dim CUarray_format int CUtexref hTexRef CUfunction unsigned int numbytes CUfunction int float value CUfunction int CUtexref hTexRef CUfunction int int grid_height CUevent unsigned int Flags CUevent hEvent CUevent hEvent CUstream unsigned int Flags CUstream hStream GLuint bufferobj unsigned int CUdevice dev CUdeviceptr unsigned int CUmodule const char name CUdeviceptr unsigned int bytesize CUdeviceptr dptr void unsigned int bytesize void CUdeviceptr unsigned int ByteCount CUarray unsigned int CUdeviceptr unsigned int ByteCount CUarray unsigned int const void unsigned int ByteCount CUarray unsigned int CUarray unsigned int unsigned int ByteCount void CUarray unsigned int unsigned int CUstream hStream const CUDA_MEMCPY2D pCopy CUdeviceptr const void unsigned int CUstream hStream const CUDA_MEMCPY2D CUstream hStream CUdeviceptr unsigned char unsigned int N CUdeviceptr unsigned int unsigned int N CUdeviceptr unsigned int unsigned short unsigned int unsigned int Height CUarray const CUDA_ARRAY_DESCRIPTOR pAllocateArray CUarray const CUDA_ARRAY3D_DESCRIPTOR pAllocateArray unsigned int CUtexref CUdeviceptr unsigned int bytes CUcontext unsigned int CUdevice device GLenum texture GLenum GLuint buffer GLenum GLuint renderbuffer GLenum GLsizeiptr size
Definition: DLLImports.inl:319
S32 m_hi
Higher index to the BVH's triangle index array.
Definition: BVHNode.hpp:315
S getSize(void) const
Definition: Array.hpp:188
CUdevice int ordinal char int CUdevice dev CUdevprop CUdevice dev CUcontext ctx CUcontext ctx CUcontext pctx CUmodule const void image CUmodule const void fatCubin CUfunction CUmodule const char name void p CUfunction unsigned int bytes CUtexref pTexRef CUtexref CUarray unsigned int Flags CUtexref int CUaddress_mode am CUtexref unsigned int Flags CUaddress_mode CUtexref int dim CUarray_format int CUtexref hTexRef CUfunction unsigned int numbytes CUfunction int float value CUfunction int CUtexref hTexRef CUfunction int int grid_height CUevent unsigned int Flags CUevent hEvent CUevent hEvent CUstream unsigned int Flags CUstream hStream GLuint bufferobj unsigned int CUdevice dev CUdeviceptr unsigned int CUmodule const char name CUdeviceptr unsigned int bytesize CUdeviceptr dptr void unsigned int bytesize void CUdeviceptr unsigned int ByteCount CUarray unsigned int CUdeviceptr unsigned int ByteCount CUarray unsigned int const void unsigned int ByteCount CUarray unsigned int CUarray unsigned int unsigned int ByteCount void CUarray unsigned int unsigned int CUstream hStream const CUDA_MEMCPY2D pCopy CUdeviceptr const void unsigned int CUstream hStream const CUDA_MEMCPY2D CUstream hStream CUdeviceptr unsigned char unsigned int N CUdeviceptr unsigned int unsigned int N CUdeviceptr unsigned int unsigned short unsigned int unsigned int Height CUarray const CUDA_ARRAY_DESCRIPTOR pAllocateArray CUarray const CUDA_ARRAY3D_DESCRIPTOR pAllocateArray unsigned int CUtexref CUdeviceptr unsigned int bytes CUcontext unsigned int CUdevice device GLenum texture GLenum GLuint buffer GLenum GLuint renderbuffer GLenum GLsizeiptr const GLvoid GLenum usage GLuint shader GLenum type GLsizei const GLuint framebuffers GLsizei const GLuint renderbuffers GLuint v GLuint v GLenum GLenum GLenum GLuint GLint level GLsizei GLuint framebuffers GLuint const GLchar name GLenum GLintptr GLsizeiptr GLvoid data GLuint GLenum GLint param GLuint GLenum GLint param GLhandleARB programObj GLenum GLenum GLsizei 
GLsizei height GLenum GLint GLint GLsizei GLsizei GLsizei GLint GLenum GLenum const GLvoid pixels GLint GLsizei const GLfloat value GLint GLfloat GLfloat v1 GLint GLfloat GLfloat GLfloat v2 GLint GLsizei const GLfloat value GLint GLsizei GLboolean const GLfloat value GLuint program GLuint GLfloat GLfloat GLfloat z GLuint GLint GLenum GLboolean GLsizei const GLvoid pointer GLuint GLuint const GLchar name GLenum GLsizei GLenum GLsizei GLsizei height GLenum GLuint renderbuffer GLenum GLenum GLint params GLuint GLsizei range GLuint GLsizei const GLubyte GLsizei GLenum const GLvoid coords GLuint GLsizei GLsizei GLsizei const GLubyte GLsizei GLenum const GLvoid coords GLuint GLenum GLsizei const GLvoid pathString GLuint GLenum const GLvoid GLbitfield GLuint GLsizei GLenum GLuint GLfloat emScale GLuint GLuint srcPath GLuint GLuint GLenum const GLfloat transformValues GLuint GLenum GLint value GLuint GLenum GLfloat value GLenum GLint ref
Definition: DLLImports.inl:400
Platform platform
Platform settings of the BVH. Set by whoever sets the stats.
Definition: BVH.hpp:66
S32 roundToNodeBatchSize(S32 n) const
Rounds given value up to the nearest node batch size multiple.
Definition: Platform.hpp:139