NTrace
GPU ray tracing framework
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros
FastKDTreeBuilder.cpp
Go to the documentation of this file.
1 /*
2 * Copyright (c) 2013, Radek Stibora
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are met:
7 * * Redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer.
9 * * Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
12 * * Neither the name of the <organization> nor the
13 * names of its contributors may be used to endorse or promote products
14 * derived from this software without specific prior written permission.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19 * DISCLAIMED. IN NO EVENT SHALL <COPYRIGHT HOLDER> BE LIABLE FOR ANY
20 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
22 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
23 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 */
27 
29 
30 #include "base/Sort.hpp"
31 
32 //#define DEBUG
33 #define MERGESORT
34 
35 using namespace FW;
36 
37 //------------------------------------------------------------------------
38 
40 :
41  m_kdtree (kdtree),
42  m_platform (kdtree.getPlatform()),
43  m_params (params),
44  m_numDuplicates (0),
45  m_maxDepth ((S32)(1.2f * log2((FW::F32)kdtree.getScene()->getNumTriangles()) + 2.f)),
46  m_maxFailSplits ((S32)(1.f + 0.2f * m_maxDepth))
47  {}
48 
49 //------------------------------------------------------------------------
50 
52 {
53  // Initialize event and triangle index stack and determine root bounds.
54 
55  const Vec3i* tris = (const Vec3i*)m_kdtree.getScene()->getTriVtxIndexBuffer().getPtr();
56  const Vec3f* verts = (const Vec3f*)m_kdtree.getScene()->getVtxPosBuffer().getPtr();
57 
58  NodeSpec rootSpec;
59 
60  m_measureTimer.unstart();
61  m_measureTimer.start();
62 
63  m_evStack.reset();
64  m_triStack.reset();
65  m_mergeSortBuffer.reset();
66 
67  // Compute bounds and generate initial set of events.
68 
69  rootSpec.numTri = m_kdtree.getScene()->getNumTriangles();
70  m_triData.resize(rootSpec.numTri);
71  Event e;
72  for (int i = 0; i < rootSpec.numTri; i++)
73  {
74  m_triStack.add(i);
75 
76  // Compute bounds.
77  AABB triBounds;
78  for (int j = 0; j < 3; j++)
79  triBounds.grow(verts[tris[i][j]]);
80  rootSpec.bounds.grow(triBounds);
81 
82  e.triIdx = i;
83 
84  for (int dim = 0; dim < 3; dim++)
85  {
86  e.dim = dim;
87 
88  if (triBounds.min()[dim] == triBounds.max()[dim])
89  {
90  e.pos = triBounds.min()[dim];
91  e.type = Planar;
92  m_evStack.add(e);
93  }
94  else
95  {
96  e.pos = triBounds.min()[dim];
97  e.type = Start;
98  m_evStack.add(e);
99 
100  e.pos = triBounds.max()[dim];
101  e.type = End;
102  m_evStack.add(e);
103  }
104  }
105  }
106 
107 #ifndef MERGESORT
108  sort(&m_evStack, 0, m_evStack.getSize(), eventSortCompare, eventSortSwap);
109 #else
110  m_mergeSortBuffer.reserve(m_evStack.getSize());
111  msort(m_evStack, 0, m_evStack.getSize());
112 #endif
113  rootSpec.numEv = m_evStack.getSize();
114 
115  F32 initTime = m_measureTimer.getElapsed();
116  if (m_params.enablePrints)
117  {
118  std::printf("FastKDTreeBuilder: initialization done in %f seconds.\n", initTime);
119  }
120 
121  m_progressTimer.start();
122 
123  // Build recursively.
124 
125  KDTreeNode* root = buildNode(rootSpec, 0, 0, 0.0f, 1.0f);
126  m_kdtree.getTriIndices().compact();
127 
128  F32 totalTime = m_measureTimer.getElapsed();
129  F32 buildTime = totalTime - initTime;
130 
131  if (m_params.enablePrints)
132  {
133  std::printf("FastKDTreeBuilder: progress %.0f%%, built in %f seconds. Total time: %f seconds\n", 100.0f, buildTime, totalTime);
134  }
135 
136  return root;
137 }
138 
139 //------------------------------------------------------------------------
140 
141 KDTreeNode* FastKDTreeBuilder::createLeaf(const NodeSpec& spec)
142 {
143  Array<S32>& tris = m_kdtree.getTriIndices();
144 
145  int stackSize = m_triStack.getSize();
146  for (int i = stackSize - spec.numTri; i < stackSize; i++)
147  {
148  tris.add(m_triStack.removeLast());
149  }
150 
151  m_evStack.remove(m_evStack.getSize() - spec.numEv, m_evStack.getSize());
152 
153  return new KDTLeafNode(tris.getSize() - spec.numTri, tris.getSize());
154 }
155 
156 //------------------------------------------------------------------------
157 
158 KDTreeNode* FastKDTreeBuilder::buildNode(const NodeSpec& spec, int level, int forcedSplits, F32 progressStart, F32 progressEnd)
159 {
160  // Display progress.
161 
162  if (m_params.enablePrints && m_progressTimer.getElapsed() >= 1.0f)
163  {
164  std::printf("FastKDTreeBuilder: progress %.0f%%\r", progressStart * 100.0f);
165  m_progressTimer.start();
166  }
167 
168  if (/*spec.numTri <= m_platform.getMaxLeafSize() ||*/ level == m_maxDepth)
169  return createLeaf(spec);
170 
171  F32 nodePrice = m_platform.getTriangleCost(spec.numTri);
172  Split split = findSplit(spec);
173 
174 #ifdef DEBUG
175  if (split.price < 0.f)
176  FW_ASSERT(0);
177 #endif
178 
179  if (split.price / nodePrice > 0.9f)
180  forcedSplits++;
181 
182  if (split.price == FW_F32_MAX || forcedSplits > m_maxFailSplits)
183  return createLeaf(spec);
184 
185  NodeSpec left, right;
186  performSplit(left, right, spec, split);
187 
188  F32 progressMid = lerp(progressStart, progressEnd, (F32)right.numTri / (F32)(left.numTri + right.numTri));
189  KDTreeNode* rightNode = buildNode(right, level + 1, forcedSplits, progressStart, progressMid);
190  KDTreeNode* leftNode = buildNode(left, level + 1, forcedSplits, progressMid, progressEnd);
191 
192  return new KDTInnerNode(split.pos, split.dim, leftNode, rightNode);
193 }
194 
195 //------------------------------------------------------------------------
196 
197 FastKDTreeBuilder::Split FastKDTreeBuilder::findSplit(const NodeSpec& spec) const
198 {
199  //if (spec.bounds.min()[0] == spec.bounds.max()[0] ||
200  // spec.bounds.min()[1] == spec.bounds.max()[1] ||
201  // spec.bounds.min()[2] == spec.bounds.max()[2] ) // ??
202  //{
203  // Split dontSplit;
204  // dontSplit.price = FW_F32_MAX;
205  // return dontSplit;
206  //}
207 
208  S32 nl [3]; S32 np [3]; S32 nr [3];
209 
210 
211  for (int i = 0; i < 3; i++)
212  {
213  nl[i] = 0; np[i] = 0; nr[i] = spec.numTri;
214  }
215 
216  Split bestSplit;
217  for (int i = m_evStack.getSize() - spec.numEv; i < m_evStack.getSize();)
218  {
219  S32 numEnds = 0; S32 numPlanar = 0; S32 numStarts = 0;
220  const F32 pos = m_evStack.get(i).pos;
221  const S32 dim = m_evStack.get(i).dim;
222 
223  while(i < m_evStack.getSize() && m_evStack.get(i).dim == dim &&
224  m_evStack.get(i).pos == pos && m_evStack.get(i).type == End)
225  {
226  numEnds++; i++;
227  }
228 
229  while(i < m_evStack.getSize() && m_evStack.get(i).dim == dim &&
230  m_evStack.get(i).pos == pos && m_evStack.get(i).type == Planar)
231  {
232  numPlanar++; i++;
233  }
234 
235  while(i < m_evStack.getSize() && m_evStack.get(i).dim == dim &&
236  m_evStack.get(i).pos == pos && m_evStack.get(i).type == Start)
237  {
238  numStarts++; i++;
239  }
240 
241  np[dim] = numPlanar;
242  nr[dim] -= numPlanar;
243  nr[dim] -= numEnds;
244 
245  Split currentSplit;
246  currentSplit.dim = dim;
247  currentSplit.pos = pos;
248 
249  F32 costLeftSide = sahPrice(currentSplit, spec.bounds, nl[dim] + np[dim], nr[dim]);
250  F32 costRightSide = sahPrice(currentSplit, spec.bounds, nl[dim], nr[dim] + np[dim]);
251  if (costLeftSide < costRightSide)
252  {
253  currentSplit.price = costLeftSide;
254  currentSplit.side = Left;
255  }
256  else
257  {
258  currentSplit.price = costRightSide;
259  currentSplit.side = Right;
260  }
261 
262 #ifdef DEBUG
263  if (currentSplit.pos < spec.bounds.min()[dim] || currentSplit.pos > spec.bounds.max()[dim])
264  FW_ASSERT(0);
265 #endif
266 
267  if(currentSplit.price < bestSplit.price)
268  bestSplit = currentSplit;
269 
270  nl[dim] += numStarts;
271  nl[dim] += numPlanar;
272  np[dim] = 0;
273  }
274  return bestSplit;
275 }
276 
277 //------------------------------------------------------------------------
278 
279 void FastKDTreeBuilder::performSplit (NodeSpec& left, NodeSpec& right, const NodeSpec& spec, const Split& split)
280 {
281  // Clasiffy triangles.
282 
283  for (int i = m_evStack.getSize() - spec.numEv; i < m_evStack.getSize(); i++)
284  {
285  const Event& currEv = m_evStack[i];
286 
287  if (currEv.type == End && currEv.dim == split.dim && currEv.pos <= split.pos)
288  {
289  m_triData[currEv.triIdx].side = LeftOnly;
290  }
291  else if (currEv.type == Start && currEv.dim == split.dim && currEv.pos >= split.pos)
292  {
293  m_triData[currEv.triIdx].side = RightOnly;
294  }
295  else if (currEv.type == Planar && currEv.dim == split.dim)
296  {
297  if (currEv.pos < split.pos || (currEv.pos == split.pos && split.side == Left))
298  {
299  m_triData[currEv.triIdx].side = LeftOnly;
300  }
301  else if (currEv.pos > split.pos || (currEv.pos == split.pos && split.side == Right))
302  {
303  m_triData[currEv.triIdx].side = RightOnly;
304  }
305  }
306  }
307 
308  // Separate events related to triangles which do not straddle the split plane.
309 
310  for (int i = m_evStack.getSize() - spec.numEv; i < m_evStack.getSize(); i++)
311  {
312  if (m_triData[m_evStack[i].triIdx].side == LeftOnly)
313  {
314  m_eventsLO.add(m_evStack[i]);
315  }
316  else if (m_triData[m_evStack[i].triIdx].side == RightOnly)
317  {
318  m_eventsRO.add(m_evStack[i]);
319  }
320  }
321 
322  // Process straddling triangles. Also adjust triangle index stack.
323 
324  for (int i = m_triStack.getSize() - spec.numTri; i < m_triStack.getSize(); i++)
325  {
326  if (m_triData[m_triStack[i]].side == LeftOnly)
327  {
328  m_leftTriIdx.add(m_triStack[i]);
329  }
330  else if (m_triData[m_triStack[i]].side == RightOnly)
331  {
332  m_rightTriIdx.add(m_triStack[i]);
333  }
334 
335  // Generate new events.
336 
337  if (m_triData[m_triStack[i]].side == Both)
338  {
339  AABB leftBounds; AABB rightBounds;
340  splitBounds(leftBounds, rightBounds, m_triStack[i], split);
341 
342  leftBounds.intersect(spec.bounds);
343  rightBounds.intersect(spec.bounds);
344  //leftBounds.intersect(m_triData[m_triStack[i]].bounds);
345  //rightBounds.intersect(m_triData[m_triStack[i]].bounds);
346 
347  AABB* bounds[] = {NULL, NULL};
348  Array<Event>* events [] = {&m_eventsBL, &m_eventsBR};
349 
350  if (leftBounds.valid())
351  {
352  m_leftTriIdx.add(m_triStack[i]);
353  bounds[0] = &leftBounds;
354  }
355  if (rightBounds.valid())
356  {
357  m_rightTriIdx.add(m_triStack[i]);
358  bounds[1] = &rightBounds;
359  }
360 
361  m_numDuplicates++;
362 
363  Event e;
364  e.triIdx = m_triStack[i];
365 
366  for (int s = 0; s < 2; s++)
367  {
368  if (bounds[s] == NULL)
369  continue;
370 
371  const Vec3f min = bounds[s]->min();
372  const Vec3f max = bounds[s]->max();
373 
374  for (int dim = 0; dim < 3; dim++)
375  {
376  e.dim = dim;
377 
378  if(min[dim] == max[dim])
379  {
380  e.pos = min[dim];
381  e.type = Planar;
382  events[s]->add(e);
383  }
384  else
385  {
386  e.pos = min[dim];
387  e.type = Start;
388  events[s]->add(e);
389 
390  e.pos = max[dim];
391  e.type = End;
392  events[s]->add(e);
393  }
394  }
395  }
396  }
397 
398  m_triData[m_triStack[i]].side = Both;
399  }
400 
401 #ifndef MERGESORT
402  sort(&m_eventsBL, 0, m_eventsBL.getSize(), eventSortCompare, eventSortSwap);
403  sort(&m_eventsBR, 0, m_eventsBR.getSize(), eventSortCompare, eventSortSwap);
404 #else
405  msort(m_eventsBL, 0, m_eventsBL.getSize());
406  msort(m_eventsBR, 0, m_eventsBR.getSize());
407 #endif
408 
409  // Merge events.
410 
411  S32 stackTop = m_evStack.getSize() - spec.numEv;
412  left.numEv = m_eventsLO.getSize() + m_eventsBL.getSize();
413  right.numEv = m_eventsRO.getSize() + m_eventsBR.getSize();
414  m_evStack.resize((m_evStack.getSize() - spec.numEv) + left.numEv + right.numEv);
415 
416  mergeEvents(stackTop, m_eventsLO, m_eventsBL);
417  mergeEvents(stackTop, m_eventsRO, m_eventsBR);
418 
419  left.numTri = m_leftTriIdx.getSize();
420  right.numTri = m_rightTriIdx.getSize();
421 
422  stackTop = m_triStack.getSize() - spec.numTri;
423  m_triStack.resize(m_triStack.getSize() - spec.numTri);
424 
425  m_triStack.insert(stackTop, m_leftTriIdx);
426  stackTop += left.numTri;
427  m_triStack.insert(stackTop, m_rightTriIdx);
428 
429  // Split bounding box.
430 
431  left.bounds = spec.bounds;
432  left.bounds.max()[split.dim] = split.pos;
433 
434  right.bounds = spec.bounds;
435  right.bounds.min()[split.dim] = split.pos;
436 
437  m_eventsLO.clear();
438  m_eventsRO.clear();
439  m_eventsBL.clear();
440  m_eventsBR.clear();
441  m_leftTriIdx.clear();
442  m_rightTriIdx.clear();
443 }
444 
445 //------------------------------------------------------------------------
446 
447 void FastKDTreeBuilder::mergeEvents (S32& stackTop, const Array<Event>& a, const Array<Event>& b)
448 {
449  int idxA = 0; int idxB = 0;
450  for (int i = 0; i < a.getSize() + b.getSize(); i++)
451  {
452  if (idxA == a.getSize())
453  {
454  m_evStack[stackTop++] = b.get(idxB++);
455  }
456  else if (idxB == b.getSize())
457  {
458  m_evStack[stackTop++] = a.get(idxA++);
459  }
460  else if (FastKDTreeBuilder::eventCompare(a.get(idxA), b.get(idxB)))
461  {
462  m_evStack[stackTop++] = a.get(idxA++);
463  }
464  else
465  {
466  m_evStack[stackTop++] = b.get(idxB++);
467  }
468  }
469 }
470 
471 //------------------------------------------------------------------------
472 
473 F32 FastKDTreeBuilder::sahPrice(const Split& split, const AABB& bounds, S32 nl, S32 nr) const
474 {
475  AABB leftBounds = bounds;
476  leftBounds.max()[split.dim] = split.pos;
477 
478  AABB rightBounds = bounds;
479  rightBounds.min()[split.dim] = split.pos;
480 
481  //if (bounds.min()[split.dim] == bounds.max()[split.dim])
482  //{
483  // return FW_F32_MAX;
484  //}
485 
486  if (bounds.min()[0] == bounds.max()[0] ||
487  bounds.min()[1] == bounds.max()[1] ||
488  bounds.min()[2] == bounds.max()[2] ) // ??
489  {
490  return FW_F32_MAX;
491  }
492 
493  if ((split.pos == bounds.min()[split.dim] && nl == 0) || (split.pos == bounds.max()[split.dim] && nr == 0))
494  {
495  return FW_F32_MAX;
496  }
497 
498  const F32 probabilityLeft = leftBounds.area() / bounds.area();
499  const F32 probabilityRight = rightBounds.area() / bounds.area();
500 
501  F32 cost = probabilityLeft * m_platform.getTriangleCost(nl) + probabilityRight * m_platform.getTriangleCost(nr);
502  if ((nl == 0 || nr == 0) && !(split.pos == bounds.min()[split.dim] || split.pos == bounds.max()[split.dim])) // No bonus for flat empty cells.
503  cost *= 0.8f; // m_platform ??
504 
505  cost += m_platform.getNodeCost(1); // ??
506 
507  return cost;
508 }
509 
510 //------------------------------------------------------------------------
511 
512 void FastKDTreeBuilder::splitBounds (AABB& left, AABB& right, S32 triIdx, const Split& split) const
513 {
514  const Vec3i* tris = (const Vec3i*)m_kdtree.getScene()->getTriVtxIndexBuffer().getPtr();
515  const Vec3f* verts = (const Vec3f*)m_kdtree.getScene()->getVtxPosBuffer().getPtr();
516 
517  Vec3f vertices[] = {verts[tris[triIdx][0]], verts[tris[triIdx][1]], verts[tris[triIdx][2]]};
518 
519 #ifdef DEBUG
520  //if (split.pos > m_triData[triIdx].bounds.max()[split.dim] || split.pos < m_triData[triIdx].bounds.min()[split.dim])
521  //{
522  // FW_ASSERT(0);
523  //}
524 #endif
525 
526  int leftmostVertIdx = 0; int secondVertIdx = 0; int thirdVertIdx = 0;
527  for (int i = 0; i < 3; i++)
528  {
529  if (vertices[i][split.dim] < vertices[leftmostVertIdx][split.dim])
530  {
531  leftmostVertIdx = i;
532  }
533  }
534 
535  if (vertices[(leftmostVertIdx+1) % 3][split.dim] < vertices[(leftmostVertIdx+2) % 3][split.dim])
536  {
537  secondVertIdx = (leftmostVertIdx+1) % 3;
538  thirdVertIdx = (leftmostVertIdx+2) % 3;
539  }
540  else
541  {
542  secondVertIdx = (leftmostVertIdx+2) % 3;
543  thirdVertIdx = (leftmostVertIdx+1) % 3;
544  }
545 
546  // Actual bbox construction here.
547 
548  left = AABB(); right = AABB();
549  left.grow(vertices[leftmostVertIdx]);
550  right.grow(vertices[thirdVertIdx]);
551 
552  if (vertices[secondVertIdx][split.dim] <= split.pos)
553  {
554  F32 firstToSplit = split.pos - vertices[leftmostVertIdx][split.dim];
555  F32 secondToSplit = split.pos - vertices[secondVertIdx][split.dim];
556  F32 firstToThird = vertices[thirdVertIdx][split.dim] - vertices[leftmostVertIdx][split.dim];
557  F32 secondToThird = vertices[thirdVertIdx][split.dim] - vertices[secondVertIdx][split.dim];
558 
559  Vec3f edge1 = vertices[thirdVertIdx] - vertices[leftmostVertIdx];
560  edge1 *= firstToSplit / firstToThird;
561 
562  Vec3f edge2 = vertices[thirdVertIdx] - vertices[secondVertIdx];
563  edge2 *= secondToSplit / secondToThird;
564 
565  Vec3f newVert1 = vertices[leftmostVertIdx] + edge1;
566  Vec3f newVert2 = vertices[secondVertIdx] + edge2;
567 
568  newVert1[split.dim] = split.pos; // ??
569  newVert2[split.dim] = split.pos;
570 
571  left.grow(vertices[secondVertIdx]);
572  left.grow(newVert1);
573  left.grow(newVert2);
574  right.grow(newVert1);
575  right.grow(newVert2);
576 
577  }
578  else if (vertices[secondVertIdx][split.dim] > split.pos)
579  {
580  F32 firstToSplit = split.pos - vertices[leftmostVertIdx][split.dim];
581  F32 firstToSecond = vertices[secondVertIdx][split.dim] - vertices[leftmostVertIdx][split.dim];
582  F32 firstToThird = vertices[thirdVertIdx][split.dim] - vertices[leftmostVertIdx][split.dim];
583 
584  Vec3f edge1 = vertices[secondVertIdx] - vertices[leftmostVertIdx];
585  edge1 *= firstToSplit / firstToSecond;
586 
587  Vec3f edge2 = vertices[thirdVertIdx] - vertices[leftmostVertIdx];
588  edge2 *= firstToSplit / firstToThird;
589 
590  Vec3f newVert1 = vertices[leftmostVertIdx] + edge1;
591  Vec3f newVert2 = vertices[leftmostVertIdx] + edge2;
592 
593  newVert1[split.dim] = split.pos; // ??
594  newVert2[split.dim] = split.pos;
595 
596  left.grow(newVert1);
597  left.grow(newVert2);
598  right.grow(newVert1);
599  right.grow(newVert2);
600  right.grow(vertices[secondVertIdx]);
601  }
602 #ifdef DEBUG
603  else
604  {
605  FW_ASSERT(0);
606  }
607 
608  if(!left.valid())
609  FW_ASSERT(0);
610  if(!right.valid())
611  FW_ASSERT(0);
612 #endif
613 }
614 
615 //------------------------------------------------------------------------
616 
617 bool FastKDTreeBuilder::eventSortCompare(void* data, int idxA, int idxB)
618 {
619  const Array<Event>* ptr = (const Array<Event>*)data;
620  const Event& ea = ptr->get(idxA);
621  const Event& eb = ptr->get(idxB);
622  return FastKDTreeBuilder::eventCompare(ea, eb);
623 }
624 
625 //------------------------------------------------------------------------
626 
627 void FastKDTreeBuilder::eventSortSwap(void* data, int idxA, int idxB)
628 {
629  Array<Event>* ptr = (Array<Event>*)data;
630  swap(ptr->get(idxA), ptr->get(idxB));
631 }
632 
633 //------------------------------------------------------------------------
634 
635 bool FastKDTreeBuilder::eventCompare (const Event& eventA, const Event& eventB)
636 {
637  if (eventA.pos == eventB.pos)
638  {
639  if (eventA.dim == eventB.dim)
640  {
641  return (eventA.type < eventB.type);
642  }
643  else
644  {
645  return (eventA.dim < eventB.dim);
646  }
647  }
648  else
649  {
650  return (eventA.pos < eventB.pos);
651  }
652 }
653 
654 //------------------------------------------------------------------------
655 
656 void FastKDTreeBuilder::msort(Array<Event>& data, S32 l, S32 h)
657 {
658  if ((h - l) <= 1)
659  return;
660 
661  int middle = (h + l) / 2;
662 
663  msort(data, l, middle);
664  msort(data, middle, h);
665  mmerge(data, l, middle, h);
666 }
667 
668 //------------------------------------------------------------------------
669 
670 void FastKDTreeBuilder::mmerge(Array<Event>& data, S32 l, S32 m, S32 h)
671 {
672  S32 idxL = l;
673  S32 idxR = m;
674  S32 idxB = 0;
675  S32 pos = idxL;
676 
677  m_mergeSortBuffer.clear();
678 
679  while (pos < h)
680  {
681  if (m_mergeSortBuffer.getSize() - 1 < idxB)
682  {
683  if (idxL >= m)
684  {
685  idxR++;
686  pos++;
687  }
688  else if (idxR >= h)
689  {
690  idxL++;
691  pos++;
692  }
693  else if (eventCompare(data[idxL], data[idxR]))
694  {
695  idxL++;
696  pos++;
697  }
698  else
699  {
700  m_mergeSortBuffer.add(data[pos]);
701  data[pos] = data[idxR];
702  idxR++;
703  idxL++;
704  pos++;
705  }
706  }
707  else
708  {
709  if (idxR >= h)
710  {
711  data[pos] = m_mergeSortBuffer[idxB];
712  idxB++;
713  pos++;
714  }
715  else if (eventCompare(m_mergeSortBuffer[idxB], data[idxR]))
716  {
717  if (pos < m)
718  {
719  m_mergeSortBuffer.add(data[pos]);
720  }
721  data[pos] = m_mergeSortBuffer[idxB];
722  idxB++;
723  idxL++;
724  pos++;
725  }
726  else
727  {
728  if (pos < m)
729  {
730  m_mergeSortBuffer.add(data[pos]);
731  }
732  data[pos] = data[idxR];
733  idxR++;
734  pos++;
735  }
736  }
737  }
738 
739 #ifdef DEBUG
740 
741  for (S32 i = l; i < h - 1; i++)
742  {
743  if (eventCompare(data[i+1], data[i]))
744  FW_ASSERT(0);
745  }
746 
747 #endif
748 }
#define NULL
Definition: Defs.hpp:39
#define FW_F32_MAX
Definition: Defs.hpp:118
Buffer & getTriVtxIndexBuffer(void)
Returns buffer of triangle's vertex indieces.
Definition: Scene.hpp:75
void sort(void *data, int start, int end, SortCompareFunc compareFunc, SortSwapFunc swapFunc, bool multicore=false)
Definition: Sort.cpp:203
FW_CUDA_FUNC const Vec3f & max(void) const
Definition: Util.hpp:49
CUdevice int ordinal char int CUdevice dev CUdevprop CUdevice dev CUcontext ctx CUcontext ctx CUcontext pctx CUmodule const void image CUmodule const void fatCubin CUfunction CUmodule const char name void p CUfunction unsigned int bytes CUtexref pTexRef CUtexref CUarray unsigned int Flags CUtexref int CUaddress_mode am CUtexref unsigned int Flags CUaddress_mode CUtexref int dim CUarray_format int CUtexref hTexRef CUfunction unsigned int numbytes CUfunction int float value CUfunction int CUtexref hTexRef CUfunction int int grid_height CUevent unsigned int Flags CUevent hEvent CUevent hEvent CUstream unsigned int Flags CUstream hStream GLuint bufferobj unsigned int CUdevice dev CUdeviceptr unsigned int CUmodule const char name CUdeviceptr unsigned int bytesize CUdeviceptr dptr void unsigned int bytesize void CUdeviceptr unsigned int ByteCount CUarray unsigned int CUdeviceptr unsigned int ByteCount CUarray unsigned int const void unsigned int ByteCount CUarray unsigned int CUarray unsigned int unsigned int ByteCount void CUarray unsigned int unsigned int CUstream hStream const CUDA_MEMCPY2D pCopy CUdeviceptr const void unsigned int CUstream hStream const CUDA_MEMCPY2D CUstream hStream CUdeviceptr unsigned char unsigned int N CUdeviceptr unsigned int unsigned int N CUdeviceptr unsigned int unsigned short unsigned int unsigned int Height CUarray const CUDA_ARRAY_DESCRIPTOR pAllocateArray CUarray const CUDA_ARRAY3D_DESCRIPTOR pAllocateArray unsigned int CUtexref CUdeviceptr unsigned int bytes CUcontext unsigned int CUdevice device GLenum texture GLenum GLuint buffer GLenum GLuint renderbuffer GLenum GLsizeiptr const GLvoid * data
Definition: DLLImports.inl:319
void ** ptr
Definition: DLLImports.cpp:74
Buffer & getVtxPosBuffer(void)
Returns vertex position buffer.
Definition: Scene.hpp:103
CUdevice int ordinal char int CUdevice dev CUdevprop CUdevice dev CUcontext ctx CUcontext ctx CUcontext pctx CUmodule const void image CUmodule const void fatCubin CUfunction CUmodule const char name void p CUfunction unsigned int bytes CUtexref pTexRef CUtexref CUarray unsigned int Flags CUtexref int CUaddress_mode am CUtexref unsigned int Flags CUaddress_mode CUtexref int dim CUarray_format int CUtexref hTexRef CUfunction unsigned int numbytes CUfunction int float value CUfunction int CUtexref hTexRef CUfunction int int grid_height CUevent unsigned int Flags CUevent hEvent CUevent hEvent CUstream unsigned int Flags CUstream hStream GLuint bufferobj unsigned int CUdevice dev CUdeviceptr unsigned int CUmodule const char name CUdeviceptr unsigned int bytesize CUdeviceptr dptr void unsigned int bytesize void CUdeviceptr unsigned int ByteCount CUarray unsigned int CUdeviceptr unsigned int ByteCount CUarray unsigned int const void unsigned int ByteCount CUarray unsigned int CUarray unsigned int unsigned int ByteCount void CUarray unsigned int unsigned int CUstream hStream const CUDA_MEMCPY2D pCopy CUdeviceptr const void unsigned int CUstream hStream const CUDA_MEMCPY2D CUstream hStream CUdeviceptr unsigned char unsigned int N CUdeviceptr unsigned int unsigned int N CUdeviceptr unsigned int unsigned short unsigned int unsigned int Height CUarray const CUDA_ARRAY_DESCRIPTOR pAllocateArray CUarray const CUDA_ARRAY3D_DESCRIPTOR pAllocateArray unsigned int CUtexref CUdeviceptr unsigned int bytes CUcontext unsigned int CUdevice device GLenum texture GLenum GLuint buffer GLenum GLuint renderbuffer GLenum GLsizeiptr const GLvoid GLenum usage GLuint shader GLenum type GLsizei const GLuint framebuffers GLsizei const GLuint renderbuffers GLuint v GLuint v GLenum GLenum GLenum GLuint GLint level GLsizei GLuint framebuffers GLuint const GLchar name GLenum GLintptr GLsizeiptr GLvoid data GLuint GLenum GLint param GLuint GLenum GLint param GLhandleARB programObj GLenum GLenum GLsizei GLsizei height GLenum GLint GLint GLsizei GLsizei GLsizei GLint GLenum GLenum const GLvoid pixels GLint GLsizei const GLfloat value GLint GLfloat GLfloat v1 GLint GLfloat GLfloat GLfloat v2 GLint GLsizei const GLfloat value GLint GLsizei GLboolean const GLfloat value GLuint program GLuint GLfloat GLfloat GLfloat z GLuint GLint GLenum GLboolean GLsizei const GLvoid pointer GLuint GLuint const GLchar name GLenum GLsizei GLenum GLsizei GLsizei height GLenum GLuint renderbuffer GLenum GLenum GLint * params
Definition: DLLImports.inl:373
void unstart(void)
Definition: Timer.hpp:43
void start(void)
Definition: Timer.hpp:42
Strucure holding build parameters.
Definition: KDTree.hpp:85
K-d tree acceleration structure class.
Definition: KDTree.hpp:41
void clear(void)
Definition: Array.hpp:359
float getTriangleCost(S32 n) const
Calcuates cost of a given number of triangles rounded to the batch size.
Definition: Platform.hpp:95
const U8 * getPtr(S64 ofs=0)
Definition: Buffer.hpp:106
void reset(S size=0)
Definition: Array.hpp:317
const T & get(S idx) const
Definition: Array.hpp:232
float F32
Definition: Defs.hpp:89
Scene * getScene(void) const
Gets source scene of the k-d tree.
Definition: KDTree.hpp:119
K-d tree's leaf node class.
Definition: KDTreeNode.hpp:134
F32 log2(F32 a)
Definition: Math.hpp:90
FastKDTreeBuilder(KDTree &kdtree, const KDTree::BuildParams &params)
Constructor.
KDTreeNode * run(void)
Builds k-d tree.
FW_CUDA_FUNC float area(void) const
Definition: Util.hpp:45
float getNodeCost(S32 n) const
Calculates cost of a given number of nodes rounded to the batch size.
Definition: Platform.hpp:102
FW_CUDA_FUNC bool valid(void) const
Definition: Util.hpp:46
FW_CUDA_FUNC T min(const VectorBase< T, L, S > &v)
Definition: Math.hpp:461
FW_CUDA_FUNC T max(const VectorBase< T, L, S > &v)
Definition: Math.hpp:462
int getNumTriangles(void) const
Definition: Scene.hpp:61
K-d tree's inner node class.
Definition: KDTreeNode.hpp:94
#define FW_ASSERT(X)
Definition: Defs.hpp:67
signed int S32
Definition: Defs.hpp:88
T & add(void)
Definition: Array.hpp:384
FW_CUDA_FUNC const Vec3f & min(void) const
Definition: Util.hpp:48
CUdevice int ordinal char int CUdevice dev CUdevprop CUdevice dev CUcontext ctx CUcontext ctx CUcontext pctx CUmodule const void image CUmodule const void fatCubin CUfunction CUmodule const char name void p CUfunction unsigned int bytes CUtexref pTexRef CUtexref CUarray unsigned int Flags CUtexref int dim
Definition: DLLImports.inl:74
Array< S32 > & getTriIndices(void)
Returns an array of triangle indices to which leaf nodes are pointig. These indices point to scene's ...
Definition: KDTree.hpp:137
void reserve(S numElements)
Definition: Array.hpp:376
T & insert(S idx)
Definition: Array.hpp:419
CUdevice int ordinal char int CUdevice dev CUdevprop CUdevice dev CUcontext ctx CUcontext ctx CUcontext pctx CUmodule const void image CUmodule const void fatCubin CUfunction CUmodule const char name void p CUfunction unsigned int bytes CUtexref pTexRef CUtexref CUarray unsigned int Flags CUtexref int CUaddress_mode am CUtexref unsigned int Flags CUaddress_mode CUtexref int dim CUarray_format int CUtexref hTexRef CUfunction unsigned int numbytes CUfunction int float value CUfunction int CUtexref hTexRef CUfunction f
Definition: DLLImports.inl:88
FW_CUDA_FUNC void grow(const Vec3f &pt)
Definition: Util.hpp:41
void printf(const char *fmt,...)
Definition: Defs.cpp:225
void compact(void)
Definition: Array.hpp:335
CUdevice int ordinal char int CUdevice dev CUdevprop CUdevice dev CUcontext ctx CUcontext ctx CUcontext pctx CUmodule const void image CUmodule const void fatCubin CUfunction CUmodule const char name void p CUfunction unsigned int bytes CUtexref pTexRef CUtexref CUarray unsigned int Flags CUtexref int CUaddress_mode am CUtexref unsigned int Flags CUaddress_mode CUtexref int dim CUarray_format int CUtexref hTexRef CUfunction unsigned int numbytes CUfunction int float value CUfunction int CUtexref hTexRef CUfunction int int grid_height CUevent unsigned int Flags CUevent hEvent CUevent hEvent CUstream unsigned int Flags CUstream hStream GLuint bufferobj unsigned int CUdevice dev CUdeviceptr unsigned int CUmodule const char name CUdeviceptr unsigned int bytesize CUdeviceptr dptr void unsigned int bytesize void CUdeviceptr unsigned int ByteCount CUarray unsigned int CUdeviceptr unsigned int ByteCount CUarray unsigned int const void unsigned int ByteCount CUarray unsigned int CUarray unsigned int unsigned int ByteCount void CUarray unsigned int unsigned int CUstream hStream const CUDA_MEMCPY2D pCopy CUdeviceptr const void unsigned int CUstream hStream const CUDA_MEMCPY2D CUstream hStream CUdeviceptr unsigned char unsigned int N CUdeviceptr unsigned int unsigned int N CUdeviceptr unsigned int unsigned short unsigned int unsigned int Height CUarray const CUDA_ARRAY_DESCRIPTOR pAllocateArray CUarray const CUDA_ARRAY3D_DESCRIPTOR pAllocateArray unsigned int CUtexref CUdeviceptr unsigned int bytes CUcontext unsigned int CUdevice device GLenum texture GLenum GLuint buffer GLenum GLuint renderbuffer GLenum GLsizeiptr const GLvoid GLenum usage GLuint shader GLenum type GLsizei const GLuint framebuffers GLsizei const GLuint renderbuffers GLuint v GLuint v GLenum GLenum GLenum GLuint GLint level
Definition: DLLImports.inl:333
FW_CUDA_FUNC void swap(T &a, T &b)
Definition: Defs.hpp:183
bool enablePrints
Flag whether to print information during build phase.
Definition: KDTree.hpp:98
T & removeLast(void)
Definition: Array.hpp:465
F32 getElapsed(void)
Definition: Timer.hpp:44
FW_CUDA_FUNC void intersect(const AABB &aabb)
Definition: Util.hpp:43
void resize(S size)
Definition: Array.hpp:366
S getSize(void) const
Definition: Array.hpp:188
T remove(S idx)
Definition: Array.hpp:449
K-d tree virtual parent node class.
Definition: KDTreeNode.hpp:51
FW_CUDA_FUNC A lerp(const A &a, const A &b, const B &t)
Definition: Math.hpp:115