42 m_platform (kdtree.getPlatform()),
45 m_maxDepth ((
S32)(1.2
f *
log2((FW::
F32)kdtree.getScene()->getNumTriangles()) + 2.
f)),
46 m_maxFailSplits ((
S32)(1.
f + 0.2
f * m_maxDepth))
61 m_measureTimer.
start();
65 m_mergeSortBuffer.
reset();
70 m_triData.
resize(rootSpec.numTri);
72 for (
int i = 0; i < rootSpec.numTri; i++)
78 for (
int j = 0; j < 3; j++)
79 triBounds.
grow(verts[tris[i][j]]);
80 rootSpec.bounds.grow(triBounds);
90 e.pos = triBounds.
min()[
dim];
96 e.pos = triBounds.
min()[
dim];
100 e.pos = triBounds.
max()[
dim];
108 sort(&m_evStack, 0, m_evStack.
getSize(), eventSortCompare, eventSortSwap);
111 msort(m_evStack, 0, m_evStack.
getSize());
113 rootSpec.numEv = m_evStack.
getSize();
118 std::printf(
"FastKDTreeBuilder: initialization done in %f seconds.\n", initTime);
121 m_progressTimer.
start();
125 KDTreeNode* root = buildNode(rootSpec, 0, 0, 0.0
f, 1.0
f);
129 F32 buildTime = totalTime - initTime;
133 std::printf(
"FastKDTreeBuilder: progress %.0f%%, built in %f seconds. Total time: %f seconds\n", 100.0
f, buildTime, totalTime);
141 KDTreeNode* FastKDTreeBuilder::createLeaf(
const NodeSpec& spec)
145 int stackSize = m_triStack.
getSize();
146 for (
int i = stackSize - spec.numTri; i < stackSize; i++)
158 KDTreeNode* FastKDTreeBuilder::buildNode(
const NodeSpec& spec,
int level,
int forcedSplits,
F32 progressStart,
F32 progressEnd)
164 std::printf(
"FastKDTreeBuilder: progress %.0f%%\r", progressStart * 100.0
f);
165 m_progressTimer.
start();
168 if ( level == m_maxDepth)
169 return createLeaf(spec);
172 Split split = findSplit(spec);
175 if (split.price < 0.f)
179 if (split.price / nodePrice > 0.9f)
182 if (split.price ==
FW_F32_MAX || forcedSplits > m_maxFailSplits)
183 return createLeaf(spec);
185 NodeSpec left, right;
186 performSplit(left, right, spec, split);
188 F32 progressMid =
lerp(progressStart, progressEnd, (
F32)right.numTri / (
F32)(left.numTri + right.numTri));
189 KDTreeNode* rightNode = buildNode(right, level + 1, forcedSplits, progressStart, progressMid);
190 KDTreeNode* leftNode = buildNode(left, level + 1, forcedSplits, progressMid, progressEnd);
192 return new KDTInnerNode(split.pos, split.dim, leftNode, rightNode);
197 FastKDTreeBuilder::Split FastKDTreeBuilder::findSplit(
const NodeSpec& spec)
const
211 for (
int i = 0; i < 3; i++)
213 nl[i] = 0; np[i] = 0; nr[i] = spec.numTri;
217 for (
int i = m_evStack.
getSize() - spec.numEv; i < m_evStack.
getSize();)
219 S32 numEnds = 0;
S32 numPlanar = 0;
S32 numStarts = 0;
220 const F32 pos = m_evStack.
get(i).pos;
223 while(i < m_evStack.
getSize() && m_evStack.
get(i).dim == dim &&
224 m_evStack.
get(i).pos == pos && m_evStack.
get(i).type == End)
229 while(i < m_evStack.
getSize() && m_evStack.
get(i).dim == dim &&
230 m_evStack.
get(i).pos == pos && m_evStack.
get(i).type == Planar)
235 while(i < m_evStack.
getSize() && m_evStack.
get(i).dim == dim &&
236 m_evStack.
get(i).pos == pos && m_evStack.
get(i).type == Start)
242 nr[
dim] -= numPlanar;
246 currentSplit.dim =
dim;
247 currentSplit.pos = pos;
249 F32 costLeftSide = sahPrice(currentSplit, spec.bounds, nl[dim] + np[dim], nr[dim]);
250 F32 costRightSide = sahPrice(currentSplit, spec.bounds, nl[dim], nr[dim] + np[dim]);
251 if (costLeftSide < costRightSide)
253 currentSplit.price = costLeftSide;
254 currentSplit.side = Left;
258 currentSplit.price = costRightSide;
259 currentSplit.side = Right;
263 if (currentSplit.pos < spec.bounds.min()[
dim] || currentSplit.pos > spec.bounds.max()[
dim])
267 if(currentSplit.price < bestSplit.price)
268 bestSplit = currentSplit;
270 nl[
dim] += numStarts;
271 nl[
dim] += numPlanar;
279 void FastKDTreeBuilder::performSplit (NodeSpec& left, NodeSpec& right,
const NodeSpec& spec,
const Split& split)
283 for (
int i = m_evStack.
getSize() - spec.numEv; i < m_evStack.
getSize(); i++)
285 const Event& currEv = m_evStack[i];
287 if (currEv.type == End && currEv.dim == split.dim && currEv.pos <= split.pos)
289 m_triData[currEv.triIdx].side = LeftOnly;
291 else if (currEv.type == Start && currEv.dim == split.dim && currEv.pos >= split.pos)
293 m_triData[currEv.triIdx].side = RightOnly;
295 else if (currEv.type == Planar && currEv.dim == split.dim)
297 if (currEv.pos < split.pos || (currEv.pos == split.pos && split.side == Left))
299 m_triData[currEv.triIdx].side = LeftOnly;
301 else if (currEv.pos > split.pos || (currEv.pos == split.pos && split.side == Right))
303 m_triData[currEv.triIdx].side = RightOnly;
310 for (
int i = m_evStack.
getSize() - spec.numEv; i < m_evStack.
getSize(); i++)
312 if (m_triData[m_evStack[i].triIdx].side == LeftOnly)
314 m_eventsLO.
add(m_evStack[i]);
316 else if (m_triData[m_evStack[i].triIdx].side == RightOnly)
318 m_eventsRO.
add(m_evStack[i]);
324 for (
int i = m_triStack.
getSize() - spec.numTri; i < m_triStack.
getSize(); i++)
326 if (m_triData[m_triStack[i]].side == LeftOnly)
328 m_leftTriIdx.
add(m_triStack[i]);
330 else if (m_triData[m_triStack[i]].side == RightOnly)
332 m_rightTriIdx.
add(m_triStack[i]);
337 if (m_triData[m_triStack[i]].side == Both)
340 splitBounds(leftBounds, rightBounds, m_triStack[i], split);
350 if (leftBounds.
valid())
352 m_leftTriIdx.
add(m_triStack[i]);
353 bounds[0] = &leftBounds;
355 if (rightBounds.
valid())
357 m_rightTriIdx.
add(m_triStack[i]);
358 bounds[1] = &rightBounds;
364 e.triIdx = m_triStack[i];
366 for (
int s = 0; s < 2; s++)
368 if (bounds[s] ==
NULL)
374 for (
int dim = 0; dim < 3; dim++)
378 if(min[dim] == max[dim])
398 m_triData[m_triStack[i]].side = Both;
402 sort(&m_eventsBL, 0, m_eventsBL.
getSize(), eventSortCompare, eventSortSwap);
403 sort(&m_eventsBR, 0, m_eventsBR.
getSize(), eventSortCompare, eventSortSwap);
405 msort(m_eventsBL, 0, m_eventsBL.
getSize());
406 msort(m_eventsBR, 0, m_eventsBR.
getSize());
411 S32 stackTop = m_evStack.
getSize() - spec.numEv;
414 m_evStack.
resize((m_evStack.
getSize() - spec.numEv) + left.numEv + right.numEv);
416 mergeEvents(stackTop, m_eventsLO, m_eventsBL);
417 mergeEvents(stackTop, m_eventsRO, m_eventsBR);
419 left.numTri = m_leftTriIdx.
getSize();
420 right.numTri = m_rightTriIdx.
getSize();
422 stackTop = m_triStack.
getSize() - spec.numTri;
425 m_triStack.
insert(stackTop, m_leftTriIdx);
426 stackTop += left.numTri;
427 m_triStack.
insert(stackTop, m_rightTriIdx);
431 left.bounds = spec.bounds;
432 left.bounds.max()[split.dim] = split.pos;
434 right.bounds = spec.bounds;
435 right.bounds.min()[split.dim] = split.pos;
441 m_leftTriIdx.
clear();
442 m_rightTriIdx.
clear();
449 int idxA = 0;
int idxB = 0;
454 m_evStack[stackTop++] = b.
get(idxB++);
458 m_evStack[stackTop++] = a.
get(idxA++);
460 else if (FastKDTreeBuilder::eventCompare(a.
get(idxA), b.
get(idxB)))
462 m_evStack[stackTop++] = a.
get(idxA++);
466 m_evStack[stackTop++] = b.
get(idxB++);
473 F32 FastKDTreeBuilder::sahPrice(
const Split& split,
const AABB& bounds,
S32 nl,
S32 nr)
const
475 AABB leftBounds = bounds;
476 leftBounds.
max()[split.dim] = split.pos;
478 AABB rightBounds = bounds;
479 rightBounds.
min()[split.dim] = split.pos;
486 if (bounds.
min()[0] == bounds.
max()[0] ||
487 bounds.
min()[1] == bounds.
max()[1] ||
488 bounds.
min()[2] == bounds.
max()[2] )
493 if ((split.pos == bounds.
min()[split.dim] && nl == 0) || (split.pos == bounds.
max()[split.dim] && nr == 0))
498 const F32 probabilityLeft = leftBounds.
area() / bounds.
area();
499 const F32 probabilityRight = rightBounds.
area() / bounds.
area();
502 if ((nl == 0 || nr == 0) && !(split.pos == bounds.
min()[split.dim] || split.pos == bounds.
max()[split.dim]))
512 void FastKDTreeBuilder::splitBounds (
AABB& left,
AABB& right,
S32 triIdx,
const Split& split)
const
517 Vec3f vertices[] = {verts[tris[triIdx][0]], verts[tris[triIdx][1]], verts[tris[triIdx][2]]};
526 int leftmostVertIdx = 0;
int secondVertIdx = 0;
int thirdVertIdx = 0;
527 for (
int i = 0; i < 3; i++)
529 if (vertices[i][split.dim] < vertices[leftmostVertIdx][split.dim])
535 if (vertices[(leftmostVertIdx+1) % 3][split.dim] < vertices[(leftmostVertIdx+2) % 3][split.dim])
537 secondVertIdx = (leftmostVertIdx+1) % 3;
538 thirdVertIdx = (leftmostVertIdx+2) % 3;
542 secondVertIdx = (leftmostVertIdx+2) % 3;
543 thirdVertIdx = (leftmostVertIdx+1) % 3;
549 left.
grow(vertices[leftmostVertIdx]);
550 right.
grow(vertices[thirdVertIdx]);
552 if (vertices[secondVertIdx][split.dim] <= split.pos)
554 F32 firstToSplit = split.pos - vertices[leftmostVertIdx][split.dim];
555 F32 secondToSplit = split.pos - vertices[secondVertIdx][split.dim];
556 F32 firstToThird = vertices[thirdVertIdx][split.dim] - vertices[leftmostVertIdx][split.dim];
557 F32 secondToThird = vertices[thirdVertIdx][split.dim] - vertices[secondVertIdx][split.dim];
559 Vec3f edge1 = vertices[thirdVertIdx] - vertices[leftmostVertIdx];
560 edge1 *= firstToSplit / firstToThird;
562 Vec3f edge2 = vertices[thirdVertIdx] - vertices[secondVertIdx];
563 edge2 *= secondToSplit / secondToThird;
565 Vec3f newVert1 = vertices[leftmostVertIdx] + edge1;
566 Vec3f newVert2 = vertices[secondVertIdx] + edge2;
568 newVert1[split.dim] = split.pos;
569 newVert2[split.dim] = split.pos;
571 left.
grow(vertices[secondVertIdx]);
574 right.
grow(newVert1);
575 right.
grow(newVert2);
578 else if (vertices[secondVertIdx][split.dim] > split.pos)
580 F32 firstToSplit = split.pos - vertices[leftmostVertIdx][split.dim];
581 F32 firstToSecond = vertices[secondVertIdx][split.dim] - vertices[leftmostVertIdx][split.dim];
582 F32 firstToThird = vertices[thirdVertIdx][split.dim] - vertices[leftmostVertIdx][split.dim];
584 Vec3f edge1 = vertices[secondVertIdx] - vertices[leftmostVertIdx];
585 edge1 *= firstToSplit / firstToSecond;
587 Vec3f edge2 = vertices[thirdVertIdx] - vertices[leftmostVertIdx];
588 edge2 *= firstToSplit / firstToThird;
590 Vec3f newVert1 = vertices[leftmostVertIdx] + edge1;
591 Vec3f newVert2 = vertices[leftmostVertIdx] + edge2;
593 newVert1[split.dim] = split.pos;
594 newVert2[split.dim] = split.pos;
598 right.
grow(newVert1);
599 right.
grow(newVert2);
600 right.
grow(vertices[secondVertIdx]);
617 bool FastKDTreeBuilder::eventSortCompare(
void*
data,
int idxA,
int idxB)
620 const Event& ea = ptr->
get(idxA);
621 const Event& eb = ptr->
get(idxB);
622 return FastKDTreeBuilder::eventCompare(ea, eb);
627 void FastKDTreeBuilder::eventSortSwap(
void* data,
int idxA,
int idxB)
635 bool FastKDTreeBuilder::eventCompare (
const Event& eventA,
const Event& eventB)
637 if (eventA.pos == eventB.pos)
639 if (eventA.dim == eventB.dim)
641 return (eventA.type < eventB.type);
645 return (eventA.dim < eventB.dim);
650 return (eventA.pos < eventB.pos);
661 int middle = (h + l) / 2;
663 msort(data, l, middle);
664 msort(data, middle, h);
665 mmerge(data, l, middle, h);
677 m_mergeSortBuffer.
clear();
681 if (m_mergeSortBuffer.
getSize() - 1 < idxB)
693 else if (eventCompare(data[idxL], data[idxR]))
700 m_mergeSortBuffer.
add(data[pos]);
701 data[pos] = data[idxR];
711 data[pos] = m_mergeSortBuffer[idxB];
715 else if (eventCompare(m_mergeSortBuffer[idxB], data[idxR]))
719 m_mergeSortBuffer.
add(data[pos]);
721 data[pos] = m_mergeSortBuffer[idxB];
730 m_mergeSortBuffer.
add(data[pos]);
732 data[pos] = data[idxR];
741 for (
S32 i = l; i < h - 1; i++)
743 if (eventCompare(data[i+1], data[i]))
Buffer & getTriVtxIndexBuffer(void)
Returns buffer of triangle's vertex indieces.
void sort(void *data, int start, int end, SortCompareFunc compareFunc, SortSwapFunc swapFunc, bool multicore=false)
FW_CUDA_FUNC const Vec3f & max(void) const
CUdevice int ordinal char int CUdevice dev CUdevprop CUdevice dev CUcontext ctx CUcontext ctx CUcontext pctx CUmodule const void image CUmodule const void fatCubin CUfunction CUmodule const char name void p CUfunction unsigned int bytes CUtexref pTexRef CUtexref CUarray unsigned int Flags CUtexref int CUaddress_mode am CUtexref unsigned int Flags CUaddress_mode CUtexref int dim CUarray_format int CUtexref hTexRef CUfunction unsigned int numbytes CUfunction int float value CUfunction int CUtexref hTexRef CUfunction int int grid_height CUevent unsigned int Flags CUevent hEvent CUevent hEvent CUstream unsigned int Flags CUstream hStream GLuint bufferobj unsigned int CUdevice dev CUdeviceptr unsigned int CUmodule const char name CUdeviceptr unsigned int bytesize CUdeviceptr dptr void unsigned int bytesize void CUdeviceptr unsigned int ByteCount CUarray unsigned int CUdeviceptr unsigned int ByteCount CUarray unsigned int const void unsigned int ByteCount CUarray unsigned int CUarray unsigned int unsigned int ByteCount void CUarray unsigned int unsigned int CUstream hStream const CUDA_MEMCPY2D pCopy CUdeviceptr const void unsigned int CUstream hStream const CUDA_MEMCPY2D CUstream hStream CUdeviceptr unsigned char unsigned int N CUdeviceptr unsigned int unsigned int N CUdeviceptr unsigned int unsigned short unsigned int unsigned int Height CUarray const CUDA_ARRAY_DESCRIPTOR pAllocateArray CUarray const CUDA_ARRAY3D_DESCRIPTOR pAllocateArray unsigned int CUtexref CUdeviceptr unsigned int bytes CUcontext unsigned int CUdevice device GLenum texture GLenum GLuint buffer GLenum GLuint renderbuffer GLenum GLsizeiptr const GLvoid * data
Buffer & getVtxPosBuffer(void)
Returns vertex position buffer.
CUdevice int ordinal char int CUdevice dev CUdevprop CUdevice dev CUcontext ctx CUcontext ctx CUcontext pctx CUmodule const void image CUmodule const void fatCubin CUfunction CUmodule const char name void p CUfunction unsigned int bytes CUtexref pTexRef CUtexref CUarray unsigned int Flags CUtexref int CUaddress_mode am CUtexref unsigned int Flags CUaddress_mode CUtexref int dim CUarray_format int CUtexref hTexRef CUfunction unsigned int numbytes CUfunction int float value CUfunction int CUtexref hTexRef CUfunction int int grid_height CUevent unsigned int Flags CUevent hEvent CUevent hEvent CUstream unsigned int Flags CUstream hStream GLuint bufferobj unsigned int CUdevice dev CUdeviceptr unsigned int CUmodule const char name CUdeviceptr unsigned int bytesize CUdeviceptr dptr void unsigned int bytesize void CUdeviceptr unsigned int ByteCount CUarray unsigned int CUdeviceptr unsigned int ByteCount CUarray unsigned int const void unsigned int ByteCount CUarray unsigned int CUarray unsigned int unsigned int ByteCount void CUarray unsigned int unsigned int CUstream hStream const CUDA_MEMCPY2D pCopy CUdeviceptr const void unsigned int CUstream hStream const CUDA_MEMCPY2D CUstream hStream CUdeviceptr unsigned char unsigned int N CUdeviceptr unsigned int unsigned int N CUdeviceptr unsigned int unsigned short unsigned int unsigned int Height CUarray const CUDA_ARRAY_DESCRIPTOR pAllocateArray CUarray const CUDA_ARRAY3D_DESCRIPTOR pAllocateArray unsigned int CUtexref CUdeviceptr unsigned int bytes CUcontext unsigned int CUdevice device GLenum texture GLenum GLuint buffer GLenum GLuint renderbuffer GLenum GLsizeiptr const GLvoid GLenum usage GLuint shader GLenum type GLsizei const GLuint framebuffers GLsizei const GLuint renderbuffers GLuint v GLuint v GLenum GLenum GLenum GLuint GLint level GLsizei GLuint framebuffers GLuint const GLchar name GLenum GLintptr GLsizeiptr GLvoid data GLuint GLenum GLint param GLuint GLenum GLint param GLhandleARB programObj GLenum GLenum GLsizei GLsizei height GLenum GLint GLint GLsizei GLsizei GLsizei GLint GLenum GLenum const GLvoid pixels GLint GLsizei const GLfloat value GLint GLfloat GLfloat v1 GLint GLfloat GLfloat GLfloat v2 GLint GLsizei const GLfloat value GLint GLsizei GLboolean const GLfloat value GLuint program GLuint GLfloat GLfloat GLfloat z GLuint GLint GLenum GLboolean GLsizei const GLvoid pointer GLuint GLuint const GLchar name GLenum GLsizei GLenum GLsizei GLsizei height GLenum GLuint renderbuffer GLenum GLenum GLint * params
Strucure holding build parameters.
K-d tree acceleration structure class.
const U8 * getPtr(S64 ofs=0)
const T & get(S idx) const
Scene * getScene(void) const
Gets source scene of the k-d tree.
K-d tree's leaf node class.
FastKDTreeBuilder(KDTree &kdtree, const KDTree::BuildParams ¶ms)
Constructor.
KDTreeNode * run(void)
Builds k-d tree.
FW_CUDA_FUNC float area(void) const
FW_CUDA_FUNC bool valid(void) const
FW_CUDA_FUNC T min(const VectorBase< T, L, S > &v)
FW_CUDA_FUNC T max(const VectorBase< T, L, S > &v)
int getNumTriangles(void) const
K-d tree's inner node class.
FW_CUDA_FUNC const Vec3f & min(void) const
CUdevice int ordinal char int CUdevice dev CUdevprop CUdevice dev CUcontext ctx CUcontext ctx CUcontext pctx CUmodule const void image CUmodule const void fatCubin CUfunction CUmodule const char name void p CUfunction unsigned int bytes CUtexref pTexRef CUtexref CUarray unsigned int Flags CUtexref int dim
Array< S32 > & getTriIndices(void)
Returns an array of triangle indices to which leaf nodes are pointig. These indices point to scene's ...
void reserve(S numElements)
CUdevice int ordinal char int CUdevice dev CUdevprop CUdevice dev CUcontext ctx CUcontext ctx CUcontext pctx CUmodule const void image CUmodule const void fatCubin CUfunction CUmodule const char name void p CUfunction unsigned int bytes CUtexref pTexRef CUtexref CUarray unsigned int Flags CUtexref int CUaddress_mode am CUtexref unsigned int Flags CUaddress_mode CUtexref int dim CUarray_format int CUtexref hTexRef CUfunction unsigned int numbytes CUfunction int float value CUfunction int CUtexref hTexRef CUfunction f
FW_CUDA_FUNC void grow(const Vec3f &pt)
void printf(const char *fmt,...)
CUdevice int ordinal char int CUdevice dev CUdevprop CUdevice dev CUcontext ctx CUcontext ctx CUcontext pctx CUmodule const void image CUmodule const void fatCubin CUfunction CUmodule const char name void p CUfunction unsigned int bytes CUtexref pTexRef CUtexref CUarray unsigned int Flags CUtexref int CUaddress_mode am CUtexref unsigned int Flags CUaddress_mode CUtexref int dim CUarray_format int CUtexref hTexRef CUfunction unsigned int numbytes CUfunction int float value CUfunction int CUtexref hTexRef CUfunction int int grid_height CUevent unsigned int Flags CUevent hEvent CUevent hEvent CUstream unsigned int Flags CUstream hStream GLuint bufferobj unsigned int CUdevice dev CUdeviceptr unsigned int CUmodule const char name CUdeviceptr unsigned int bytesize CUdeviceptr dptr void unsigned int bytesize void CUdeviceptr unsigned int ByteCount CUarray unsigned int CUdeviceptr unsigned int ByteCount CUarray unsigned int const void unsigned int ByteCount CUarray unsigned int CUarray unsigned int unsigned int ByteCount void CUarray unsigned int unsigned int CUstream hStream const CUDA_MEMCPY2D pCopy CUdeviceptr const void unsigned int CUstream hStream const CUDA_MEMCPY2D CUstream hStream CUdeviceptr unsigned char unsigned int N CUdeviceptr unsigned int unsigned int N CUdeviceptr unsigned int unsigned short unsigned int unsigned int Height CUarray const CUDA_ARRAY_DESCRIPTOR pAllocateArray CUarray const CUDA_ARRAY3D_DESCRIPTOR pAllocateArray unsigned int CUtexref CUdeviceptr unsigned int bytes CUcontext unsigned int CUdevice device GLenum texture GLenum GLuint buffer GLenum GLuint renderbuffer GLenum GLsizeiptr const GLvoid GLenum usage GLuint shader GLenum type GLsizei const GLuint framebuffers GLsizei const GLuint renderbuffers GLuint v GLuint v GLenum GLenum GLenum GLuint GLint level
FW_CUDA_FUNC void swap(T &a, T &b)
bool enablePrints
Flag whether to print information during build phase.
FW_CUDA_FUNC void intersect(const AABB &aabb)
K-d tree virtual parent node class.
FW_CUDA_FUNC A lerp(const A &a, const A &b, const B &t)