NTrace
GPU ray tracing framework
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros
RayBuffer.cpp
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2009-2011, NVIDIA Corporation
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions are met:
7  * * Redistributions of source code must retain the above copyright
8  * notice, this list of conditions and the following disclaimer.
9  * * Redistributions in binary form must reproduce the above copyright
10  * notice, this list of conditions and the following disclaimer in the
11  * documentation and/or other materials provided with the distribution.
12  * * Neither the name of NVIDIA Corporation nor the
13  * names of its contributors may be used to endorse or promote products
14  * derived from this software without specific prior written permission.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19  * DISCLAIMED. IN NO EVENT SHALL <COPYRIGHT HOLDER> BE LIABLE FOR ANY
20  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
22  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
23  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26  */
27 
28 #include "ray/RayBuffer.hpp"
29 #include "ray/RayBufferKernels.hpp"
30 #include "base/Math.hpp"
31 #include "base/Sort.hpp"
32 #include "base/Random.hpp"
33 #include "gpu/CudaCompiler.hpp"
34 
35 namespace FW
36 {
37 
39 {
40  FW_ASSERT(n >= 0);
41  if (n < m_size)
42  {
43  m_size = n;
44  return;
45  }
46 
47  m_size = n;
48  m_rays.resize(n * sizeof(Ray));
49  m_results.resize(n * sizeof(RayResult));
50  m_IDToSlot.resize(n * sizeof(S32));
51  m_slotToID.resize(n * sizeof(S32));
52 }
53 
54 void RayBuffer::setRay(S32 slot, const Ray& ray, S32 id)
55 {
56  FW_ASSERT(slot >= 0 && slot < m_size);
57  FW_ASSERT(id >= 0 && id < m_size);
58 
59  ((Ray*)m_rays.getMutablePtr())[slot] = ray;
60  ((S32*)m_IDToSlot.getMutablePtr())[id] = slot;
61  ((S32*)m_slotToID.getMutablePtr())[slot] = id;
62 }
63 
64 //-------------------------------------------------------------------
65 
66 void RayBuffer::randomSort(U32 randomSeed)
67 {
68  // Reorder rays.
69 
70  Ray* rays = (Ray*)getRayBuffer().getMutablePtr();
71  S32* idToSlot = (S32*)getIDToSlotBuffer().getMutablePtr();
72  S32* slotToID = (S32*)getSlotToIDBuffer().getMutablePtr();
73  Random random(randomSeed);
74 
75  for (int slot = 0; slot < m_size; slot++)
76  {
77  S32 slot2 = random.getS32(m_size - slot) + slot;
78 
79  S32 id = slotToID[slot];
80  S32 id2 = slotToID[slot2];
81 
82  swap(rays[slot], rays[slot2]);
83  swap(slotToID[slot],slotToID[slot2]);
84  swap(idToSlot[id], idToSlot[id2]);
85  }
86 }
87 
88 //-------------------------------------------------------------------
89 
90 static bool compareMortonKey(void* data, int idxA, int idxB)
91 {
92  const MortonKey& a = ((const MortonKey*)data)[idxA];
93  const MortonKey& b = ((const MortonKey*)data)[idxB];
94  if (a.hash[5] != b.hash[5]) return (a.hash[5] < b.hash[5]);
95  if (a.hash[4] != b.hash[4]) return (a.hash[4] < b.hash[4]);
96  if (a.hash[3] != b.hash[3]) return (a.hash[3] < b.hash[3]);
97  if (a.hash[2] != b.hash[2]) return (a.hash[2] < b.hash[2]);
98  if (a.hash[1] != b.hash[1]) return (a.hash[1] < b.hash[1]);
99  if (a.hash[0] != b.hash[0]) return (a.hash[0] < b.hash[0]);
100  return false;
101 }
102 
// Body of the Morton sort routine: compiles the ray buffer kernels, finds the
// bounding box of the rays, generates a MortonKey per ray, sorts the keys, and
// launches the kernel that reorders the rays.
// NOTE(review): the signature line (listing line 103) is absent from this
// listing; the index at the end of the page lists "void mortonSort()" as
// defined at RayBuffer.cpp:103.
104 {
105  // Compile kernels.
106 
107  CudaCompiler compiler;
108  compiler.setSourceFile("src/rt/ray/RayBufferKernels.cu");
109  compiler.addOptions("-use_fast_math");
110  compiler.include("src/rt");
111  compiler.include("src/framework");
112  CudaModule* module = compiler.compile();
113 
114  // Allocate temporary buffers.
115 
// NOTE(review): the two "old" buffers are later used as kernel inputs for the
// reorder step; presumably they are copies of the current contents - confirm
// Buffer's copy semantics.
116  Buffer oldRayBuffer = getRayBuffer();
117  Buffer oldSlotToIDBuffer = getSlotToIDBuffer();
// One MortonKey per ray.
118  Buffer keyBuffer(NULL, getSize() * sizeof(MortonKey));
119 
120  // Find AABB of the rays.
121  {
122  FindAABBInput& in = *(FindAABBInput*)module->getGlobal("c_FindAABBInput").getMutablePtr();
123  FindAABBOutput& out = *(FindAABBOutput*)module->getGlobal("c_FindAABBOutput").getMutablePtr();
124  in.numRays = getSize();
125  in.inRays = getRayBuffer().getCudaPtr();
126  in.raysPerThread = 32;
// Start from an inverted box (lo = +max, hi = -max); presumably the kernel
// narrows it with min/max per ray - verify against findAABBKernel.
127  out.aabbLo = Vec3f(+FW_F32_MAX);
128  out.aabbHi = Vec3f(-FW_F32_MAX);
129 
// First launch argument is numRays divided by raysPerThread, rounded up.
// NOTE(review): listing line 132, which carries the remaining launch
// arguments and closes the call, is missing from this listing.
130  module->getKernel("findAABBKernel").launch(
131  (in.numRays - 1) / in.raysPerThread + 1,
133  }
134 
135  // Generate keys.
136  {
// Feed the bounding box produced by the previous step into the key generator.
137  const FindAABBOutput& aabb = *(const FindAABBOutput*)module->getGlobal("c_FindAABBOutput").getPtr();
138  GenMortonKeysInput& in = *(GenMortonKeysInput*)module->getGlobal("c_GenMortonKeysInput").getMutablePtr();
139  in.numRays = getSize();
140  in.aabbLo = aabb.aabbLo;
141  in.aabbHi = aabb.aabbHi;
142  in.inRays = getRayBuffer().getCudaPtr();
143  in.outKeys = keyBuffer.getMutableCudaPtr();
144  module->getKernel("genMortonKeysKernel").launch(getSize(), Vec2i(32, 4));
145  }
146 
147  // Sort keys.
148 
// The keys are sorted through keyBuffer's non-CUDA pointer using
// compareMortonKey; the trailing "true" is presumably the multicore flag of
// sort() - confirm against Sort.hpp.
149  sort((MortonKey*)keyBuffer.getMutablePtr(), getSize(), compareMortonKey, sortDefaultSwap<MortonKey>, true);
150 
151  // Reorder rays.
152  {
153  ReorderRaysInput& in = *(ReorderRaysInput*)module->getGlobal("c_ReorderRaysInput").getMutablePtr();
154  in.numRays = getSize();
155  in.inKeys = keyBuffer.getCudaPtr();
156  in.inRays = oldRayBuffer.getCudaPtr();
157  in.inSlotToID = oldSlotToIDBuffer.getCudaPtr();
// NOTE(review): listing lines 158-160 are missing here; presumably they set
// the kernel's output pointers - restore them from the original file.
161  module->getKernel("reorderRaysKernel").launch(getSize());
162  }
163 }
164 
165 
166 } // namespace FW
S32 getSize() const
Gets size of the buffer (number of rays).
Definition: RayBuffer.hpp:52
#define NULL
Definition: Defs.hpp:39
#define FW_F32_MAX
Definition: Defs.hpp:118
void include(const String &path)
void sort(void *data, int start, int end, SortCompareFunc compareFunc, SortSwapFunc swapFunc, bool multicore=false)
Definition: Sort.cpp:203
CudaModule * compile(bool enablePrints=true, bool autoFail=true)
CUdevice int ordinal char int CUdevice dev CUdevprop CUdevice dev CUcontext ctx CUcontext ctx CUcontext pctx CUmodule * module
Definition: DLLImports.inl:60
CUdevice int ordinal char int CUdevice dev CUdevprop CUdevice dev CUcontext ctx CUcontext ctx CUcontext pctx CUmodule const void image CUmodule const void fatCubin CUfunction CUmodule const char name void p CUfunction unsigned int bytes CUtexref pTexRef CUtexref CUarray unsigned int Flags CUtexref int CUaddress_mode am CUtexref unsigned int Flags CUaddress_mode CUtexref int dim CUarray_format int CUtexref hTexRef CUfunction unsigned int numbytes CUfunction int float value CUfunction int CUtexref hTexRef CUfunction int int grid_height CUevent unsigned int Flags CUevent hEvent CUevent hEvent CUstream unsigned int Flags CUstream hStream GLuint bufferobj unsigned int CUdevice dev CUdeviceptr unsigned int CUmodule const char name CUdeviceptr unsigned int bytesize CUdeviceptr dptr void unsigned int bytesize void CUdeviceptr unsigned int ByteCount CUarray unsigned int CUdeviceptr unsigned int ByteCount CUarray unsigned int const void unsigned int ByteCount CUarray unsigned int CUarray unsigned int unsigned int ByteCount void CUarray unsigned int unsigned int CUstream hStream const CUDA_MEMCPY2D pCopy CUdeviceptr const void unsigned int CUstream hStream const CUDA_MEMCPY2D CUstream hStream CUdeviceptr unsigned char unsigned int N CUdeviceptr unsigned int unsigned int N CUdeviceptr unsigned int unsigned short unsigned int unsigned int Height CUarray const CUDA_ARRAY_DESCRIPTOR pAllocateArray CUarray const CUDA_ARRAY3D_DESCRIPTOR pAllocateArray unsigned int CUtexref CUdeviceptr unsigned int bytes CUcontext unsigned int CUdevice device GLenum texture GLenum GLuint buffer GLenum GLuint renderbuffer GLenum GLsizeiptr const GLvoid * data
Definition: DLLImports.inl:319
Buffer & getIDToSlotBuffer()
Gets buffer mapping ids to slots.
Definition: RayBuffer.hpp:179
CUdeviceptr getCudaPtr(S64 ofs=0)
Definition: Buffer.hpp:108
CudaKernel getKernel(const String &name)
Definition: CudaModule.cpp:80
Definition: Util.hpp:62
const U8 * getPtr(S64 ofs=0)
Definition: Buffer.hpp:106
Buffer & getRayBuffer()
Gets ray buffer.
Definition: RayBuffer.hpp:167
S32 getS32(void)
Definition: Random.hpp:54
Buffer & getSlotToIDBuffer()
Gets buffer slots to ids.
Definition: RayBuffer.hpp:185
CUdeviceptr getMutableCudaPtr(S64 ofs=0)
Definition: Buffer.hpp:112
U8 * getMutablePtr(S64 ofs=0)
Definition: Buffer.hpp:110
void randomSort(U32 randomSeed=0)
Shuffles rays in the buffer.
Definition: RayBuffer.cpp:66
#define FW_ASSERT(X)
Definition: Defs.hpp:67
signed int S32
Definition: Defs.hpp:88
unsigned int U32
Definition: Defs.hpp:85
Buffer & getGlobal(const String &name)
Definition: CudaModule.cpp:117
CUdevice int ordinal char int CUdevice dev CUdevprop CUdevice dev CUcontext ctx CUcontext ctx CUcontext pctx CUmodule const void image CUmodule const void fatCubin CUfunction CUmodule const char name void p CUfunction unsigned int bytes CUtexref pTexRef CUtexref CUarray unsigned int Flags CUtexref int CUaddress_mode am CUtexref unsigned int Flags CUaddress_mode CUtexref int dim CUarray_format int CUtexref hTexRef CUfunction unsigned int numbytes CUfunction int float value CUfunction int CUtexref hTexRef CUfunction int int grid_height CUevent unsigned int Flags CUevent hEvent CUevent hEvent CUstream unsigned int Flags CUstream hStream GLuint bufferobj unsigned int CUdevice dev CUdeviceptr unsigned int CUmodule const char name CUdeviceptr unsigned int bytesize CUdeviceptr dptr void unsigned int bytesize void CUdeviceptr unsigned int ByteCount CUarray unsigned int CUdeviceptr unsigned int ByteCount CUarray unsigned int const void unsigned int ByteCount CUarray unsigned int CUarray unsigned int unsigned int ByteCount void CUarray unsigned int unsigned int CUstream hStream const CUDA_MEMCPY2D pCopy CUdeviceptr const void unsigned int CUstream hStream const CUDA_MEMCPY2D CUstream hStream CUdeviceptr unsigned char unsigned int N CUdeviceptr unsigned int unsigned int N CUdeviceptr unsigned int unsigned short unsigned int unsigned int Height CUarray const CUDA_ARRAY_DESCRIPTOR pAllocateArray CUarray const CUDA_ARRAY3D_DESCRIPTOR pAllocateArray unsigned int CUtexref CUdeviceptr unsigned int bytes CUcontext unsigned int CUdevice device GLenum texture GLenum GLuint buffer GLenum GLuint renderbuffer GLenum GLsizeiptr const GLvoid GLenum usage GLuint shader GLenum type GLsizei n
Definition: DLLImports.inl:325
CudaKernel & launch(void)
Definition: CudaKernel.cpp:179
void setRay(S32 slot, const Ray &ray)
Assigns ray to a slot. Id is same as slot.
Definition: RayBuffer.hpp:65
FW_CUDA_FUNC void swap(T &a, T &b)
Definition: Defs.hpp:183
void resize(S32 n)
Resizes the buffer.
Definition: RayBuffer.cpp:38
void addOptions(const String &options)
void setSourceFile(const String &path)
void resize(S64 size)
Definition: Buffer.hpp:82
void mortonSort()
Performs morton sort.
Definition: RayBuffer.cpp:103