NTrace
GPU ray tracing framework
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros
RayGen.cpp
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2009-2011, NVIDIA Corporation
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions are met:
7  * * Redistributions of source code must retain the above copyright
8  * notice, this list of conditions and the following disclaimer.
9  * * Redistributions in binary form must reproduce the above copyright
10  * notice, this list of conditions and the following disclaimer in the
11  * documentation and/or other materials provided with the distribution.
12  * * Neither the name of NVIDIA Corporation nor the
13  * names of its contributors may be used to endorse or promote products
14  * derived from this software without specific prior written permission.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19  * DISCLAIMED. IN NO EVENT SHALL <COPYRIGHT HOLDER> BE LIABLE FOR ANY
20  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
22  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
23  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26  */
27 
28 #include "ray/RayGen.hpp"
29 #include "ray/RayGenKernels.hpp"
30 #include "base/Random.hpp"
31 
32 namespace FW
33 {
34 
35 RayGen::RayGen(S32 maxBatchSize)
36 : m_maxBatchSize(maxBatchSize)
37 {
38  m_compiler.setSourceFile("src/rt/ray/RayGenKernels.cu");
39  m_compiler.addOptions("-use_fast_math");
40  m_compiler.include("src/rt");
41  m_compiler.include("src/framework");
42 }
43 
44 void RayGen::primary(RayBuffer& orays, const Vec3f& origin, const Mat4f& nscreenToWorld, S32 w,S32 h,float maxDist)
45 {
46  // doesn't do batching
47 
48  m_pixelTable.setSize(Vec2i(w, h));
49  orays.resize(w * h);
50  orays.setNeedClosestHit(true);
51 
52  // Compile kernel.
53 
54  CudaModule* module = m_compiler.compile();
55 
56  // Setup input struct.
57 
58  RayGenPrimaryInput& in = *(RayGenPrimaryInput*)module->getGlobal("c_RayGenPrimaryInput").getMutablePtr();
59  in.origin = origin;
60  in.nscreenToWorld = nscreenToWorld;
61  in.w = w;
62  in.h = h;
63  in.maxDist = maxDist;
64  in.rays = orays.getRayBuffer().getMutableCudaPtr();
67  in.indexToPixel = m_pixelTable.getIndexToPixel().getCudaPtr();
68 
69  // Launch.
70 
71  module->getKernel("rayGenPrimaryKernel").launch(orays.getSize());
72 }
73 
74 void RayGen::primaryCPU(RayBuffer& orays, const Vec3f& origin, const Mat4f& nscreenToWorld, S32 w,S32 h,float maxDist)
75 {
76  // doesn't do batching
77 
78  m_pixelTable.setSize(Vec2i(w, h));
79  orays.resize(w * h);
80  orays.setNeedClosestHit(true);
81 
82  // Compute end position.
83  for(S32 i = 0; i < orays.getSize(); i++)
84  {
85  int pixel = ((const S32*)m_pixelTable.getIndexToPixel().getMutablePtr())[i];
86  int posIdx = pixel;
87  // int posIdx = ((const S32*)in.indexToPixel)[(taskIdx & 31) + 1024 * 768 / 2 + 1234];
88  // int posIdx = ((const S32*)in.indexToPixel)[(taskIdx & 0) + 1024 * 768 / 2 + 1234];
89 
90  Vec4f nscreenPos;
91  nscreenPos.x = 2.0f * ((F32)(posIdx % w) + 0.5f) / (F32)w - 1.0f;
92  nscreenPos.y = 2.0f * ((F32)(posIdx / w) + 0.5f) / (F32)h - 1.0f;
93  nscreenPos.z = 0.0f;
94  nscreenPos.w = 1.0f;
95 
96  Vec4f worldPos4D = nscreenToWorld * nscreenPos;
97  Vec3f worldPos = worldPos4D.getXYZ() / worldPos4D.w;
98 
99  // Write results.
100 
101  Ray& ray = ((Ray*)orays.getRayBuffer().getMutablePtr())[i];
102  ((S32*)orays.getSlotToIDBuffer().getMutablePtr())[i] = pixel;
103  ((S32*)orays.getIDToSlotBuffer().getMutablePtr())[pixel] = i;
104 
105  ray.origin = origin;
106  ray.direction = (worldPos - origin).normalized();
107  ray.tmin = 0.0f;
108  ray.tmax = maxDist;
109  }
110 }
111 
112 bool RayGen::shadow(RayBuffer& orays, RayBuffer& irays, int numSamples, const Vec3f& lightPos, float lightRadius, bool& newBatch, U32 randomSeed)
113 {
114  // batching
115  S32 lo,hi;
116  if( !batching(irays.getSize(),numSamples, m_shadowStartIdx,newBatch, lo,hi) )
117  return false;
118 
119  // allocate output array
120  orays.resize((hi-lo)*numSamples);
121  orays.setNeedClosestHit(false);
122 
123  // Compile kernel.
124 
125  CudaModule* module = m_compiler.compile();
126 
127  // Setup input struct.
128 
129  RayGenShadowInput& in = *(RayGenShadowInput*)module->getGlobal("c_RayGenShadowInput").getMutablePtr();
130  in.firstInputSlot = lo;
131  in.numInputRays = hi - lo;
132  in.numSamples = numSamples;
133  in.lightPositionX = lightPos.x;
134  in.lightPositionY = lightPos.y;
135  in.lightPositionZ = lightPos.z;
136  in.lightRadius = lightRadius;
137  in.randomSeed = Random(randomSeed).getU32();
138  in.inRays = irays.getRayBuffer().getCudaPtr();
139  in.inResults = irays.getResultBuffer().getCudaPtr();
140  in.outRays = orays.getRayBuffer().getMutableCudaPtr();
143 
144  // Launch.
145 
146  module->getKernel("rayGenShadowKernel").launch(in.numInputRays);
147  return true;
148 }
149 
150 bool RayGen::shadowCPU(RayBuffer& orays, RayBuffer& irays, int numSamples, const Vec3f& lightPos, float lightRadius, bool& newBatch, U32 randomSeed)
151 {
152  const float epsilon = 1e-3f;
153 
154  // batching
155  S32 lo,hi;
156  if( !batching(irays.getSize(), numSamples, m_shadowStartIdx, newBatch, lo, hi) )
157  return false;
158 
159  // allocate output array
160  const S32 numOutputRays = (hi-lo)*numSamples;
161  orays.resize(numOutputRays);
162  Random rnd(randomSeed);
163 
164  // raygen
165  for(int i=lo;i<hi;i++)
166  {
167  const Ray& iray = irays.getRayForSlot(i);
168  const RayResult& irayres = irays.getResultForSlot(i);
169 
170  const float t = max(0.f,(irayres.t-epsilon)); // backtrack a little bit
171  const Vec3f origin = iray.origin + t*iray.direction;
172 
173  for(int j=0;j<numSamples;j++)
174  {
175  Vec3f target = lightPos;
176  Vec3f direction = target - origin;
177 
178  Ray oray;
179  oray.origin = origin;
180  oray.direction = direction.normalized();
181  oray.tmin = 0.f;
182  oray.tmax = direction.length();
183 
184  if(!irayres.hit())
185  oray.degenerate();
186 
187  const S32 oindex = (i-lo)*numSamples+j;
188  orays.setRay(oindex, oray);
189  }
190  }
191 
192  orays.setNeedClosestHit(false);
193  return true;
194 }
195 
196 bool RayGen::ao(RayBuffer& orays, RayBuffer& irays, Scene& scene, int numSamples, float maxDist, bool& newBatch, U32 randomSeed)
197 {
198  // Perform batching and setup output array.
199 
200  S32 lo, hi;
201  if(!batching(irays.getSize(), numSamples, m_aoStartIdx, newBatch, lo, hi))
202  return false;
203 
204  orays.resize((hi - lo) * numSamples);
205  orays.setNeedClosestHit(false);
206 
207  // Compile kernel.
208 
209  CudaModule* module = m_compiler.compile();
210 
211  // Setup input struct.
212 
213  RayGenAOInput& in = *(RayGenAOInput*)module->getGlobal("c_RayGenAOInput").getMutablePtr();
214  in.firstInputSlot = lo;
215  in.numInputRays = hi - lo;
216  in.numSamples = numSamples;
217  in.maxDist = maxDist;
218  in.randomSeed = Random(randomSeed).getU32();
219  in.inRays = irays.getRayBuffer().getCudaPtr();
220  in.inResults = irays.getResultBuffer().getCudaPtr();
221  in.outRays = orays.getRayBuffer().getMutableCudaPtr();
224  in.normals = scene.getTriNormalBuffer().getCudaPtr();
225 
226  // Launch.
227 
228  module->getKernel("rayGenAOKernel").launch(in.numInputRays);
229  return true;
230 }
231 
232 bool RayGen::aoCPU(RayBuffer& orays, RayBuffer& irays, Scene& scene, int numSamples, float maxDist, bool& newBatch, U32 randomSeed)
233 {
234  /*const float epsilon = 1e-3f;
235 
236  // Perform batching and setup output array.
237 
238  S32 lo, hi;
239  if(!batching(irays.getSize(), numSamples, m_aoStartIdx, newBatch, lo, hi))
240  return false;
241 
242  orays.resize((hi - lo) * numSamples);
243  orays.setNeedClosestHit(false);
244 
245  Random rnd(randomSeed);
246  const Scene::Triangle* triangles = (const Scene::Triangle*)scene.getTrianglePtr();
247 
248  // raygen
249  for(int i=lo;i<hi;i++)
250  {
251  const Ray& iray = irays.getRayForSlot(i);
252  const RayResult& irayres = irays.getResultForSlot(i);
253 
254  const float t = max(0.f,(irayres.t-epsilon)); // backtrack a little bit
255  const Vec3f origin = iray.origin + t*iray.direction;
256 
257  // Lookup normal, flipping back-facing directions.
258 
259  int tri = irayres.id;
260  Vec3f normal(1.0f, 0.0f, 0.0f);
261  if (tri != -1)
262  normal = triangles[tri].normal;
263  if (dot(normal, iray.direction) > 0.0f)
264  normal = -normal;
265 
266  // Construct perpendicular vectors.
267 
268  Vec3f na = abs(normal);
269  F32 nm = max(max(na.x, na.y), na.z);
270  Vec3f perp(normal.y, -normal.x, 0.0f); // assume y is largest
271  if (nm == na.z)
272  perp = Vec3f(0.0f, normal.z, -normal.y);
273  else if (nm == na.x)
274  perp = Vec3f(-normal.z, 0.0f, normal.x);
275 
276  perp = normalize(perp);
277  Vec3f biperp = cross(normal, perp);
278 
279  // Pick random rotation angle.
280 
281  F32 angle = 2.0f * FW_PI * rnd.getF32(-1.0f, 1.0f);
282 
283  // Construct rotated tangent vectors.
284 
285  Vec3f t0 = perp * cosf(angle) + biperp * sinf(angle);
286  Vec3f t1 = perp * -sinf(angle) + biperp * cosf(angle);
287 
288  for(int j=0;j<numSamples;j++)
289  {
290  // Base-2 Halton sequence for X.
291 
292  F32 x = 0.0f;
293  F32 xadd = 1.0f;
294  unsigned int hc2 = j + 1;
295  while (hc2 != 0)
296  {
297  xadd *= 0.5f;
298  if ((hc2 & 1) != 0)
299  x += xadd;
300  hc2 >>= 1;
301  }
302 
303  // Base-3 Halton sequence for Y.
304 
305  F32 y = 0.0f;
306  F32 yadd = 1.0f;
307  int hc3 = j + 1;
308  while (hc3 != 0)
309  {
310  yadd *= 1.0f / 3.0f;
311  y += (F32)(hc3 % 3) * yadd;
312  hc3 /= 3;
313  }
314 
315  // Warp to a point on the unit hemisphere.
316 
317  F32 angle = 2.0f * FW_PI * y;
318  F32 r = sqrtf(x);
319  x = r * cosf(angle);
320  y = r * sinf(angle);
321  float z = sqrtf(1.0f - x * x - y * y);
322 
323  // Output ray.
324 
325  Ray oray;
326  oray.origin = origin;
327  oray.direction = normalize(x * t0 + y * t1 + z * normal);
328  oray.tmin = 0.0f;
329  oray.tmax = (tri == -1) ? -1.0f : maxDist;
330 
331  const S32 oindex = (i-lo)*numSamples+j;
332  orays.setRay(oindex, oray);
333  }
334  }
335 
336  orays.setNeedClosestHit(false);*/
337  return true;
338 }
339 
340 bool RayGen::random (RayBuffer& orays, const AABB& bounds, int numRays, bool closestHit, bool PosDir, U32 randomSeed)
341 {
342  bool temp = true;
343  return random(orays,bounds,numRays,closestHit, temp, PosDir, randomSeed);
344 }
345 
346 bool RayGen::random (RayBuffer& orays, const AABB& bounds, int numRays, bool closestHit, bool PosDir, bool& newBatch, U32 randomSeed)
347 {
348  S32 lo,hi;
349  if( !batching(numRays,1, m_randomStartIdx, newBatch, lo,hi) )
350  return false;
351 
352  const S32 numOutputRays = (hi-lo);
353  orays.resize(numOutputRays);
354  Random rnd(randomSeed);
355 
356  for(int i=0;i<numRays;i++)
357  {
358  Vec3f a = rnd.getVec3f(0.0f, 1.0f);
359  Vec3f b = rnd.getVec3f(0.0f, 1.0f);
360 
361  Ray oray;
362  oray.origin = bounds.min() + a*(bounds.max() - bounds.min());
363  if(PosDir) oray.direction = b.normalized() * (bounds.max() - bounds.min()).length(); // position, direction
364  else oray.direction = bounds.min() + b*(bounds.max() - bounds.min()) - oray.origin; // position, position
365  oray.tmin = 0.f;
366  oray.tmax = 1.f;
367  orays.setRay(i,oray);
368  }
369 
370  orays.setNeedClosestHit(closestHit);
371  return true;
372 }
373 
374 bool RayGen::randomReflection (RayBuffer& orays, RayBuffer& irays, Scene& scene, int numSamples, float maxDist, bool& newBatch, U32 randomSeed)
375 {
376  const float epsilon = 1e-4f;
377 
378  // batching
379  S32 lo,hi;
380  if( !batching(irays.getSize(),numSamples, m_randomStartIdx,newBatch, lo,hi) )
381  return false;
382 
383  // allocate output array
384  const S32 numOutputRays = (hi-lo)*numSamples;
385  orays.resize(numOutputRays);
386  Random rnd(randomSeed);
387 
388  // raygen
389  const Vec3f* normals = (const Vec3f*)scene.getTriNormalBuffer().getPtr();
390  for(int i=lo;i<hi;i++)
391  {
392  const Ray& iray = irays.getRayForSlot(i);
393  const RayResult& irayres = irays.getResultForSlot(i);
394 
395  const float t = max(0.f,(irayres.t-epsilon)); // backtrack a little bit
396  const Vec3f origin = iray.origin + t*iray.direction;
397  Vec3f normal = irayres.hit() ? normals[irayres.id] : Vec3f(0.f);
398  if(dot(normal,iray.direction) > 0.f)
399  normal = -normal;
400 
401  for(int j=0;j<numSamples;j++)
402  {
403  Ray oray;
404 
405  if(irayres.hit())
406  {
407  oray.origin = origin;
408 
409  do{
410  oray.direction.x = rnd.getF32();
411  oray.direction.y = rnd.getF32();
412  oray.direction.z = rnd.getF32();
413  oray.direction.normalize();
414  } while(dot(oray.direction,normal)<0.f);
415 
416  oray.tmin = 0.f;
417  oray.tmax = maxDist;
418  }
419  else
420  oray.degenerate();
421 
422  const S32 oindex = (i-lo)*numSamples+j;
423  orays.setRay(oindex, oray);
424  }
425  }
426 
427  orays.setNeedClosestHit(false);
428  return true;
429 }
430 
431 bool RayGen::batching(S32 numInputRays,S32 numSamples,S32& startIdx,bool& newBatch, S32& lo,S32& hi)
432 {
433  if(newBatch)
434  {
435  newBatch = false;
436  startIdx = 0;
437  }
438 
439  if(startIdx == numInputRays)
440  return false; // finished
441 
442  // current index [lo,hi) in *input* array
443  lo = startIdx;
444  hi = min(numInputRays, lo+m_maxBatchSize/numSamples);
445 
446  // for the next round
447  startIdx = hi;
448  return true; // continues
449 }
450 
451 } //
FW_CUDA_FUNC T length(const VectorBase< T, L, S > &v)
Definition: Math.hpp:459
S32 getSize() const
Gets size of the buffer (number of rays).
Definition: RayBuffer.hpp:52
void primaryCPU(RayBuffer &orays, const Vec3f &origin, const Mat4f &nscreenToWorld, S32 w, S32 h, float maxDist)
Generates primary rays on the CPU.
Definition: RayGen.cpp:74
void include(const String &path)
FW_CUDA_FUNC const Vec3f & max(void) const
Definition: Util.hpp:49
CudaModule * compile(bool enablePrints=true, bool autoFail=true)
CUdevice int ordinal char int CUdevice dev CUdevprop CUdevice dev CUcontext ctx CUcontext ctx CUcontext pctx CUmodule * module
Definition: DLLImports.inl:60
Buffer & getIDToSlotBuffer()
Gets buffer mapping ids to slots.
Definition: RayBuffer.hpp:179
CUdevice int ordinal char int CUdevice dev CUdevprop CUdevice dev CUcontext ctx CUcontext ctx CUcontext pctx CUmodule const void image CUmodule const void fatCubin CUfunction CUmodule const char name void p CUfunction unsigned int bytes CUtexref pTexRef CUtexref CUarray unsigned int Flags CUtexref int CUaddress_mode am CUtexref unsigned int Flags CUaddress_mode CUtexref int dim CUarray_format int CUtexref hTexRef CUfunction unsigned int numbytes CUfunction int float value CUfunction int CUtexref hTexRef CUfunction int int grid_height CUevent unsigned int Flags CUevent hEvent CUevent hEvent CUstream unsigned int Flags CUstream hStream GLuint bufferobj unsigned int CUdevice dev CUdeviceptr unsigned int CUmodule const char name CUdeviceptr unsigned int bytesize CUdeviceptr dptr void unsigned int bytesize void CUdeviceptr unsigned int ByteCount CUarray unsigned int CUdeviceptr unsigned int ByteCount CUarray unsigned int const void unsigned int ByteCount CUarray unsigned int CUarray unsigned int unsigned int ByteCount void CUarray unsigned int unsigned int CUstream hStream const CUDA_MEMCPY2D pCopy CUdeviceptr const void unsigned int CUstream hStream const CUDA_MEMCPY2D CUstream hStream CUdeviceptr unsigned char unsigned int N CUdeviceptr unsigned int unsigned int N CUdeviceptr unsigned int unsigned short unsigned int unsigned int Height CUarray const CUDA_ARRAY_DESCRIPTOR pAllocateArray CUarray const CUDA_ARRAY3D_DESCRIPTOR pAllocateArray unsigned int CUtexref CUdeviceptr unsigned int bytes CUcontext unsigned int CUdevice device GLenum texture GLenum GLuint buffer GLenum GLuint renderbuffer GLenum GLsizeiptr const GLvoid GLenum usage GLuint shader GLenum type GLsizei const GLuint framebuffers GLsizei const GLuint renderbuffers GLuint v GLuint v GLenum GLenum GLenum GLuint GLint level GLsizei GLuint framebuffers GLuint const GLchar name GLenum GLintptr GLsizeiptr GLvoid data GLuint GLenum GLint param GLuint GLenum GLint param GLhandleARB programObj GLenum GLenum GLsizei 
GLsizei height GLenum GLint GLint GLsizei GLsizei GLsizei GLint GLenum GLenum const GLvoid pixels GLint GLsizei const GLfloat value GLint GLfloat GLfloat v1 GLint GLfloat GLfloat GLfloat v2 GLint GLsizei const GLfloat value GLint GLsizei GLboolean const GLfloat value GLuint program GLuint GLfloat GLfloat GLfloat z GLuint GLint GLenum GLboolean normalized
Definition: DLLImports.inl:365
Buffer & getTriNormalBuffer(void)
Returns triangle normal buffer.
Definition: Scene.hpp:82
bool ao(RayBuffer &orays, RayBuffer &irays, Scene &scene, int numSamples, float maxDist, bool &newBatch, U32 randomSeed=0)
Generates ao rays on the GPU. Batches rays if necessary.
Definition: RayGen.cpp:196
float t
Definition: Util.hpp:84
RayGen(S32 maxBatchSize=8 *1024 *1024)
Constructor.
Definition: RayGen.cpp:35
FW_CUDA_FUNC bool hit(void) const
Definition: Util.hpp:80
CUdeviceptr getCudaPtr(S64 ofs=0)
Definition: Buffer.hpp:108
bool random(RayBuffer &orays, const AABB &bounds, int numRays, bool closestHit, bool PosDir=false, U32 randomSeed=0)
Generates random rays. Used for various tests.
Definition: RayGen.cpp:340
CUdeviceptr inResults
bool randomReflection(RayBuffer &orays, RayBuffer &irays, Scene &scene, int numSamples, float maxDist, bool &newBatch, U32 randomSeed=0)
Generates random reflection rays.
Definition: RayGen.cpp:374
CudaKernel getKernel(const String &name)
Definition: CudaModule.cpp:80
FW_CUDA_FUNC Vec3f getXYZ(void) const
Definition: Math.hpp:365
Definition: Util.hpp:62
bool shadowCPU(RayBuffer &orays, RayBuffer &irays, int numSamples, const Vec3f &lightPos, float lightRadius, bool &newBatch, U32 randomSeed=0)
Generates shadow rays on the CPU. Batches rays if necessary.
Definition: RayGen.cpp:150
Definitions for the ray generator class.
const Ray & getRayForSlot(S32 slot) const
Gets a ray assigned to a given slot.
Definition: RayBuffer.hpp:89
const U8 * getPtr(S64 ofs=0)
Definition: Buffer.hpp:106
FW_CUDA_FUNC T dot(const VectorBase< T, L, S > &a, const VectorBase< T, L, V > &b)
Definition: Math.hpp:477
CUdevice int ordinal char int CUdevice dev CUdevprop CUdevice dev CUcontext ctx CUcontext ctx CUcontext pctx CUmodule const void image CUmodule const void fatCubin CUfunction CUmodule const char name void p CUfunction unsigned int bytes CUtexref pTexRef CUtexref CUarray unsigned int Flags CUtexref int CUaddress_mode am CUtexref unsigned int Flags CUaddress_mode CUtexref int dim CUarray_format int CUtexref hTexRef CUfunction unsigned int numbytes CUfunction int float value CUfunction int CUtexref hTexRef CUfunction int int grid_height CUevent unsigned int Flags CUevent hEvent CUevent hEvent CUstream unsigned int Flags CUstream hStream GLuint bufferobj unsigned int CUdevice dev CUdeviceptr unsigned int CUmodule const char name CUdeviceptr unsigned int bytesize CUdeviceptr dptr void unsigned int bytesize void CUdeviceptr unsigned int ByteCount CUarray unsigned int CUdeviceptr unsigned int ByteCount CUarray unsigned int const void unsigned int ByteCount CUarray unsigned int CUarray unsigned int unsigned int ByteCount void CUarray unsigned int unsigned int CUstream hStream const CUDA_MEMCPY2D pCopy CUdeviceptr const void unsigned int CUstream hStream const CUDA_MEMCPY2D CUstream hStream CUdeviceptr unsigned char unsigned int N CUdeviceptr unsigned int unsigned int N CUdeviceptr unsigned int unsigned short unsigned int unsigned int Height CUarray const CUDA_ARRAY_DESCRIPTOR pAllocateArray CUarray const CUDA_ARRAY3D_DESCRIPTOR pAllocateArray unsigned int CUtexref CUdeviceptr unsigned int bytes CUcontext unsigned int CUdevice device GLenum texture GLenum target
Definition: DLLImports.inl:315
Buffer & getRayBuffer()
Gets ray buffer.
Definition: RayBuffer.hpp:167
void setSize(const Vec2i &size)
Definition: PixelTable.cpp:47
Vec3f origin
Definition: Util.hpp:67
Buffer & getSlotToIDBuffer()
Gets buffer mapping slots to ids.
Definition: RayBuffer.hpp:185
float F32
Definition: Defs.hpp:89
CUdeviceptr getMutableCudaPtr(S64 ofs=0)
Definition: Buffer.hpp:112
FW_CUDA_FUNC void degenerate(void)
Definition: Util.hpp:65
Buffer & getResultBuffer()
Gets ray result buffer.
Definition: RayBuffer.hpp:173
CUdeviceptr outRays
Ray buffer class. Stores rays.
Definition: RayBuffer.hpp:38
U8 * getMutablePtr(S64 ofs=0)
Definition: Buffer.hpp:110
CUdeviceptr inRays
FW_CUDA_FUNC T min(const VectorBase< T, L, S > &v)
Definition: Math.hpp:461
Vec3f direction
Definition: Util.hpp:69
void primary(RayBuffer &orays, const Vec3f &origin, const Mat4f &nscreenToWorld, S32 w, S32 h, float maxDist)
Generates primary rays on the GPU.
Definition: RayGen.cpp:44
FW_CUDA_FUNC T max(const VectorBase< T, L, S > &v)
Definition: Math.hpp:462
float tmax
Definition: Util.hpp:70
signed int S32
Definition: Defs.hpp:88
bool shadow(RayBuffer &orays, RayBuffer &irays, int numSamples, const Vec3f &lightPos, float lightRadius, bool &newBatch, U32 randomSeed=0)
Generates shadow rays on the GPU. Batches rays if necessary.
Definition: RayGen.cpp:112
Buffer & getIndexToPixel(void)
Definition: PixelTable.hpp:44
FW_CUDA_FUNC const Vec3f & min(void) const
Definition: Util.hpp:48
float tmin
Definition: Util.hpp:68
F32 getF32(void)
Definition: Random.hpp:57
unsigned int U32
Definition: Defs.hpp:85
Class holding 3d scene.
Definition: Scene.hpp:44
FW_CUDA_FUNC void normalize(T len=(T) 1)
Definition: Math.hpp:145
Buffer & getGlobal(const String &name)
Definition: CudaModule.cpp:117
CUdevice int ordinal char int CUdevice dev CUdevprop CUdevice dev CUcontext ctx CUcontext ctx CUcontext pctx CUmodule const void image CUmodule const void fatCubin CUfunction CUmodule const char name void p CUfunction unsigned int bytes CUtexref pTexRef CUtexref CUarray unsigned int Flags CUtexref int CUaddress_mode am CUtexref unsigned int Flags CUaddress_mode CUtexref int dim CUarray_format int CUtexref hTexRef CUfunction unsigned int numbytes CUfunction int float value CUfunction int CUtexref hTexRef CUfunction f
Definition: DLLImports.inl:88
const RayResult & getResultForSlot(S32 slot) const
Gets a ray result assigned to a given slot.
Definition: RayBuffer.hpp:103
CudaKernel & launch(void)
Definition: CudaKernel.cpp:179
void setRay(S32 slot, const Ray &ray)
Assigns ray to a slot. Id is same as slot.
Definition: RayBuffer.hpp:65
FW_CUDA_FUNC S normalized(T len=(T) 1) const
Definition: Math.hpp:144
U32 getU32(void)
Definition: Random.hpp:51
CUdeviceptr outSlotToID
void resize(S32 n)
Resizes the buffer.
Definition: RayBuffer.cpp:38
bool aoCPU(RayBuffer &orays, RayBuffer &irays, Scene &scene, int numSamples, float maxDist, bool &newBatch, U32 randomSeed=0)
Generates ao rays on the CPU. Batches rays if necessary.
Definition: RayGen.cpp:232
void setNeedClosestHit(bool c)
Sets whether the closest hit is needed.
Definition: RayBuffer.hpp:144
void addOptions(const String &options)
void setSourceFile(const String &path)
CUdeviceptr normals
CUdeviceptr outIDToSlot
FW_CUDA_FUNC T length(void) const
Definition: Math.hpp:143
Vec3f getVec3f(void)
Definition: Random.hpp:83