NTrace
GPU ray tracing framework
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros
Buffer.cpp
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2009-2011, NVIDIA Corporation
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions are met:
7  * * Redistributions of source code must retain the above copyright
8  * notice, this list of conditions and the following disclaimer.
9  * * Redistributions in binary form must reproduce the above copyright
10  * notice, this list of conditions and the following disclaimer in the
11  * documentation and/or other materials provided with the distribution.
12  * * Neither the name of NVIDIA Corporation nor the
13  * names of its contributors may be used to endorse or promote products
14  * derived from this software without specific prior written permission.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19  * DISCLAIMED. IN NO EVENT SHALL <COPYRIGHT HOLDER> BE LIABLE FOR ANY
20  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
22  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
23  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26  */
27 
28 #include "gpu/Buffer.hpp"
29 #include "gpu/CudaModule.hpp"
30 
31 using namespace FW;
32 
33 //------------------------------------------------------------------------
34 
35 #define FW_IO_BUFFER_SIZE 65536
36 
37 //------------------------------------------------------------------------
38 
39 void Buffer::wrapCPU(void* cpuPtr, S64 size)
40 {
41  FW_ASSERT(cpuPtr || !size);
42  FW_ASSERT(size >= 0);
43 
44  m_cpuPtr = (U8*)cpuPtr;
45  wrap(CPU, size);
46 }
47 
48 //------------------------------------------------------------------------
49 
50 void Buffer::wrapGL(GLuint glBuffer)
51 {
52  FW_ASSERT(glBuffer != 0);
53 
54  GLint size;
55  {
56  GLint oldBuffer;
57  glGetIntegerv(GL_ARRAY_BUFFER_BINDING, &oldBuffer);
58  glBindBuffer(GL_ARRAY_BUFFER, glBuffer);
59  glGetBufferParameteriv(GL_ARRAY_BUFFER, GL_BUFFER_SIZE, &size);
60  glBindBuffer(GL_ARRAY_BUFFER, oldBuffer);
62  }
63 
64  m_glBuffer = glBuffer;
65  wrap(GL, size);
66 }
67 
68 //------------------------------------------------------------------------
69 
70 void Buffer::wrapCuda(CUdeviceptr cudaPtr, S64 size)
71 {
72  FW_ASSERT(cudaPtr || !size);
73 
74  m_cudaPtr = cudaPtr;
75  wrap(Cuda, size);
76 }
77 
78 //------------------------------------------------------------------------
79 
// NOTE(review): the signature line of this function (original line 80) is
// absent from this listing. It is presumably `void Buffer::free(Module module)`,
// judging by the `free((Module)i)` call in Buffer::realloc() and the use of
// `module` below -- confirm against the real source file.
//
// Releases the storage held for `module`, after moving ownership of the
// buffer contents to another module that has a copy.
81 {
// Nothing to do when `module` has no copy, when its copy is the only one
// that exists, or when it is the wrapped original.
82  if ((m_exists & module) == 0 || m_exists == (U32)module || m_original == module)
83  return;
84 
// Make `module` the owner without marking the other copies dirty.
85  setOwner(module, false);
86 
// First choice for the new owner: another module whose copy exists and is
// not dirty.
87  if (m_owner == module)
88  for (int i = 1; i < (int)Module_All; i <<= 1)
89  if (module != i && (m_exists & i) != 0 && (m_dirty & i) == 0)
90  {
91  setOwner((Module)i, false);
92  break;
93  }
94 
// Fallback: any other module whose copy exists, dirty or not.
95  if (m_owner == module)
96  for (int i = 1; i < (int)Module_All; i <<= 1)
97  if (module != i && (m_exists & i) != 0)
98  {
99  setOwner((Module)i, false);
100  break;
101  }
102 
// Release the module-specific storage and clear its "exists" bit.
103  switch (module)
104  {
105  case CPU: cpuFree(m_cpuPtr, m_cpuBase, m_hints); break;
106  case GL: glFree(m_glBuffer, m_cudaGLReg); break;
107  case Cuda: cudaFree(m_cudaPtr, m_cudaBase, m_glBuffer, m_hints); break;
108  }
109  m_exists &= ~module;
110 }
111 
112 //------------------------------------------------------------------------
113 
114 void Buffer::getRange(void* dst, S64 srcOfs, S64 size, bool async, CUstream cudaStream) const
115 {
116  FW_ASSERT(dst || !size);
117  FW_ASSERT(srcOfs >= 0 && srcOfs <= m_size - size);
118  FW_ASSERT(size >= 0);
119 
120  if (!size)
121  return;
122 
123  switch (m_owner)
124  {
125  case GL:
126  {
127  GLint oldBuffer;
128  glGetIntegerv(GL_ARRAY_BUFFER_BINDING, &oldBuffer);
129  glBindBuffer(GL_ARRAY_BUFFER, m_glBuffer);
131  glBindBuffer(GL_ARRAY_BUFFER, oldBuffer);
133  }
134  break;
135 
136  case Cuda:
137  memcpyDtoH(dst, m_cudaPtr + (U32)srcOfs, (U32)size, async, cudaStream);
138  break;
139 
140  default:
141  if ((m_exists & CPU) != 0)
142  memcpy(dst, m_cpuPtr + srcOfs, (size_t)size);
143  break;
144  }
145 }
146 
147 //------------------------------------------------------------------------
148 
149 void Buffer::setRange(S64 dstOfs, const void* src, S64 size, bool async, CUstream cudaStream)
150 {
151  FW_ASSERT(dstOfs >= 0 && dstOfs <= m_size - size);
152  FW_ASSERT(src || !size);
153  FW_ASSERT(size >= 0);
154 
155  if (!size)
156  return;
157 
158  switch (m_owner)
159  {
160  case GL:
161  {
162  GLint oldBuffer;
163  glGetIntegerv(GL_ARRAY_BUFFER_BINDING, &oldBuffer);
165  glBufferSubData(GL_ARRAY_BUFFER, (GLintptr)dstOfs, (GLsizeiptr)size, src);
166  glBindBuffer(GL_ARRAY_BUFFER, oldBuffer);
168  }
169  break;
170 
171  case Cuda:
172  memcpyHtoD(getMutableCudaPtr(dstOfs), src, (U32)size, async, cudaStream);
173  break;
174 
175  default:
176  memcpy(getMutablePtr(dstOfs), src, (size_t)size);
177  break;
178  }
179 }
180 
181 //------------------------------------------------------------------------
182 
183 void Buffer::setRange(S64 dstOfs, Buffer& src, S64 srcOfs, S64 size, bool async, CUstream cudaStream)
184 {
185  FW_ASSERT(size >= 0);
186  FW_ASSERT(dstOfs >= 0 && dstOfs <= m_size - size);
187  FW_ASSERT(srcOfs >= 0 && srcOfs <= src.m_size - size);
188 
189  if (!size)
190  return;
191 
192  if ((src.m_exists & Cuda) != 0 && (src.m_dirty & Cuda) == 0 && (m_owner == Cuda || m_owner == Module_None))
193  memcpyDtoD(getMutableCudaPtr(dstOfs), src.getCudaPtr(srcOfs), (U32)size);
194  else if ((src.m_exists & CPU) != 0 && (src.m_dirty & CPU) == 0)
195  setRange(dstOfs, src.getPtr(srcOfs), size, async, cudaStream);
196  else
197  src.getRange(getMutablePtr(dstOfs), srcOfs, size, async, cudaStream);
198 }
199 
200 //------------------------------------------------------------------------
201 
202 void Buffer::clearRange(S64 dstOfs, int value, S64 size, bool async, CUstream cudaStream)
203 {
204  FW_ASSERT(size >= 0);
205  FW_ASSERT(dstOfs >= 0 && dstOfs <= m_size - size);
206  FW_UNREF(async); // unsupported
207  FW_UNREF(cudaStream); // unsupported
208 
209  if (!size)
210  return;
211 
212  if (m_owner == Cuda)
213  CudaModule::checkError("cuMemsetD8", cuMemsetD8(getMutableCudaPtr(dstOfs), (U8)value, (U32)size));
214  else
215  memset(getMutablePtr(dstOfs), value, (size_t)size);
216 }
217 
218 //------------------------------------------------------------------------
219 
220 void Buffer::setOwner(Module module, bool modify, bool async, CUstream cudaStream, S64 validSize)
221 {
222  FW_ASSERT((module & ~Module_All) == 0);
223  FW_ASSERT((module & (module - 1)) == 0);
224  if (validSize == -1)
225  validSize = m_size;
226  FW_ASSERT(validSize >= 0);
227 
228  // Unmap CudaGL if necessary.
229 
230  if ((m_hints & Hint_CudaGL) != 0 && (m_exists & Cuda) != 0)
231  {
232  FW_ASSERT((m_dirty & Cuda) == 0);
233  if ((module != Cuda && modify) ||
234  (module == GL && (m_dirty & GL) != 0))
235  {
236  cudaFree(m_cudaPtr, m_cudaBase, m_glBuffer, m_hints);
237  m_exists &= ~Cuda;
238  m_dirty &= ~GL;
239  }
240  }
241 
242  // Same owner => done.
243 
244  if (m_owner == module)
245  {
246  if (modify)
247  m_dirty = Module_All - module;
248  return;
249  }
250 
251  // Not page-locked => not asynchronous.
252 
253  if ((m_hints & Hint_PageLock) == 0)
254  async = false;
255 
256  // Validate CPU.
257 
258  if (module == CPU)
259  {
260  if ((m_exists & CPU) == 0)
261  {
262  cpuAlloc(m_cpuPtr, m_cpuBase, m_size, m_hints, m_align);
263  m_exists |= CPU;
264  m_dirty |= CPU;
265  }
266  validateCPU(async, cudaStream, validSize);
267  }
268 
269  // Validate GL.
270 
271  bool needGL = (module == GL);
272  if (module == Cuda && (m_hints & Hint_CudaGL) != 0)
273  needGL = true;
274 
275  if (needGL && (m_exists & GL) == 0)
276  {
277  validateCPU(false, NULL, validSize);
278  glAlloc(m_glBuffer, m_size, m_cpuPtr);
279  m_exists |= GL;
280  m_dirty &= ~GL;
281  }
282  else if (module == GL && (m_dirty & GL) != 0)
283  {
284  validateCPU(false, NULL, validSize);
285  FW_ASSERT((m_exists & CPU) != 0);
286  if (validSize)
287  {
288  profilePush("glBufferSubData");
289  GLint oldBuffer;
290  glGetIntegerv(GL_ARRAY_BUFFER_BINDING, &oldBuffer);
291  glBindBuffer(GL_ARRAY_BUFFER, m_glBuffer);
292  glBufferSubData(GL_ARRAY_BUFFER, 0, (GLsizeiptr)validSize, m_cpuPtr);
293  glBindBuffer(GL_ARRAY_BUFFER, oldBuffer);
295  profilePop();
296  }
297  m_dirty &= ~GL;
298  }
299 
300  // Validate Cuda.
301 
302  if (module == Cuda)
303  {
304  if ((m_exists & Cuda) == 0)
305  {
306  cudaAlloc(m_cudaPtr, m_cudaBase, m_cudaGLReg, m_size, m_glBuffer, m_hints, m_align);
307  m_exists |= Cuda;
308  m_dirty |= Cuda;
309  if ((m_hints & Hint_CudaGL) != 0 && (m_dirty & GL) == 0)
310  m_dirty &= ~Cuda;
311  }
312 
313  if ((m_dirty & Cuda) != 0)
314  {
315  validateCPU(false, NULL, validSize);
316  if ((m_exists & CPU) != 0 && validSize)
317  memcpyHtoD(m_cudaPtr, m_cpuPtr, (U32)validSize, async, cudaStream);
318  m_dirty &= ~Cuda;
319  }
320  }
321 
322  // Set the new owner.
323 
324  m_owner = module;
325  if (modify)
326  m_dirty = Module_All - module;
327 }
328 
329 //------------------------------------------------------------------------
330 
// NOTE(review): the signature line of this function (original line 331) is
// absent from this listing; the cross-reference index at the end of the
// listing gives `virtual void readFromStream(InputStream &s)` for
// Buffer.cpp:331.
//
// Reads a size followed by that many bytes from the stream `s` and stores
// them as the new buffer contents, in chunks of at most tmp.getSize() bytes.
332 {
// The size comes first in the stream; the buffer is resized to match.
333  S64 size;
334  s >> size;
335  resizeDiscard(size);
336 
// NOTE(review): the declaration of `tmp` (original line 337) is also absent
// from this listing. It is presumably a staging container of
// FW_IO_BUFFER_SIZE bytes -- confirm against the real source file.
338  S64 ofs = 0;
339  while (ofs < size)
340  {
// Read the next chunk into the staging area, then copy it into the buffer.
341  int num = (int)min(size - ofs, (S64)tmp.getSize());
342  s.readFully(tmp.getPtr(), num);
343  setRange(ofs, tmp.getPtr(), num);
344  ofs += num;
345  }
346 }
347 
348 //------------------------------------------------------------------------
349 
// NOTE(review): the signature line of this function (original line 350) is
// absent from this listing; the cross-reference index at the end of the
// listing gives `virtual void writeToStream(OutputStream &s) const` for
// Buffer.cpp:350.
//
// Writes m_size followed by the buffer contents to the stream `s`, in
// chunks of at most tmp.getSize() bytes.
351 {
// The size comes first in the stream.
352  s << m_size;
353 
// NOTE(review): the declaration of `tmp` (original line 354) is also absent
// from this listing. It is presumably a staging container of
// FW_IO_BUFFER_SIZE bytes -- confirm against the real source file.
355  S64 ofs = 0;
356  while (ofs < m_size)
357  {
// Copy the next chunk out of the buffer into the staging area, then write it.
358  int num = (int)min(m_size - ofs, (S64)tmp.getSize());
359  getRange(tmp.getPtr(), ofs, num);
360  s.write(tmp.getPtr(), num);
361  ofs += num;
362  }
363 }
364 
365 //------------------------------------------------------------------------
366 
367 void Buffer::init(S64 size, U32 hints, int align)
368 {
369  FW_ASSERT(size >= 0);
370 
371  m_hints = validateHints(hints, align, Module_None);
372  m_align = align;
373  m_size = size;
374  m_original = Module_None;
375  m_owner = Module_None;
376  m_exists = Module_None;
377  m_dirty = Module_None;
378 
379  m_cpuPtr = NULL;
380  m_cpuBase = NULL;
381  m_glBuffer = 0;
382  m_cudaPtr = NULL;
383  m_cudaBase = NULL;
384  m_cudaGLReg = false;
385 }
386 
387 //------------------------------------------------------------------------
388 
389 U32 Buffer::validateHints(U32 hints, int align, Module original)
390 {
391  FW_ASSERT((hints & ~Hint_All) == 0);
392  FW_ASSERT(align > 0);
393 
394  U32 res = Hint_None;
395  if ((hints & Hint_PageLock) != 0 && original != CPU)
396  res |= Hint_PageLock;
397  if ((hints & Hint_CudaGL) != 0 && original != Cuda && align == 1 && isAvailable_cuGLRegisterBufferObject())
398  res |= Hint_CudaGL;
399  return res;
400 }
401 
402 //------------------------------------------------------------------------
403 
404 void Buffer::deinit(void)
405 {
406  // Wrapped buffer => ensure that the original is up-to-date.
407 
408  if (m_original != Module_None)
409  setOwner(m_original, false);
410 
411  // Free buffers.
412 
413  if (m_original != Cuda)
414  cudaFree(m_cudaPtr, m_cudaBase, m_glBuffer, m_hints);
415 
416  if (m_original != GL)
417  glFree(m_glBuffer, m_cudaGLReg);
418  else if (m_cudaGLReg)
419  CudaModule::checkError("cuGLUnregisterBufferObject", cuGLUnregisterBufferObject(m_glBuffer));
420 
421  if (m_original != CPU)
422  cpuFree(m_cpuPtr, m_cpuBase, m_hints);
423 }
424 
425 //------------------------------------------------------------------------
426 
427 void Buffer::wrap(Module module, S64 size)
428 {
429  FW_ASSERT(size >= 0);
430  FW_ASSERT(m_exists == Module_None);
431 
432  m_hints = validateHints(m_hints, m_align, module);
433  m_size = size;
434  m_original = module;
435  m_owner = module;
436  m_exists = module;
437 }
438 
439 //------------------------------------------------------------------------
440 
441 void Buffer::realloc(S64 size, U32 hints, int align)
442 {
443  FW_ASSERT(size >= 0);
444  FW_ASSERT(align > 0);
445 
446  // No change => done.
447 
448  if (m_size == size && m_hints == hints && m_align == align)
449  return;
450 
451  // Wrapped buffer => free others.
452 
453  if (m_original)
454  {
455  switch (m_original)
456  {
457  case CPU: FW_ASSERT((S64)m_cpuPtr % align == 0); break;
458  case Cuda: FW_ASSERT((S64)m_cudaPtr % align == 0); break;
459  default: break;
460  }
461 
462  for (int i = 1; i < (int)Module_All; i <<= 1)
463  free((Module)i);
464 
465  FW_ASSERT(m_size == size);
466  m_hints = hints;
467  m_align = align;
468  return;
469  }
470 
471  // No need to retain old data => reset.
472 
473  if (!size || !m_size || m_exists == Module_None)
474  {
475  reset(NULL, size, hints, align);
476  return;
477  }
478 
479  // CUDA buffer => device-to-device copy.
480 
481  if (m_owner == Cuda && (hints & Hint_CudaGL) == 0)
482  {
483  CUdeviceptr cudaPtr;
484  CUdeviceptr cudaBase;
485  bool cudaGLReg = false;
486  cudaAlloc(cudaPtr, cudaBase, cudaGLReg, size, 0, hints, align);
487  memcpyXtoX(NULL, cudaPtr, NULL, getCudaPtr(), min(size, m_size), false, NULL);
488 
489  reset(NULL, size, hints, align);
490  m_exists = Cuda;
491  m_cudaPtr = cudaPtr;
492  m_cudaBase = cudaBase;
493  return;
494  }
495 
496  // Host-to-host copy.
497 
498  U8* cpuPtr;
499  U8* cpuBase;
500  cpuAlloc(cpuPtr, cpuBase, size, hints, align);
501  memcpy(cpuPtr, getPtr(), (size_t)min(size, m_size));
502 
503  reset(NULL, size, hints, align);
504  m_exists = CPU;
505  m_cpuPtr = cpuPtr;
506  m_cpuBase = cpuBase;
507 }
508 
509 //------------------------------------------------------------------------
510 
511 void Buffer::validateCPU(bool async, CUstream cudaStream, S64 validSize)
512 {
513  FW_ASSERT(validSize >= 0);
514 
515  // Already valid => done.
516 
517  if ((m_exists & CPU) != 0 && (m_dirty & CPU) == 0)
518  return;
519  m_dirty &= ~CPU;
520 
521  // Find source for the data.
522 
523  Module source = Module_None;
524  for (int i = 1; i < (int)Module_All; i <<= 1)
525  {
526  if (i != CPU && (m_exists & i) != 0 && (m_dirty & i) == 0)
527  {
528  source = (Module)i;
529  break;
530  }
531  }
532 
533  // No source => done.
534 
535  if (source == Module_None)
536  return;
537 
538  // No buffer => allocate one.
539 
540  if ((m_exists & CPU) == 0)
541  {
542  cpuAlloc(m_cpuPtr, m_cpuBase, m_size, m_hints, m_align);
543  m_exists |= CPU;
544  }
545 
546  // No valid data => no need to copy.
547 
548  if (!validSize)
549  return;
550 
551  // Copy data from the source.
552 
553  if (source == GL)
554  {
555  profilePush("glGetBufferSubData");
556  GLint oldBuffer;
557  glGetIntegerv(GL_ARRAY_BUFFER_BINDING, &oldBuffer);
558  glBindBuffer(GL_ARRAY_BUFFER, m_glBuffer);
559  glGetBufferSubData(GL_ARRAY_BUFFER, 0, (GLsizeiptr)validSize, m_cpuPtr);
560  glBindBuffer(GL_ARRAY_BUFFER, oldBuffer);
562  profilePop();
563  }
564  else
565  {
566  FW_ASSERT(source == Cuda);
567  memcpyDtoH(m_cpuPtr, m_cudaPtr, (U32)validSize, async, cudaStream);
568  }
569 }
570 
571 //------------------------------------------------------------------------
572 
573 void Buffer::cpuAlloc(U8*& cpuPtr, U8*& cpuBase, S64 size, U32 hints, int align)
574 {
575  FW_ASSERT(align > 0);
576  if ((hints & Hint_PageLock) != 0)
577  {
578  checkSize(size, 32, "cuMemAllocHost");
579  CudaModule::checkError("cuMemAllocHost", cuMemAllocHost((void**)&cpuBase,
580  max(1U, (U32)(size + align - 1))));
581  }
582  else
583  {
584  checkSize(size, sizeof(U8*) * 8 - 1, "malloc");
585  cpuBase = new U8[(size_t)(size + align - 1)];
586  }
587 
588  cpuPtr = cpuBase + align - 1;
589  cpuPtr -= (UPTR)cpuPtr % (UPTR)align;
590 }
591 
592 //------------------------------------------------------------------------
593 
594 void Buffer::cpuFree(U8*& cpuPtr, U8*& cpuBase, U32 hints)
595 {
596  FW_ASSERT((cpuPtr == NULL) == (cpuBase == NULL));
597  if (cpuPtr)
598  {
599  if ((hints & Hint_PageLock) != 0)
600  CudaModule::checkError("cuMemFreeHost", cuMemFreeHost(cpuBase));
601  else
602  delete[] cpuBase;
603  cpuPtr = NULL;
604  cpuBase = NULL;
605  }
606 }
607 
608 //------------------------------------------------------------------------
609 
610 void Buffer::glAlloc(GLuint& glBuffer, S64 size, const void* data)
611 {
612  FW_ASSERT(size >= 0);
614 
615  GLint oldBuffer;
616  glGetIntegerv(GL_ARRAY_BUFFER_BINDING, &oldBuffer);
617  glGenBuffers(1, &glBuffer);
618  glBindBuffer(GL_ARRAY_BUFFER, glBuffer);
619  checkSize(size, sizeof(GLsizeiptr) * 8 - 1, "glBufferData");
621  glBindBuffer(GL_ARRAY_BUFFER, oldBuffer);
623 }
624 
625 //------------------------------------------------------------------------
626 
627 void Buffer::glFree(GLuint& glBuffer, bool& cudaGLReg)
628 {
629  if (glBuffer)
630  {
631  if (cudaGLReg)
632  {
633  CudaModule::checkError("cuGLUnregisterBufferObject", cuGLUnregisterBufferObject(glBuffer));
634  cudaGLReg = false;
635  }
636  glDeleteBuffers(1, &glBuffer);
638  glBuffer = 0;
639  }
640 }
641 
642 //------------------------------------------------------------------------
643 
644 void Buffer::cudaAlloc(CUdeviceptr& cudaPtr, CUdeviceptr& cudaBase, bool& cudaGLReg, S64 size, GLuint glBuffer, U32 hints, int align)
645 {
647  if ((hints & Hint_CudaGL) == 0)
648  {
649  FW_ASSERT(align > 0);
650  checkSize(size, 32, "cuMemAlloc");
651  CudaModule::checkError("cuMemAlloc", cuMemAlloc(&cudaBase,
652  max(1U, (U32)(size + align - 1))));
653  cudaPtr = cudaBase + align - 1;
654  cudaPtr -= (U32)cudaPtr % (U32)align;
655  }
656  else
657  {
658  if (!cudaGLReg)
659  {
660  CudaModule::checkError("cuGLRegisterBufferObject", cuGLRegisterBufferObject(glBuffer));
661  cudaGLReg = true;
662  }
663  CUsize_t size;
664  FW_ASSERT(align == 1);
665  CudaModule::checkError("cuGLMapBufferObject", cuGLMapBufferObject(&cudaBase, &size, glBuffer));
666  cudaPtr = cudaBase;
667  }
668 }
669 
670 //------------------------------------------------------------------------
671 
672 void Buffer::cudaFree(CUdeviceptr& cudaPtr, CUdeviceptr& cudaBase, GLuint glBuffer, U32 hints)
673 {
674  FW_ASSERT((cudaPtr == NULL) == (cudaBase == NULL));
675  if (cudaPtr)
676  {
677  if ((hints & Hint_CudaGL) == 0)
678  CudaModule::checkError("cuMemFree", cuMemFree(cudaBase));
679  else
680  CudaModule::checkError("cuGLUnmapBufferObject", cuGLUnmapBufferObject(glBuffer));
681  cudaPtr = NULL;
682  cudaBase = NULL;
683  }
684 }
685 
686 //------------------------------------------------------------------------
687 
688 void Buffer::checkSize(S64 size, int bits, const String& funcName)
689 {
690  FW_ASSERT(size >= 0);
691  if ((U64)size > (((U64)1 << bits) - 1))
692  fail("Buffer too large for %s()!", funcName.getPtr());
693 }
694 
695 //------------------------------------------------------------------------
696 
697 void Buffer::memcpyXtoX(void* dstHost, CUdeviceptr dstDevice, const void* srcHost, CUdeviceptr srcDevice, S64 size, bool async, CUstream cudaStream)
698 {
699  CUresult res;
700  if (size <= 0)
701  return;
702 
703  // Try to copy.
704 
705  if (dstHost && srcHost)
706  {
707  memcpy(dstHost, srcHost, (size_t)size);
708  res = CUDA_SUCCESS;
709  }
710  else if (srcHost)
711  {
712  profilePush("cuMemcpyHtoD");
713  if (async && isAvailable_cuMemcpyHtoDAsync())
714  res = cuMemcpyHtoDAsync(dstDevice, srcHost, (U32)size, cudaStream);
715  else
716  res = cuMemcpyHtoD(dstDevice, srcHost, (U32)size);
717  profilePop();
718  }
719  else if (dstHost)
720  {
721  profilePush("cuMemcpyDtoH");
722  if (async && isAvailable_cuMemcpyDtoHAsync())
723  res = cuMemcpyDtoHAsync(dstHost, srcDevice, (U32)size, cudaStream);
724  else
725  res = cuMemcpyDtoH(dstHost, srcDevice, (U32)size);
726  profilePop();
727  }
728  else
729  {
730  profilePush("cuMemcpyDtoD");
731 #if (CUDA_VERSION >= 3000)
732  if (async && isAvailable_cuMemcpyDtoDAsync())
733  res = cuMemcpyDtoDAsync(dstDevice, srcDevice, (U32)size, cudaStream);
734  else
735 #endif
736  res = cuMemcpyDtoD(dstDevice, srcDevice, (U32)size);
737  profilePop();
738  }
739 
740  // Success => done.
741 
742  if (res == CUDA_SUCCESS)
743  return;
744 
745  // Single byte => fail.
746 
747  if (size == 1)
748  CudaModule::checkError("cuMemcpyXtoX", res);
749 
750  // Otherwise => subdivide.
751  // CUDA driver does not allow memcpy() to cross allocation boundaries.
752 
753  S64 mid = size >> 1;
754  memcpyXtoX(dstHost, dstDevice, srcHost, srcDevice, mid, async, cudaStream);
755 
756  memcpyXtoX(
757  (dstHost) ? (U8*)dstHost + mid : NULL,
758  (dstHost) ? NULL : (CUdeviceptr)(dstDevice + mid),
759  (srcHost) ? (const U8*)srcHost + mid : NULL,
760  (srcHost) ? NULL : (CUdeviceptr)(srcDevice + mid),
761  size - mid, async, cudaStream);
762 }
763 
764 //------------------------------------------------------------------------
#define FW_UNREF(X)
Definition: Defs.hpp:78
CUdevice int ordinal char int CUdevice dev CUdevprop CUdevice dev CUcontext ctx CUcontext ctx CUcontext pctx CUmodule const void image CUmodule const void fatCubin CUfunction CUmodule const char name void p CUfunction unsigned int bytes CUtexref pTexRef CUtexref CUarray unsigned int Flags CUtexref int CUaddress_mode am CUtexref unsigned int Flags CUaddress_mode CUtexref int dim CUarray_format int CUtexref hTexRef CUfunction unsigned int numbytes CUfunction int float value CUfunction int CUtexref hTexRef CUfunction int int grid_height CUevent unsigned int Flags CUevent hEvent CUevent hEvent CUstream unsigned int Flags CUstream hStream GLuint bufferobj unsigned int CUdevice dev CUdeviceptr unsigned int CUmodule const char name CUdeviceptr unsigned int bytesize CUdeviceptr dptr void unsigned int bytesize void CUdeviceptr unsigned int ByteCount CUarray unsigned int CUdeviceptr unsigned int ByteCount CUarray unsigned int const void unsigned int ByteCount CUarray unsigned int CUarray unsigned int unsigned int ByteCount void CUarray unsigned int unsigned int CUstream hStream const CUDA_MEMCPY2D pCopy CUdeviceptr const void * srcHost
Definition: DLLImports.inl:128
#define NULL
Definition: Defs.hpp:39
void setRange(S64 dstOfs, const void *src, S64 size, bool async=false, CUstream cudaStream=NULL)
Definition: Buffer.cpp:149
const char * getPtr(void) const
Definition: String.hpp:51
GLuint getMutableGLBuffer(void)
Definition: Buffer.hpp:111
__w64 U32 UPTR
Definition: Defs.hpp:106
CUdevice int ordinal char int CUdevice dev CUdevprop CUdevice dev CUcontext ctx CUcontext ctx CUcontext pctx CUmodule const void image CUmodule const void fatCubin CUfunction CUmodule const char name void p CUfunction unsigned int bytes CUtexref pTexRef CUtexref CUarray unsigned int Flags CUtexref int CUaddress_mode am CUtexref unsigned int Flags CUaddress_mode CUtexref int dim CUarray_format int CUtexref hTexRef CUfunction unsigned int numbytes CUfunction int float value CUfunction int CUtexref hTexRef CUfunction int int grid_height CUevent unsigned int Flags CUevent hEvent CUevent hEvent CUstream unsigned int Flags CUstream hStream GLuint bufferobj unsigned int CUdevice dev CUdeviceptr unsigned int CUmodule const char name CUdeviceptr unsigned int bytesize CUdeviceptr dptr void unsigned int bytesize void CUdeviceptr unsigned int ByteCount CUarray unsigned int CUdeviceptr unsigned int ByteCount CUarray unsigned int const void unsigned int ByteCount CUarray unsigned int CUarray unsigned int unsigned int ByteCount void CUarray unsigned int unsigned int CUstream hStream const CUDA_MEMCPY2D pCopy CUdeviceptr const void unsigned int CUstream hStream const CUDA_MEMCPY2D CUstream hStream CUdeviceptr unsigned char unsigned int N CUdeviceptr unsigned int unsigned int N CUdeviceptr unsigned int unsigned short unsigned int unsigned int Height CUarray const CUDA_ARRAY_DESCRIPTOR pAllocateArray CUarray const CUDA_ARRAY3D_DESCRIPTOR pAllocateArray unsigned int CUtexref CUdeviceptr unsigned int bytes CUcontext unsigned int CUdevice device GLenum texture glBindBuffer
Definition: DLLImports.inl:315
CUdevice int ordinal char int CUdevice dev CUdevprop CUdevice dev CUcontext ctx CUcontext ctx CUcontext pctx CUmodule const void image CUmodule const void fatCubin CUfunction CUmodule const char name void p CUfunction unsigned int bytes CUtexref pTexRef CUtexref CUarray unsigned int Flags CUtexref int CUaddress_mode am CUtexref unsigned int Flags CUaddress_mode CUtexref int dim CUarray_format int CUtexref hTexRef CUfunction unsigned int numbytes CUfunction int float value CUfunction int CUtexref hTexRef CUfunction int int grid_height CUevent unsigned int Flags CUevent hEvent CUevent hEvent CUstream unsigned int Flags CUstream hStream cuGLUnmapBufferObject
Definition: DLLImports.inl:102
CUdevice int ordinal char int CUdevice dev CUdevprop CUdevice dev CUcontext ctx CUcontext ctx CUcontext pctx CUmodule const void image CUmodule const void fatCubin CUfunction CUmodule const char name void p CUfunction unsigned int bytes CUtexref pTexRef CUtexref CUarray unsigned int Flags CUtexref int CUaddress_mode am CUtexref unsigned int Flags CUaddress_mode CUtexref int dim CUarray_format int CUtexref hTexRef CUfunction unsigned int numbytes CUfunction int float value CUfunction int CUtexref hTexRef CUfunction int int grid_height CUevent unsigned int Flags CUevent hEvent CUevent hEvent CUstream unsigned int Flags CUstream hStream GLuint bufferobj unsigned int CUdevice dev CUdeviceptr unsigned int CUmodule const char name cuMemAlloc
Definition: DLLImports.inl:110
virtual void writeToStream(OutputStream &s) const
Definition: Buffer.cpp:350
CUdevice int ordinal char int CUdevice dev CUdevprop CUdevice dev CUcontext ctx CUcontext ctx CUcontext pctx CUmodule const void image CUmodule const void fatCubin CUfunction CUmodule const char name void p CUfunction unsigned int bytes CUtexref pTexRef CUtexref CUarray unsigned int Flags CUtexref int CUaddress_mode am CUtexref unsigned int Flags CUaddress_mode CUtexref int dim CUarray_format int CUtexref hTexRef CUfunction unsigned int numbytes CUfunction int float value CUfunction int CUtexref hTexRef CUfunction int int grid_height CUevent unsigned int Flags CUevent hEvent CUevent hEvent CUstream unsigned int Flags CUstream hStream GLuint bufferobj unsigned int CUdevice dev CUdeviceptr unsigned int CUmodule const char name CUdeviceptr unsigned int bytesize CUdeviceptr dptr void unsigned int bytesize void CUdeviceptr unsigned int ByteCount CUarray unsigned int CUdeviceptr unsigned int ByteCount CUarray unsigned int const void unsigned int ByteCount CUarray unsigned int CUarray unsigned int unsigned int ByteCount void CUarray unsigned int unsigned int CUstream hStream const CUDA_MEMCPY2D pCopy CUdeviceptr dstDevice
Definition: DLLImports.inl:128
CUdevice int ordinal char int CUdevice dev CUdevprop CUdevice dev CUcontext ctx CUcontext ctx CUcontext pctx CUmodule * module
Definition: DLLImports.inl:60
CUdevice int ordinal char int CUdevice dev CUdevprop CUdevice dev CUcontext ctx CUcontext ctx CUcontext pctx CUmodule const void image CUmodule const void fatCubin CUfunction CUmodule const char name void p CUfunction unsigned int bytes CUtexref pTexRef CUtexref CUarray unsigned int Flags CUtexref int CUaddress_mode am CUtexref unsigned int Flags CUaddress_mode CUtexref int dim CUarray_format int CUtexref hTexRef CUfunction unsigned int numbytes CUfunction int float value CUfunction int CUtexref hTexRef CUfunction int int grid_height CUevent unsigned int Flags CUevent hEvent CUevent hEvent CUstream unsigned int Flags CUstream hStream GLuint bufferobj unsigned int CUdevice dev CUdeviceptr unsigned int CUmodule const char name CUdeviceptr unsigned int bytesize CUdeviceptr dptr void unsigned int bytesize cuMemcpyDtoH
Definition: DLLImports.inl:116
#define GL_BUFFER_SIZE
Definition: DLLImports.hpp:153
CUdevice int ordinal char int CUdevice dev CUdevprop CUdevice dev CUcontext ctx CUcontext ctx CUcontext pctx CUmodule const void image CUmodule const void fatCubin CUfunction CUmodule const char name void p CUfunction unsigned int bytes CUtexref pTexRef CUtexref CUarray unsigned int Flags CUtexref int CUaddress_mode am CUtexref unsigned int Flags CUaddress_mode CUtexref int dim CUarray_format int CUtexref hTexRef CUfunction unsigned int numbytes CUfunction int float value CUfunction int CUtexref hTexRef CUfunction int int grid_height CUevent unsigned int Flags CUevent hEvent CUevent hEvent CUstream unsigned int Flags CUstream hStream GLuint bufferobj unsigned int CUdevice dev CUdeviceptr unsigned int CUmodule const char name CUdeviceptr unsigned int bytesize CUdeviceptr dptr void unsigned int bytesize void * dstHost
Definition: DLLImports.inl:116
CUdevice int ordinal char int CUdevice dev CUdevprop CUdevice dev CUcontext ctx CUcontext ctx CUcontext pctx CUmodule const void image CUmodule const void fatCubin CUfunction CUmodule const char name void p CUfunction unsigned int bytes CUtexref pTexRef CUtexref CUarray unsigned int Flags CUtexref int CUaddress_mode am CUtexref unsigned int Flags CUaddress_mode CUtexref int dim CUarray_format int CUtexref hTexRef CUfunction unsigned int numbytes CUfunction int float value CUfunction int CUtexref hTexRef CUfunction int int grid_height CUevent unsigned int Flags CUevent hEvent CUevent hEvent CUstream unsigned int Flags CUstream hStream GLuint bufferobj unsigned int CUdevice dev CUdeviceptr unsigned int CUmodule const char name CUdeviceptr unsigned int bytesize CUdeviceptr dptr void unsigned int bytesize void CUdeviceptr unsigned int ByteCount CUarray unsigned int CUdeviceptr unsigned int ByteCount CUarray unsigned int const void unsigned int ByteCount CUarray unsigned int CUarray unsigned int unsigned int ByteCount void CUarray unsigned int unsigned int CUstream hStream const CUDA_MEMCPY2D pCopy CUdeviceptr const void unsigned int CUstream hStream const CUDA_MEMCPY2D CUstream hStream CUdeviceptr unsigned char unsigned int N CUdeviceptr unsigned int unsigned int N CUdeviceptr unsigned int unsigned short unsigned int unsigned int Height CUarray const CUDA_ARRAY_DESCRIPTOR pAllocateArray CUarray const CUDA_ARRAY3D_DESCRIPTOR pAllocateArray unsigned int CUtexref CUdeviceptr unsigned int bytes CUcontext unsigned int CUdevice device GLenum texture GLenum GLuint buffer GLenum GLuint renderbuffer GLenum GLsizeiptr const GLvoid * data
Definition: DLLImports.inl:319
CUdevice int ordinal char int CUdevice dev CUdevprop CUdevice dev CUcontext ctx CUcontext ctx CUcontext pctx CUmodule const void image CUmodule const void fatCubin CUfunction CUmodule const char name void p CUfunction unsigned int bytes CUtexref pTexRef CUtexref CUarray unsigned int Flags CUtexref int CUaddress_mode am CUtexref unsigned int Flags CUaddress_mode CUtexref int dim CUarray_format int CUtexref hTexRef CUfunction unsigned int numbytes CUfunction int float value CUfunction int CUtexref hTexRef CUfunction int int grid_height CUevent unsigned int Flags CUevent hEvent CUevent hEvent CUstream unsigned int Flags CUstream hStream GLuint bufferobj unsigned int CUdevice dev CUdeviceptr unsigned int CUmodule const char name CUdeviceptr unsigned int bytesize cuMemFree
Definition: DLLImports.inl:112
void wrapCuda(CUdeviceptr cudaPtr, S64 size)
Definition: Buffer.cpp:70
virtual void readFromStream(InputStream &s)
Definition: Buffer.cpp:331
CUdeviceptr getCudaPtr(S64 ofs=0)
Definition: Buffer.hpp:108
CUdevice int ordinal char int CUdevice dev CUdevprop CUdevice dev CUcontext ctx CUcontext ctx CUcontext pctx CUmodule const void image CUmodule const void fatCubin CUfunction CUmodule const char name cuMemFreeHost
Definition: DLLImports.inl:66
unsigned __int64 U64
Definition: Defs.hpp:97
ptrdiff_t GLintptr
Definition: DLLImports.hpp:147
static void checkErrors(void)
Definition: GLContext.cpp:1003
#define GL_ARRAY_BUFFER
Definition: DLLImports.hpp:152
CUdevice int ordinal char int CUdevice dev CUdevprop CUdevice dev CUcontext ctx CUcontext ctx CUcontext pctx CUmodule const void image CUmodule const void fatCubin CUfunction CUmodule const char name void p CUfunction unsigned int bytes CUtexref pTexRef CUtexref CUarray unsigned int Flags CUtexref int CUaddress_mode am CUtexref unsigned int Flags CUaddress_mode CUtexref int dim CUarray_format int CUtexref hTexRef CUfunction unsigned int numbytes CUfunction int float value CUfunction int CUtexref hTexRef CUfunction int int grid_height CUevent unsigned int Flags CUevent hEvent CUevent hEvent CUstream unsigned int Flags CUstream hStream GLuint bufferobj unsigned int CUdevice dev CUdeviceptr unsigned int CUmodule const char name CUdeviceptr unsigned int bytesize CUdeviceptr dptr void unsigned int bytesize void CUdeviceptr unsigned int ByteCount CUarray unsigned int CUdeviceptr unsigned int ByteCount CUarray unsigned int const void unsigned int ByteCount CUarray unsigned int CUarray unsigned int unsigned int ByteCount void CUarray unsigned int unsigned int CUstream hStream const CUDA_MEMCPY2D pCopy CUdeviceptr const void unsigned int CUstream hStream const CUDA_MEMCPY2D CUstream hStream CUdeviceptr unsigned char unsigned int N CUdeviceptr unsigned int unsigned int N CUdeviceptr unsigned int unsigned short unsigned int unsigned int Height CUarray const CUDA_ARRAY_DESCRIPTOR pAllocateArray CUarray const CUDA_ARRAY3D_DESCRIPTOR pAllocateArray unsigned int CUtexref CUdeviceptr unsigned int bytes CUcontext unsigned int CUdevice device GLenum texture GLenum GLuint buffer GLenum GLuint renderbuffer glBufferData
Definition: DLLImports.inl:319
virtual void write(const void *ptr, int size)=0
unsigned int CUsize_t
Definition: DLLImports.hpp:121
const U8 * getPtr(S64 ofs=0)
Definition: Buffer.hpp:106
void setOwner(Module module, bool modify, bool async=false, CUstream cudaStream=NULL, S64 validSize=-1)
Definition: Buffer.cpp:220
void wrapCPU(void *cpuPtr, S64 size)
Definition: Buffer.cpp:39
CUdevice int ordinal char int CUdevice dev CUdevprop CUdevice dev CUcontext ctx CUcontext ctx CUcontext pctx CUmodule const void image CUmodule const void fatCubin CUfunction CUmodule const char name void p CUfunction unsigned int bytes CUtexref pTexRef CUtexref CUarray unsigned int Flags CUtexref int CUaddress_mode am CUtexref unsigned int Flags CUaddress_mode CUtexref int dim CUarray_format int CUtexref hTexRef CUfunction unsigned int numbytes CUfunction int float value CUfunction int CUtexref hTexRef CUfunction int int grid_height CUevent unsigned int Flags CUevent hEvent CUevent hEvent CUstream unsigned int Flags CUstream hStream GLuint bufferobj unsigned int CUdevice dev CUdeviceptr unsigned int CUmodule const char name CUdeviceptr unsigned int bytesize CUdeviceptr dptr void unsigned int bytesize void CUdeviceptr unsigned int ByteCount CUarray unsigned int CUdeviceptr unsigned int ByteCount CUarray unsigned int const void unsigned int ByteCount CUarray unsigned int CUarray unsigned int unsigned int ByteCount void CUarray unsigned int unsigned int CUstream hStream const CUDA_MEMCPY2D pCopy CUdeviceptr const void unsigned int CUstream hStream const CUDA_MEMCPY2D CUstream hStream CUdeviceptr unsigned char unsigned int N CUdeviceptr unsigned int unsigned int N CUdeviceptr unsigned int unsigned short unsigned int unsigned int Height CUarray const CUDA_ARRAY_DESCRIPTOR pAllocateArray CUarray const CUDA_ARRAY3D_DESCRIPTOR pAllocateArray unsigned int CUtexref CUdeviceptr unsigned int bytes CUcontext unsigned int CUdevice device GLenum texture GLenum GLuint buffer GLenum GLuint renderbuffer GLenum GLsizeiptr const GLvoid GLenum usage GLuint shader GLenum type GLsizei const GLuint framebuffers GLsizei const GLuint renderbuffers GLuint v GLuint v GLenum GLenum GLenum GLuint GLint level GLsizei GLuint framebuffers GLuint const GLchar name glGetBufferSubData
Definition: DLLImports.inl:339
static void checkError(const char *funcName, CUresult res)
Definition: CudaModule.cpp:487
CUdevice int ordinal char int CUdevice dev CUdevprop CUdevice dev CUcontext ctx CUcontext ctx CUcontext pctx CUmodule const void image CUmodule const void fatCubin CUfunction CUmodule const char name void p CUfunction unsigned int bytes CUtexref pTexRef CUtexref CUarray unsigned int Flags CUtexref int CUaddress_mode am CUtexref unsigned int Flags CUaddress_mode CUtexref int dim CUarray_format int CUtexref hTexRef CUfunction unsigned int numbytes CUfunction int float value CUfunction int CUtexref hTexRef CUfunction int int grid_height CUevent unsigned int Flags CUevent hEvent CUevent hEvent CUstream unsigned int Flags CUstream hStream GLuint bufferobj unsigned int CUdevice dev CUdeviceptr unsigned int CUmodule const char name CUdeviceptr unsigned int bytesize CUdeviceptr dptr cuMemAllocHost
Definition: DLLImports.inl:114
static void memcpyHtoD(CUdeviceptr dst, const void *src, S64 size, bool async=false, CUstream cudaStream=NULL)
Definition: Buffer.hpp:124
CUdevice int ordinal char int CUdevice dev CUdevprop CUdevice dev CUcontext ctx CUcontext ctx CUcontext pctx CUmodule const void image CUmodule const void fatCubin CUfunction CUmodule const char name void p CUfunction unsigned int bytes CUtexref pTexRef CUtexref CUarray unsigned int Flags CUtexref int CUaddress_mode am CUtexref unsigned int Flags CUaddress_mode CUtexref int dim CUarray_format int CUtexref hTexRef CUfunction unsigned int numbytes CUfunction int float value CUfunction int CUtexref hTexRef CUfunction int int grid_height CUevent unsigned int Flags CUevent hEvent CUevent hEvent CUstream unsigned int Flags CUstream hStream GLuint bufferobj unsigned int CUdevice dev CUdeviceptr unsigned int CUmodule const char name CUdeviceptr unsigned int bytesize CUdeviceptr dptr void unsigned int bytesize void CUdeviceptr unsigned int ByteCount CUarray unsigned int CUdeviceptr unsigned int ByteCount CUarray unsigned int const void unsigned int ByteCount CUarray unsigned int CUarray unsigned int unsigned int ByteCount void CUarray unsigned int unsigned int CUstream hStream const CUDA_MEMCPY2D pCopy cuMemcpyHtoDAsync
Definition: DLLImports.inl:128
CUdeviceptr getMutableCudaPtr(S64 ofs=0)
Definition: Buffer.hpp:112
U8 * getMutablePtr(S64 ofs=0)
Definition: Buffer.hpp:110
void readFully(void *ptr, int size)
Definition: Stream.cpp:36
FW_CUDA_FUNC T min(const VectorBase< T, L, S > &v)
Definition: Math.hpp:461
FW_CUDA_FUNC T max(const VectorBase< T, L, S > &v)
Definition: Math.hpp:462
void free(Module module)
Definition: Buffer.cpp:80
#define GL_ARRAY_BUFFER_BINDING
Definition: DLLImports.hpp:189
#define FW_ASSERT(X)
Definition: Defs.hpp:67
#define FW_IO_BUFFER_SIZE
Definition: Buffer.cpp:35
CUdevice int ordinal char int CUdevice dev CUdevprop CUdevice dev CUcontext ctx CUcontext ctx CUcontext pctx CUmodule const void image CUmodule const void fatCubin CUfunction CUmodule const char name void p CUfunction unsigned int bytes CUtexref pTexRef CUtexref CUarray unsigned int Flags CUtexref int CUaddress_mode am CUtexref unsigned int Flags CUaddress_mode CUtexref int dim CUarray_format int CUtexref hTexRef CUfunction unsigned int numbytes CUfunction int float value CUfunction int CUtexref hTexRef CUfunction int int grid_height CUevent unsigned int Flags CUevent hEvent CUevent hEvent CUstream unsigned int Flags CUstream hStream GLuint bufferobj unsigned int CUdevice dev CUdeviceptr unsigned int CUmodule const char name CUdeviceptr unsigned int bytesize CUdeviceptr dptr void unsigned int bytesize void CUdeviceptr unsigned int ByteCount CUarray unsigned int CUdeviceptr unsigned int ByteCount CUarray unsigned int const void unsigned int ByteCount CUarray unsigned int CUarray unsigned int unsigned int ByteCount void CUarray unsigned int unsigned int CUstream hStream const CUDA_MEMCPY2D pCopy CUdeviceptr const void unsigned int CUstream hStream const CUDA_MEMCPY2D CUstream hStream cuMemsetD8
Definition: DLLImports.inl:132
void getRange(void *dst, S64 srcOfs, S64 size, bool async=false, CUstream cudaStream=NULL) const
Definition: Buffer.cpp:114
signed __int64 S64
Definition: Defs.hpp:98
unsigned int U32
Definition: Defs.hpp:85
void wrapGL(GLuint glBuffer)
Definition: Buffer.cpp:50
static void memcpyDtoD(CUdeviceptr dst, CUdeviceptr src, S64 size, bool async=false, CUstream cudaStream=NULL)
Definition: Buffer.hpp:126
void clearRange(S64 dstOfs, int value, S64 size, bool async=false, CUstream cudaStream=NULL)
Definition: Buffer.cpp:202
CUdevice int ordinal char int CUdevice dev CUdevprop CUdevice dev CUcontext ctx CUcontext ctx CUcontext pctx CUmodule const void image CUmodule const void fatCubin CUfunction CUmodule const char name void p CUfunction unsigned int bytes CUtexref pTexRef CUtexref CUarray unsigned int Flags CUtexref int CUaddress_mode am CUtexref unsigned int Flags CUaddress_mode CUtexref int dim CUarray_format int CUtexref hTexRef CUfunction unsigned int numbytes CUfunction int float value
Definition: DLLImports.inl:84
#define GL_STATIC_DRAW
Definition: DLLImports.hpp:178
static void memcpyDtoH(void *dst, CUdeviceptr src, S64 size, bool async=false, CUstream cudaStream=NULL)
Definition: Buffer.hpp:125
void reset(void)
Definition: Buffer.hpp:78
unsigned char U8
Definition: Defs.hpp:83
CUdevice int ordinal char int CUdevice dev CUdevprop CUdevice dev CUcontext ctx CUcontext ctx CUcontext pctx CUmodule const void image CUmodule const void fatCubin CUfunction CUmodule const char name void p CUfunction unsigned int bytes CUtexref pTexRef CUtexref CUarray unsigned int Flags CUtexref int CUaddress_mode am CUtexref unsigned int Flags CUaddress_mode CUtexref int dim CUarray_format int CUtexref hTexRef CUfunction unsigned int numbytes CUfunction int float value CUfunction int CUtexref hTexRef CUfunction int int grid_height CUevent unsigned int Flags CUevent hEvent CUevent hEvent CUstream unsigned int Flags CUstream hStream GLuint bufferobj unsigned int CUdevice dev CUdeviceptr unsigned int CUmodule const char name CUdeviceptr unsigned int bytesize CUdeviceptr dptr void unsigned int bytesize void CUdeviceptr srcDevice
Definition: DLLImports.inl:116
static void staticInit(void)
Definition: CudaModule.cpp:311
const T * getPtr(S idx=0) const
Definition: Array.hpp:202
void fail(const char *fmt,...)
Definition: Defs.cpp:304
void resizeDiscard(S64 size)
Definition: Buffer.hpp:83
ptrdiff_t GLsizeiptr
Definition: DLLImports.hpp:148
void profilePop(void)
Definition: Defs.cpp:602
CUdevice int ordinal char int CUdevice dev CUdevprop CUdevice dev CUcontext ctx CUcontext ctx CUcontext pctx CUmodule const void image CUmodule const void fatCubin CUfunction CUmodule const char name void p CUfunction unsigned int bytes CUtexref pTexRef CUtexref CUarray unsigned int Flags CUtexref int CUaddress_mode am CUtexref unsigned int Flags CUaddress_mode CUtexref int dim CUarray_format int CUtexref hTexRef CUfunction unsigned int numbytes CUfunction int float value CUfunction int CUtexref hTexRef CUfunction int int grid_height CUevent unsigned int Flags CUevent hEvent CUevent hEvent CUstream unsigned int Flags CUstream hStream GLuint bufferobj unsigned int CUdevice dev CUdeviceptr unsigned int CUmodule const char name CUdeviceptr unsigned int bytesize CUdeviceptr dptr void unsigned int bytesize void CUdeviceptr unsigned int ByteCount CUarray unsigned int CUdeviceptr unsigned int ByteCount CUarray unsigned int const void unsigned int ByteCount CUarray unsigned int CUarray unsigned int unsigned int ByteCount void CUarray unsigned int unsigned int CUstream hStream const CUDA_MEMCPY2D pCopy CUdeviceptr const void unsigned int CUstream hStream const CUDA_MEMCPY2D CUstream hStream CUdeviceptr unsigned char unsigned int N CUdeviceptr unsigned int unsigned int N CUdeviceptr unsigned int unsigned short unsigned int unsigned int Height CUarray const CUDA_ARRAY_DESCRIPTOR pAllocateArray CUarray const CUDA_ARRAY3D_DESCRIPTOR pAllocateArray unsigned int CUtexref CUdeviceptr unsigned int bytes CUcontext unsigned int CUdevice device GLenum texture GLenum GLuint buffer GLenum GLuint renderbuffer GLenum GLsizeiptr size
Definition: DLLImports.inl:319
static void staticInit(void)
Definition: GLContext.cpp:894
S getSize(void) const
Definition: Array.hpp:188
void profilePush(const char *id)
Definition: Defs.cpp:544