NTrace
GPU ray tracing framework
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros
CudaCompiler.cpp
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2009-2011, NVIDIA Corporation
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions are met:
7  * * Redistributions of source code must retain the above copyright
8  * notice, this list of conditions and the following disclaimer.
9  * * Redistributions in binary form must reproduce the above copyright
10  * notice, this list of conditions and the following disclaimer in the
11  * documentation and/or other materials provided with the distribution.
12  * * Neither the name of NVIDIA Corporation nor the
13  * names of its contributors may be used to endorse or promote products
14  * derived from this software without specific prior written permission.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19  * DISCLAIMED. IN NO EVENT SHALL <COPYRIGHT HOLDER> BE LIABLE FOR ANY
20  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
22  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
23  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26  */
27 
28 #include "gpu/CudaCompiler.hpp"
29 #include "base/DLLImports.hpp"
30 #include "io/File.hpp"
31 #include "gui/Window.hpp"
32 
33 #include <process.h>
34 #include <stdio.h>
35 #include <direct.h>
36 
37 using namespace FW;
38 
39 //------------------------------------------------------------------------
40 
41 #define SHOW_TOOL_PATHS 1
42 #define SHOW_NVCC_OUTPUT 0
43 
44 //------------------------------------------------------------------------
45 
46 String CudaCompiler::s_frameworkPath;
47 String CudaCompiler::s_staticCudaBinPath;
48 String CudaCompiler::s_staticOptions;
49 String CudaCompiler::s_staticPreamble;
50 String CudaCompiler::s_staticBinaryFormat;
51 
52 bool CudaCompiler::s_inited = false;
53 Hash<U64, Array<U8>*> CudaCompiler::s_cubinCache;
54 Hash<U64, CudaModule*> CudaCompiler::s_moduleCache;
55 U32 CudaCompiler::s_nvccVersionHash = 0;
56 String CudaCompiler::s_nvccCommand;
57 
58 //------------------------------------------------------------------------
59 
60 String FW::formatInlineCuda(const char* file, int line, const char* code)
61 {
62  static const char s_header[] =
63  "#include \"base/Math.hpp\"\n"
64  "#include <stdio.h>\n"
65  "namespace FW\n"
66  "{\n"
67  "extern \"C\"\n"
68  "{\n"
69  ;
70 
71  static const char s_footer[] =
72  "}\n"
73  "}\n"
74  "\n"
75  ;
76 
77  // Check that framework path is valid.
78 
79  if (!CudaCompiler::getFrameworkPath().getLength())
80  fail("FW_INLINE_CUDA: Framework path not defined! Please call CudaCompiler::setFrameworkPath().");
81 
82  // Replace backslashes with slashes in file name.
83 
84  Array<char> fixedFile(file, (int)strlen(file) + 1);
85  for (int i = 0; i < fixedFile.getSize(); i++)
86  if (fixedFile[i] == '\\')
87  fixedFile[i] = '/';
88 
89  // Count linefeeds.
90 
91  int numLinefeeds = 0;
92  for (int i = 0; s_header[i]; i++)
93  if (s_header[i] == '\n')
94  numLinefeeds++;
95  for (int i = 0; code[i]; i++)
96  if (code[i] == '\n')
97  numLinefeeds++;
98 
99  // Piece the code together.
100 
101  return sprintf("#line %d \"%s\"\n%s%s%s\n", line - numLinefeeds, fixedFile.getPtr(), s_header, code, s_footer);
102 }
103 
104 //------------------------------------------------------------------------
105 
106 CudaModule* FW::compileInlineCuda(const char* file, int line, const char* code)
107 {
108  CudaCompiler compiler;
109  compiler.setInlineSource(formatInlineCuda(file, line, code), file);
110  compiler.addOptions("-use_fast_math");
111  return compiler.compile();
112 }
113 
114 //------------------------------------------------------------------------
115 
117 : m_cachePath ("cudacache"),
118  m_overriddenSMArch (0),
119 
120  m_sourceHash (0),
121  m_optionHash (0),
122  m_defineHash (0),
123  m_preambleHash (0),
124  m_memHash (0),
125  m_sourceHashValid (false),
126  m_optionHashValid (false),
127  m_defineHashValid (false),
128  m_preambleHashValid (false),
129  m_memHashValid (false),
130 
131  m_window (NULL)
132 {
133  if (getFrameworkPath().getLength())
135 }
136 
137 //------------------------------------------------------------------------
138 
140 {
141 }
142 
143 //------------------------------------------------------------------------
144 
145 CudaModule* CudaCompiler::compile(bool enablePrints, bool autoFail)
146 {
147  staticInit();
148 
149  // Cached in memory => done.
150 
151  U64 memHash = getMemHash();
152  CudaModule** found = s_moduleCache.search(memHash);
153  if (found)
154  return *found;
155 
156 #if FW_DEBUG
157  // Load CUBIN file.
158  //return loadDbgCubin(enablePrints);
159 #endif
160 
161  // Compile CUBIN file.
162 
163  String cubinFile = compileCubinFile(enablePrints, autoFail);
164  if (!cubinFile.getLength())
165  return NULL;
166 
167  // Create module and add to memory cache.
168 
169  CudaModule* module = new CudaModule(cubinFile);
170  s_moduleCache.add(memHash, module);
171  return module;
172 }
173 
174 //------------------------------------------------------------------------
175 
177 {
178  String cubinFile = m_sourceFile.getFileName()+"bin";
179 #if !FW_64
180  String cubinDir = "build\\Win32";
181 #else
182  String cubinDir = "build\\x64_Debug\\cuda";
183 #endif
184 #if FW_DEBUG
185  cubinDir += "_Debug\\cuda";
186 #else
187  cubinDir += "_Release\\cuda";
188 #endif
189 
190  String fullCubinPath = cubinDir + "\\" + cubinFile;
191 
192  // Create module and add to memory cache.
193 
194  if (enablePrints)
195  printf("Loading %s...", fullCubinPath.getPtr());
196 
197  CudaModule* module = new CudaModule(fullCubinPath);
198  U64 memHash = getMemHash();
199  s_moduleCache.add(memHash, module);
200 
201  if (enablePrints)
202  printf("done\n");
203  return module;
204 }
205 
206 
207 //------------------------------------------------------------------------
208 
209 const Array<U8>* CudaCompiler::compileCubin(bool enablePrints, bool autoFail)
210 {
211  staticInit();
212 
213  // Cached in memory => done.
214 
215  U64 memHash = getMemHash();
216  Array<U8>** found = s_cubinCache.search(memHash);
217  if (found)
218  return *found;
219 
220  // Compile CUBIN file.
221 
222  String cubinFile = compileCubinFile(enablePrints, autoFail);
223  if (!cubinFile.getLength())
224  return NULL;
225 
226  // Load CUBIN.
227 
228  File in(cubinFile, File::Read);
229  S32 size = (S32)in.getSize();
230  Array<U8>* cubin = new Array<U8>(NULL, size + 1);
231  in.read(cubin->getPtr(), size);
232  cubin->set(size, 0);
233 
234  // Add to memory cache.
235 
236  s_cubinCache.add(memHash, cubin);
237  return cubin;
238 }
239 
240 //------------------------------------------------------------------------
241 
242 String CudaCompiler::compileCubinFile(bool enablePrints, bool autoFail)
243 {
244  staticInit();
245 
246  // Check that the source file exists.
247 
248  if (m_sourceFile.getLength())
249  File file(m_sourceFile, File::Read);
250  else if (!m_inlineSource.getLength())
251  setError("CudaCompiler: No source file specified!");
252 
253  if (autoFail)
254  failIfError();
255  if (hasError())
256  return "";
257 
258  // Cache directory does not exist => create it.
259 
260  createCacheDir();
261  if (autoFail)
262  failIfError();
263  if (hasError())
264  return "";
265 
266  // Preprocess.
267 
268  writeDefineFile();
269  String cubinFile, finalOpts;
270  runPreprocessor(cubinFile, finalOpts);
271  if (autoFail)
272  failIfError();
273  if (hasError())
274  return "";
275 
276  // CUBIN exists => done.
277 
278  if (fileExists(cubinFile))
279  return cubinFile;
280 
281  // Compile.
282 
283  if (enablePrints)
284  {
285  if (m_sourceFile.getLength())
286  printf("CudaCompiler: Compiling '%s'...", m_sourceFile.getPtr());
287  else if (m_inlineOrigin.getLength())
288  printf("CudaCompiler: Compiling inline code from '%s'...", m_inlineOrigin.getPtr());
289  else
290  printf("CudaCompiler: Compiling inline code...");
291  }
292  if (m_window)
293  m_window->showModalMessage("Compiling CUDA kernel...\nThis will take a few seconds.");
294 
295  runCompiler(cubinFile, finalOpts);
296 
297  if (enablePrints)
298  printf((hasError()) ? " Failed.\n" : " Done.\n");
299  if (autoFail)
300  failIfError();
301  return (hasError()) ? "" : cubinFile;
302 }
303 
304 //------------------------------------------------------------------------
305 
307 {
308  if (s_inited || hasError())
309  return;
310  s_inited = true;
311 
312  // List potential CUDA and Visual Studio paths.
313 
314  F32 driverVersion = CudaModule::getDriverVersion() / 10.0f;
315  Array<String> potentialCudaPaths;
316  Array<String> potentialVSPaths;
317 
318  for (char drive = 'C'; drive <= 'E'; drive++)
319  {
320  for (int progX86 = 0; progX86 <= 1; progX86++)
321  {
322  String prog = sprintf("%c:\\%s", drive, (progX86 == 0) ? "Program Files" : "Program Files (x86)");
323  potentialCudaPaths.add(prog + sprintf("\\NVIDIA GPU Computing Toolkit\\CUDA\\v%.1f", driverVersion));
324  potentialVSPaths.add(prog + "\\Microsoft Visual Studio 10.0");
325  potentialVSPaths.add(prog + "\\Microsoft Visual Studio 9.0");
326  potentialVSPaths.add(prog + "\\Microsoft Visual Studio 8");
327  }
328  potentialCudaPaths.add(sprintf("%c:\\CUDA", drive));
329  }
330 
331  // Query environment variables.
332 
333  String pathEnv = queryEnv("PATH");
334  String includeEnv = queryEnv("INCLUDE");
335  String cudaBinEnv = queryEnv("CUDA_BIN_PATH");
336  String cudaIncEnv = queryEnv("CUDA_INC_PATH");
337 
338  // Find CUDA binary path.
339 
340  Array<String> cudaBinList;
341  if (s_staticCudaBinPath.getLength())
342  cudaBinList.add(s_staticCudaBinPath);
343  else
344  {
345  cudaBinList.add(cudaBinEnv);
346  splitPathList(cudaBinList, pathEnv);
347  for (int i = 0; i < potentialCudaPaths.getSize(); i++)
348  {
349  cudaBinList.add(potentialCudaPaths[i] + "\\bin");
350  cudaBinList.add(potentialCudaPaths[i] + "\\bin64");
351  }
352  }
353 
354  String cudaBinPath;
355  for (int i = 0; i < cudaBinList.getSize(); i++)
356  {
357  if (!cudaBinList[i].getLength() || !fileExists(cudaBinList[i] + "\\nvcc.exe"))
358  continue;
359 
360  // Execute "nvcc --version".
361 
362  FILE* pipe = _popen(sprintf("\"%s\\nvcc.exe\" --version 2>nul", cudaBinList[i].getPtr()).getPtr(), "rt");
363  if (!pipe)
364  continue;
365 
366  Array<char> output;
367  while (!feof(pipe))
368  output.add((char)fgetc(pipe));
369  fclose(pipe);
370 
371  // Invalid response => ignore.
372 
373  output.add(0);
374  String response(output.getPtr());
375  if (!response.startsWith("nvcc: NVIDIA"))
376  continue;
377 
378  // Hash response.
379 
380  cudaBinPath = cudaBinList[i];
381  s_nvccVersionHash = hash<String>(response);
382  break;
383  }
384 
385  if (!cudaBinPath.getLength())
386  fail("Unable to detect CUDA Toolkit binary path!\nPlease set CUDA_BIN_PATH environment variable.");
387 
388  // Find Visual Studio binary path.
389 
390  Array<String> vsBinList;
391  splitPathList(vsBinList, pathEnv);
392  for (int i = 0; i < potentialVSPaths.getSize(); i++)
393  vsBinList.add(potentialVSPaths[i] + "\\VC\\bin");
394 
395  String vsBinPath;
396  for (int i = 0; i < vsBinList.getSize(); i++)
397  {
398  if (vsBinList[i].getLength() && fileExists(vsBinList[i] + "\\vcvars32.bat"))
399  {
400  vsBinPath = vsBinList[i];
401  break;
402  }
403  }
404 
405  if (!vsBinPath.getLength())
406  fail("Unable to detect Visual Studio binary path!\nPlease run VCVARS32.BAT.");
407 
408  // Find CUDA include path.
409 
410  Array<String> cudaIncList;
411  cudaIncList.add(cudaBinPath + "\\..\\include");
412  cudaIncList.add(cudaIncEnv);
413  splitPathList(cudaIncList, includeEnv);
414  cudaIncList.add("C:\\CUDA\\include");
415  cudaIncList.add("D:\\CUDA\\include");
416 
417  String cudaIncPath;
418  for (int i = 0; i < cudaIncList.getSize(); i++)
419  {
420  if (cudaIncList[i].getLength() && fileExists(cudaIncList[i] + "\\cuda.h"))
421  {
422  cudaIncPath = cudaIncList[i];
423  break;
424  }
425  }
426 
427  if (!cudaIncPath.getLength())
428  fail("Unable to detect CUDA Toolkit include path!\nPlease set CUDA_INC_PATH environment variable.");
429 
430  // Find Visual Studio include path.
431 
432  Array<String> vsIncList;
433  vsIncList.add(vsBinPath + "\\..\\INCLUDE");
434  splitPathList(vsIncList, includeEnv);
435  for (int i = 0; i < potentialVSPaths.getSize(); i++)
436  vsIncList.add(potentialVSPaths[i] + "\\VC\\INCLUDE");
437 
438  String vsIncPath;
439  for (int i = 0; i < vsIncList.getSize(); i++)
440  {
441  if (vsIncList[i].getLength() && fileExists(vsIncList[i] + "\\crtdefs.h"))
442  {
443  vsIncPath = vsIncList[i];
444  break;
445  }
446  }
447 
448  if (!vsIncPath.getLength())
449  fail("Unable to detect Visual Studio include path!\nPlease run VCVARS32.BAT.");
450 
451  // Show paths.
452 
453 #if SHOW_TOOL_PATHS
454  printf("\n");
455  printf("CUDA binary path: \"%s\"\n", cudaBinPath.getPtr());
456  printf("CUDA include path: \"%s\"\n", cudaIncPath.getPtr());
457  printf("VS binary path: \"%s\"\n", vsBinPath.getPtr());
458  printf("VS include path: \"%s\"\n", vsIncPath.getPtr());
459  printf("\n");
460 #endif
461 
462  // Form NVCC command line.
463 
464  s_nvccCommand = sprintf("set PATH=%s;%s & nvcc.exe -ccbin \"%s\" -I\"%s\" -I\"%s\" -I. -D_CRT_SECURE_NO_DEPRECATE",
465  cudaBinPath.getPtr(),
466  pathEnv.getPtr(),
467  vsBinPath.getPtr(),
468  cudaIncPath.getPtr(),
469  vsIncPath.getPtr());
470 }
471 
472 //------------------------------------------------------------------------
473 
475 {
476  s_frameworkPath = "";
477  s_staticCudaBinPath = "";
478  s_staticOptions = "";
479  s_staticPreamble = "";
480  s_staticBinaryFormat = "";
481 
482  if (!s_inited)
483  return;
484  s_inited = false;
485 
486  flushMemCache();
487  s_cubinCache.reset();
488  s_moduleCache.reset();
489  s_nvccCommand = "";
490 }
491 
492 //------------------------------------------------------------------------
493 
495 {
496  for (int i = s_cubinCache.firstSlot(); i != -1; i = s_cubinCache.nextSlot(i))
497  delete s_cubinCache.getSlot(i).value;
498  s_cubinCache.clear();
499 
500  for (int i = s_moduleCache.firstSlot(); i != -1; i = s_moduleCache.nextSlot(i))
501  delete s_moduleCache.getSlot(i).value;
502  s_moduleCache.clear();
503 }
504 
505 //------------------------------------------------------------------------
506 
507 String CudaCompiler::queryEnv(const String& name)
508 {
509  // Query buffer size.
510 
511  DWORD bufferSize = GetEnvironmentVariable(name.getPtr(), NULL, 0);
512  if (!bufferSize)
513  return "";
514 
515  // Query value.
516 
517  char* buffer = new char[bufferSize];
518  buffer[0] = '\0';
519  GetEnvironmentVariable(name.getPtr(), buffer, bufferSize);
520 
521  // Convert to String.
522 
523  String res = buffer;
524  delete[] buffer;
525  return res;
526 }
527 
528 //------------------------------------------------------------------------
529 
530 void CudaCompiler::splitPathList(Array<String>& res, const String& value)
531 {
532  for (int startIdx = 0; startIdx < value.getLength();)
533  {
534  int endIdx = value.indexOf(';', startIdx);
535  if (endIdx == -1)
536  endIdx = value.getLength();
537 
538  String item = value.substring(startIdx, endIdx);
539  if (item.getLength() >= 2 && item.startsWith("\"") && item.endsWith("\""))
540  item = item.substring(1, item.getLength() - 1);
541  res.add(item);
542 
543  startIdx = endIdx + 1;
544  }
545 }
546 
547 //------------------------------------------------------------------------
548 
549 bool CudaCompiler::fileExists(const String& name)
550 {
551  return ((GetFileAttributes(name.getPtr()) & FILE_ATTRIBUTE_DIRECTORY) == 0);
552 }
553 
554 //------------------------------------------------------------------------
555 
556 String CudaCompiler::removeOption(const String& opts, const String& tag, bool hasParam)
557 {
558  String res = opts;
559  for (int i = 0; i < res.getLength(); i++)
560  {
561  bool match = true;
562  for (int j = 0; match && j < tag.getLength(); j++)
563  match = (i + j < res.getLength() && res[i + j] == tag[j]);
564  if (!match)
565  continue;
566 
567  int idx = res.indexOf(' ', i);
568  if (idx != -1 && hasParam)
569  idx = res.indexOf(' ', idx + 1);
570 
571  res = res.substring(0, i) + ((idx == -1) ? "" : res.substring(idx + 1));
572  i--;
573  }
574  return res;
575 }
576 
577 //------------------------------------------------------------------------
578 
579 U64 CudaCompiler::getMemHash(void)
580 {
581  if (m_memHashValid)
582  return m_memHash;
583 
584  if (!m_sourceHashValid)
585  {
586  m_sourceHash = hashBits(hash<String>(m_sourceFile), hash<String>(m_inlineSource));
587  m_sourceHashValid = true;
588  }
589 
590  if (!m_optionHashValid)
591  {
592  m_optionHash = hash<String>(m_options);
593  m_optionHashValid = true;
594  }
595 
596  if (!m_defineHashValid)
597  {
599  for (int i = m_defines.firstSlot(); i != -1; i = m_defines.nextSlot(i))
600  {
601  a += hash<String>(m_defines.getSlot(i).key);
602  b += hash<String>(m_defines.getSlot(i).value);
603  FW_JENKINS_MIX(a, b, c);
604  }
605  m_defineHash = ((U64)b << 32) | c;
606  m_defineHashValid = true;
607  }
608 
609  if (!m_preambleHashValid)
610  {
611  m_preambleHash = hash<String>(m_preamble);
612  m_preambleHashValid = true;
613  }
614 
615  U32 a = FW_HASH_MAGIC + m_sourceHash;
616  U32 b = FW_HASH_MAGIC + m_optionHash;
617  U32 c = FW_HASH_MAGIC + m_preambleHash;
618  FW_JENKINS_MIX(a, b, c);
619  a += (U32)(m_defineHash >> 32);
620  b += (U32)m_defineHash;
621  FW_JENKINS_MIX(a, b, c);
622  m_memHash = ((U64)b << 32) | c;
623  m_memHashValid = true;
624  return m_memHash;
625 }
626 
627 //------------------------------------------------------------------------
628 
629 void CudaCompiler::createCacheDir(void)
630 {
631  DWORD res = GetFileAttributes(m_cachePath.getPtr());
632  if (res == 0xFFFFFFFF || (res & FILE_ATTRIBUTE_DIRECTORY) == 0)
633  if (CreateDirectory(m_cachePath.getPtr(), NULL) == 0)
634  fail("Cannot create CudaCompiler cache directory '%s'!", m_cachePath.getPtr());
635 }
636 
637 //------------------------------------------------------------------------
638 
639 void CudaCompiler::writeDefineFile(void)
640 {
641  File file(m_cachePath + "\\defines.inl", File::Create);
642  BufferedOutputStream out(file);
643  for (int i = m_defines.firstSlot(); i != -1; i = m_defines.nextSlot(i))
644  out.writef("#define %s %s\n",
645  m_defines.getSlot(i).key.getPtr(),
646  m_defines.getSlot(i).value.getPtr());
647  out.writef("%s\n", s_staticPreamble.getPtr());
648  out.writef("%s\n", m_preamble.getPtr());
649  out.flush();
650 }
651 
652 //------------------------------------------------------------------------
653 
654 void CudaCompiler::initLogFile(const String& name, const String& firstLine)
655 {
656  File file(name, File::Create);
657  BufferedOutputStream out(file);
658  out.writef("%s\n", firstLine.getPtr());
659  out.flush();
660 }
661 
662 //------------------------------------------------------------------------
663 
664 void CudaCompiler::runPreprocessor(String& cubinFile, String& finalOpts)
665 {
666  // Determine preprocessor options.
667 
668  finalOpts = "";
669  if (s_staticOptions.getLength())
670  finalOpts += s_staticOptions + " ";
671  finalOpts += m_options;
672  finalOpts = fixOptions(finalOpts);
673 
674  // Preprocess.
675 
676  String logFile = m_cachePath + "\\preprocess.log";
677  String cmd = sprintf("%s -E -o \"%s\\preprocessed.cu\" -include \"%s\\defines.inl\" %s \"%s\" 2>>\"%s\"",
678  s_nvccCommand.getPtr(),
679  m_cachePath.getPtr(),
680  m_cachePath.getPtr(),
681  finalOpts.getPtr(),
682  saveSource().getPtr(),
683  logFile.getPtr());
684 
685  // TODO: Solve or delete !!!
686  //if(m_sourceFile == "src/rt/kernels/fermi_speculative_while_while.cu")
687  // cmd = "\"C:\\Program Files\\NVIDIA GPU Computing Toolkit\\CUDA\\v4.2\\bin\\nvcc.exe\" -gencode=arch=compute_20,code=sm_20 --use-local-env --cl-version 2010 -ccbin \"C:\\Program Files\\Microsoft Visual Studio 10.0\\VC\\bin\" -I\"src/rt\" -I\"src/framework\" -I\"C:\\Program Files\\NVIDIA GPU Computing Toolkit\\CUDA\\v4.2\\include\" -G --keep-dir \"Debug\" -maxrregcount=0 --machine 32 -E -o \"cudacache\\preprocessed.cu\" \"D:\\GitHub\\NTrace\\src\\rt\\kernels\\fermi_speculative_while_while.cu\"";
688 
689  initLogFile(logFile, cmd);
690  if (system(cmd.getPtr()) != 0)
691  {
692  setLoggedError("CudaCompiler: Preprocessing failed!", logFile);
693  return;
694  }
695 
696  // Specify binary format.
697 
698  if (s_staticBinaryFormat.getLength())
699  finalOpts += s_staticBinaryFormat;
700  else
701  finalOpts += "-cubin";
702  finalOpts += " ";
703 
704  // Hash preprocessed source.
705 
706  File file(m_cachePath + "\\preprocessed.cu", File::Read);
707  BufferedInputStream in(file);
708 
709  U32 hashA = FW_HASH_MAGIC;
710  U32 hashB = FW_HASH_MAGIC;
711  U32 hashC = FW_HASH_MAGIC;
712 
713  for (int lineIdx = 0;; lineIdx++)
714  {
715  const char* linePtr = in.readLine();
716  if (!linePtr)
717  break;
718 
719  // Trim from the left.
720 
721  while (*linePtr == ' ' || *linePtr == '\t')
722  linePtr++;
723 
724  // Empty, directive, or comment => ignore.
725 
726  if (*linePtr == '\0') continue;
727  if (*linePtr == '#') continue;
728  if (*linePtr == '/' && linePtr[1] == '/') continue;
729 
730  // Hash.
731 
732  hashA += hash<String>(String(linePtr));
733  FW_JENKINS_MIX(hashA, hashB, hashC);
734  }
735 
736  // Hash final compiler options and version.
737 
738  finalOpts = fixOptions(finalOpts);
739  hashA += hash<String>(finalOpts);
740  hashB += s_nvccVersionHash;
741  FW_JENKINS_MIX(hashA, hashB, hashC);
742  cubinFile = sprintf("%s\\%08x%08x.cubin", m_cachePath.getPtr(), hashB, hashC);
743  }
744 
745 //------------------------------------------------------------------------
746 
747 void CudaCompiler::runCompiler(const String& cubinFile, const String& finalOpts)
748 {
749  String logFile = m_cachePath + "\\compile.log";
750  String cmd = sprintf("%s -o \"%s\" -include \"%s\\defines.inl\" %s \"%s\" 2>>\"%s\"",
751  s_nvccCommand.getPtr(),
752  cubinFile.getPtr(),
753  m_cachePath.getPtr(),
754  finalOpts.getPtr(),
755  saveSource().getPtr(),
756  logFile.getPtr());
757 
758  initLogFile(logFile, cmd);
759  if (system(cmd.getPtr()) != 0 || !fileExists(cubinFile))
760  setLoggedError("CudaCompiler: Compilation failed!", logFile);
761 
762 #if SHOW_NVCC_OUTPUT
763  setLoggedError("", logFile);
764  printf("%s\n", getError().getPtr());
765  clearError();
766 #endif
767  }
768 
769 //------------------------------------------------------------------------
770 
771 String CudaCompiler::fixOptions(String opts)
772 {
773  // Override SM architecture.
774 
775  S32 smArch = m_overriddenSMArch;
776  if (!smArch && CudaModule::isAvailable())
778 
779  if (smArch)
780  {
781  opts = removeOption(opts, "-arch", true);
782  opts = removeOption(opts, "--gpu-architecture", true);
783  opts += sprintf("-arch sm_%d ", smArch);
784  }
785 
786  // Override pointer width.
787  // CUDA 3.2 => requires -m32 for x86 build and -m64 for x64 build.
788 
789  if (CudaModule::getDriverVersion() >= 32)
790  {
791  opts = removeOption(opts, "-m32", false);
792  opts = removeOption(opts, "-m64", false);
793  opts = removeOption(opts, "--machine", true);
794 
795 #if FW_64
796  opts += "-m64 ";
797 #else
798  opts += "-m32 ";
799 #endif
800  }
801  return opts;
802 }
803 
804 //------------------------------------------------------------------------
805 
806 String CudaCompiler::saveSource(void)
807 {
808  if (!m_inlineSource.getLength())
809  return m_sourceFile;
810 
811  String path = m_cachePath + "\\inline.cu";
812  File file(path, File::Create);
813  file.write(m_inlineSource.getPtr(), m_inlineSource.getLength());
814  return path;
815 }
816 
817 //------------------------------------------------------------------------
818 
819 void CudaCompiler::setLoggedError(const String& description, const String& logFile)
820 {
821  String message = description;
822  File file(logFile, File::Read);
823  BufferedInputStream in(file);
824  in.readLine();
825  for (;;)
826  {
827  const char* linePtr = in.readLine();
828  if (!linePtr)
829  break;
830  if (*linePtr)
831  message += '\n';
832  message += linePtr;
833  }
834  setError("%s", message.getPtr());
835 }
836 
837 //------------------------------------------------------------------------
#define NULL
Definition: Defs.hpp:39
String substring(int start, int end) const
Definition: String.cpp:96
virtual int read(void *ptr, int size)
Definition: File.cpp:267
bool endsWith(const String &str) const
Definition: String.cpp:273
const Array< U8 > * compileCubin(bool enablePrints=true, bool autoFail=true)
const char * getPtr(void) const
Definition: String.hpp:51
static int getDriverVersion(void)
Definition: CudaModule.cpp:495
String getFileName(void) const
Definition: String.cpp:284
static const String & getFrameworkPath(void)
void include(const String &path)
int indexOf(char chr) const
Definition: String.hpp:78
CudaModule * compile(bool enablePrints=true, bool autoFail=true)
CUdevice int ordinal char int CUdevice dev CUdevprop CUdevice dev CUcontext ctx CUcontext ctx CUcontext pctx CUmodule * module
Definition: DLLImports.inl:60
CUdevice int ordinal char int CUdevice dev CUdevprop CUdevice dev CUcontext ctx CUcontext ctx CUcontext pctx CUmodule const void image CUmodule const void fatCubin CUfunction CUmodule const char name void p CUfunction unsigned int bytes CUtexref pTexRef CUtexref CUarray unsigned int Flags CUtexref int CUaddress_mode am CUtexref unsigned int Flags CUaddress_mode CUtexref int dim CUarray_format int CUtexref hTexRef CUfunction unsigned int numbytes CUfunction int float value CUfunction int CUtexref hTexRef CUfunction int int grid_height CUevent unsigned int Flags CUevent hEvent CUevent hEvent CUstream unsigned int Flags CUstream hStream GLuint bufferobj unsigned int CUdevice dev CUdeviceptr unsigned int CUmodule const char name CUdeviceptr unsigned int bytesize CUdeviceptr dptr void unsigned int bytesize void CUdeviceptr unsigned int ByteCount CUarray unsigned int CUdeviceptr unsigned int ByteCount CUarray unsigned int const void unsigned int ByteCount CUarray unsigned int CUarray unsigned int unsigned int ByteCount void CUarray unsigned int unsigned int CUstream hStream const CUDA_MEMCPY2D pCopy CUdeviceptr const void unsigned int CUstream hStream const CUDA_MEMCPY2D CUstream hStream CUdeviceptr unsigned char unsigned int N CUdeviceptr unsigned int unsigned int N CUdeviceptr unsigned int unsigned short unsigned int unsigned int Height CUarray const CUDA_ARRAY_DESCRIPTOR pAllocateArray CUarray const CUDA_ARRAY3D_DESCRIPTOR pAllocateArray unsigned int CUtexref CUdeviceptr unsigned int bytes CUcontext unsigned int CUdevice device GLenum texture GLenum GLuint buffer
Definition: DLLImports.inl:315
CudaModule * loadDbgCubin(bool enablePrints=true)
String formatInlineCuda(const char *file, int line, const char *code)
const char * name
Definition: DLLImports.cpp:42
void setError(const char *fmt,...)
Definition: Defs.cpp:253
String clearError(void)
Definition: Defs.cpp:269
unsigned __int64 U64
Definition: Defs.hpp:97
static void staticDeinit(void)
String compileCubinFile(bool enablePrints=true, bool autoFail=true)
static int getComputeCapability(void)
Definition: CudaModule.cpp:508
float F32
Definition: Defs.hpp:89
#define FW_HASH_MAGIC
Definition: Hash.hpp:169
S64 getSize(void) const
Definition: File.hpp:104
const String & getError(void)
Definition: Defs.cpp:296
CUdevice int ordinal char int CUdevice dev CUdevprop CUdevice dev CUcontext ctx CUcontext ctx CUcontext pctx CUmodule const void image CUmodule const void fatCubin CUfunction CUmodule const char name void p CUfunction unsigned int bytes CUtexref pTexRef CUtexref CUarray unsigned int Flags CUtexref int CUaddress_mode am CUtexref unsigned int Flags CUaddress_mode CUtexref int dim CUarray_format int CUtexref hTexRef CUfunction unsigned int numbytes CUfunction int float value CUfunction int CUtexref hTexRef CUfunction int int grid_height CUevent unsigned int Flags CUevent hEvent CUevent hEvent CUstream unsigned int Flags CUstream hStream GLuint bufferobj unsigned int CUdevice dev CUdeviceptr unsigned int CUmodule const char name CUdeviceptr unsigned int bytesize CUdeviceptr dptr void unsigned int bytesize void CUdeviceptr unsigned int ByteCount CUarray unsigned int CUdeviceptr unsigned int ByteCount CUarray unsigned int const void unsigned int ByteCount CUarray unsigned int CUarray unsigned int unsigned int ByteCount void CUarray unsigned int unsigned int CUstream hStream const CUDA_MEMCPY2D pCopy CUdeviceptr const void unsigned int CUstream hStream const CUDA_MEMCPY2D CUstream hStream CUdeviceptr unsigned char unsigned int N CUdeviceptr unsigned int unsigned int N CUdeviceptr unsigned int unsigned short unsigned int unsigned int Height CUarray const CUDA_ARRAY_DESCRIPTOR pAllocateArray CUarray const CUDA_ARRAY3D_DESCRIPTOR pAllocateArray unsigned int CUtexref CUdeviceptr unsigned int bytes CUcontext unsigned int CUdevice device GLenum texture GLenum GLuint buffer GLenum GLuint renderbuffer GLenum GLsizeiptr const GLvoid GLenum usage GLuint shader GLenum type GLsizei const GLuint framebuffers GLsizei const GLuint renderbuffers GLuint v GLuint v GLenum GLenum GLenum GLuint GLint level GLsizei GLuint framebuffers GLuint const GLchar name GLenum GLintptr GLsizeiptr GLvoid data GLuint GLenum GLint param GLuint GLenum GLint param GLhandleARB programObj GLenum GLenum GLsizei GLsizei height GLenum GLint GLint GLsizei GLsizei GLsizei GLint GLenum GLenum const GLvoid pixels GLint GLsizei const GLfloat value GLint GLfloat GLfloat v1 GLint GLfloat GLfloat GLfloat v2 GLint GLsizei const GLfloat value GLint GLsizei GLboolean const GLfloat value GLuint program GLuint GLfloat GLfloat GLfloat z GLuint GLint GLenum GLboolean GLsizei const GLvoid pointer GLuint GLuint const GLchar name GLenum GLsizei GLenum GLsizei GLsizei height GLenum GLuint renderbuffer GLenum GLenum GLint params GLuint path
Definition: DLLImports.inl:382
void showModalMessage(const String &msg)
Definition: Window.cpp:487
static void flushMemCache(void)
signed int S32
Definition: Defs.hpp:88
int getLength(void) const
Definition: String.hpp:49
static void staticInit(void)
String sprintf(const char *fmt,...)
Definition: Defs.cpp:241
static bool isAvailable(void)
Definition: CudaModule.hpp:68
unsigned int U32
Definition: Defs.hpp:85
bool hasError(void)
Definition: Defs.cpp:289
U32 hashBits(U32 a, U32 b=FW_HASH_MAGIC, U32 c=0)
Definition: Hash.hpp:183
CUdevice int ordinal char int CUdevice dev CUdevprop CUdevice dev CUcontext ctx CUcontext ctx CUcontext pctx CUmodule const void image CUmodule const void fatCubin CUfunction CUmodule const char name void p CUfunction unsigned int bytes CUtexref pTexRef CUtexref CUarray unsigned int Flags CUtexref int CUaddress_mode am CUtexref unsigned int Flags CUaddress_mode CUtexref int dim CUarray_format int CUtexref hTexRef CUfunction unsigned int numbytes CUfunction int float value
Definition: DLLImports.inl:84
void printf(const char *fmt,...)
Definition: Defs.cpp:225
void failIfError(void)
Definition: Defs.cpp:361
bool startsWith(const String &str) const
Definition: String.cpp:261
U32 hash< String >(const String &value)
Definition: Hash.hpp:273
CudaModule * compileInlineCuda(const char *file, int line, const char *code)
void setInlineSource(const String &source, const String &origin="")
T set(S idx, const T &item)
Definition: Array.hpp:248
#define FW_JENKINS_MIX(a, b, c)
Definition: Hash.hpp:172
const T * getPtr(S idx=0) const
Definition: Array.hpp:202
void fail(const char *fmt,...)
Definition: Defs.cpp:304
void addOptions(const String &options)
CUdevice int ordinal char int CUdevice dev CUdevprop CUdevice dev CUcontext ctx CUcontext ctx CUcontext pctx CUmodule const void image CUmodule const void fatCubin CUfunction CUmodule const char name void p CUfunction unsigned int bytes CUtexref pTexRef CUtexref CUarray unsigned int Flags CUtexref int CUaddress_mode am CUtexref unsigned int Flags CUaddress_mode CUtexref int dim CUarray_format int CUtexref hTexRef CUfunction unsigned int numbytes CUfunction int float value CUfunction int CUtexref hTexRef CUfunction int int grid_height CUevent unsigned int Flags CUevent hEvent CUevent hEvent CUstream unsigned int Flags CUstream hStream GLuint bufferobj unsigned int CUdevice dev CUdeviceptr unsigned int CUmodule const char name CUdeviceptr unsigned int bytesize CUdeviceptr dptr void unsigned int bytesize void CUdeviceptr unsigned int ByteCount CUarray unsigned int CUdeviceptr unsigned int ByteCount CUarray unsigned int const void unsigned int ByteCount CUarray unsigned int CUarray unsigned int unsigned int ByteCount void CUarray unsigned int unsigned int CUstream hStream const CUDA_MEMCPY2D pCopy CUdeviceptr const void unsigned int CUstream hStream const CUDA_MEMCPY2D CUstream hStream CUdeviceptr unsigned char unsigned int N CUdeviceptr unsigned int unsigned int N CUdeviceptr unsigned int unsigned short unsigned int unsigned int Height CUarray const CUDA_ARRAY_DESCRIPTOR pAllocateArray CUarray const CUDA_ARRAY3D_DESCRIPTOR pAllocateArray unsigned int CUtexref CUdeviceptr unsigned int bytes CUcontext unsigned int CUdevice device GLenum texture GLenum GLuint buffer GLenum GLuint renderbuffer GLenum GLsizeiptr size
Definition: DLLImports.inl:319
S getSize(void) const
Definition: Array.hpp:188