// Compile-time debug switches.
// NOTE(review): presumably SHOW_TOOL_PATHS gates the printout of the detected
// tool paths and SHOW_NVCC_OUTPUT gates echoing of the nvcc log; the guarding
// #if lines are not visible in this view - confirm.
41 #define SHOW_TOOL_PATHS 1
42 #define SHOW_NVCC_OUTPUT 0
// Definitions of CudaCompiler's static string settings. They are
// default-constructed here and are assigned "" again elsewhere in this file.
46 String CudaCompiler::s_frameworkPath;
47 String CudaCompiler::s_staticCudaBinPath;
48 String CudaCompiler::s_staticOptions;
49 String CudaCompiler::s_staticPreamble;
50 String CudaCompiler::s_staticBinaryFormat;
// Initialization flag; starts out false.
52 bool CudaCompiler::s_inited =
false;
// Starts at zero; runPreprocessor() adds this value into the hash that names
// the cached .cubin file.
55 U32 CudaCompiler::s_nvccVersionHash = 0;
// Command-line prefix for nvcc; elsewhere in this file it is assigned a
// "set PATH=... & nvcc.exe -ccbin ..." string.
56 String CudaCompiler::s_nvccCommand;
62 static const char s_header[] =
63 "#include \"base/Math.hpp\"\n"
64 "#include <stdio.h>\n"
71 static const char s_footer[] =
80 fail(
"FW_INLINE_CUDA: Framework path not defined! Please call CudaCompiler::setFrameworkPath().");
85 for (
int i = 0; i < fixedFile.
getSize(); i++)
86 if (fixedFile[i] ==
'\\')
92 for (
int i = 0; s_header[i]; i++)
93 if (s_header[i] ==
'\n')
95 for (
int i = 0; code[i]; i++)
101 return sprintf(
"#line %d \"%s\"\n%s%s%s\n", line - numLinefeeds, fixedFile.
getPtr(), s_header, code, s_footer);
117 : m_cachePath (
"cudacache"),
118 m_overriddenSMArch (0),
125 m_sourceHashValid (false),
126 m_optionHashValid (false),
127 m_defineHashValid (false),
128 m_preambleHashValid (false),
129 m_memHashValid (false),
151 U64 memHash = getMemHash();
152 CudaModule** found = s_moduleCache.search(memHash);
170 s_moduleCache.add(memHash, module);
180 String cubinDir =
"build\\Win32";
182 String cubinDir =
"build\\x64_Debug\\cuda";
185 cubinDir +=
"_Debug\\cuda";
187 cubinDir +=
"_Release\\cuda";
190 String fullCubinPath = cubinDir +
"\\" + cubinFile;
198 U64 memHash = getMemHash();
199 s_moduleCache.add(memHash, module);
215 U64 memHash = getMemHash();
216 Array<U8>** found = s_cubinCache.search(memHash);
236 s_cubinCache.add(memHash, cubin);
251 setError(
"CudaCompiler: No source file specified!");
269 String cubinFile, finalOpts;
270 runPreprocessor(cubinFile, finalOpts);
278 if (fileExists(cubinFile))
286 printf(
"CudaCompiler: Compiling '%s'...", m_sourceFile.
getPtr());
288 printf(
"CudaCompiler: Compiling inline code from '%s'...", m_inlineOrigin.
getPtr());
290 printf(
"CudaCompiler: Compiling inline code...");
293 m_window->
showModalMessage(
"Compiling CUDA kernel...\nThis will take a few seconds.");
295 runCompiler(cubinFile, finalOpts);
301 return (
hasError()) ?
"" : cubinFile;
318 for (
char drive =
'C'; drive <=
'E'; drive++)
320 for (
int progX86 = 0; progX86 <= 1; progX86++)
322 String prog =
sprintf(
"%c:\\%s", drive, (progX86 == 0) ?
"Program Files" :
"Program Files (x86)");
323 potentialCudaPaths.
add(prog +
sprintf(
"\\NVIDIA GPU Computing Toolkit\\CUDA\\v%.1f", driverVersion));
324 potentialVSPaths.
add(prog +
"\\Microsoft Visual Studio 10.0");
325 potentialVSPaths.
add(prog +
"\\Microsoft Visual Studio 9.0");
326 potentialVSPaths.
add(prog +
"\\Microsoft Visual Studio 8");
328 potentialCudaPaths.
add(
sprintf(
"%c:\\CUDA", drive));
333 String pathEnv = queryEnv(
"PATH");
334 String includeEnv = queryEnv(
"INCLUDE");
335 String cudaBinEnv = queryEnv(
"CUDA_BIN_PATH");
336 String cudaIncEnv = queryEnv(
"CUDA_INC_PATH");
342 cudaBinList.
add(s_staticCudaBinPath);
345 cudaBinList.
add(cudaBinEnv);
346 splitPathList(cudaBinList, pathEnv);
347 for (
int i = 0; i < potentialCudaPaths.
getSize(); i++)
349 cudaBinList.
add(potentialCudaPaths[i] +
"\\bin");
350 cudaBinList.
add(potentialCudaPaths[i] +
"\\bin64");
355 for (
int i = 0; i < cudaBinList.
getSize(); i++)
357 if (!cudaBinList[i].getLength() || !fileExists(cudaBinList[i] +
"\\nvcc.exe"))
362 FILE* pipe = _popen(
sprintf(
"\"%s\\nvcc.exe\" --version 2>nul", cudaBinList[i].getPtr()).getPtr(),
"rt");
368 output.
add((
char)fgetc(pipe));
375 if (!response.startsWith(
"nvcc: NVIDIA"))
380 cudaBinPath = cudaBinList[i];
386 fail(
"Unable to detect CUDA Toolkit binary path!\nPlease set CUDA_BIN_PATH environment variable.");
391 splitPathList(vsBinList, pathEnv);
392 for (
int i = 0; i < potentialVSPaths.
getSize(); i++)
393 vsBinList.
add(potentialVSPaths[i] +
"\\VC\\bin");
396 for (
int i = 0; i < vsBinList.
getSize(); i++)
398 if (vsBinList[i].getLength() && fileExists(vsBinList[i] +
"\\vcvars32.bat"))
400 vsBinPath = vsBinList[i];
406 fail(
"Unable to detect Visual Studio binary path!\nPlease run VCVARS32.BAT.");
411 cudaIncList.
add(cudaBinPath +
"\\..\\include");
412 cudaIncList.
add(cudaIncEnv);
413 splitPathList(cudaIncList, includeEnv);
414 cudaIncList.
add(
"C:\\CUDA\\include");
415 cudaIncList.
add(
"D:\\CUDA\\include");
418 for (
int i = 0; i < cudaIncList.
getSize(); i++)
420 if (cudaIncList[i].getLength() && fileExists(cudaIncList[i] +
"\\cuda.h"))
422 cudaIncPath = cudaIncList[i];
428 fail(
"Unable to detect CUDA Toolkit include path!\nPlease set CUDA_INC_PATH environment variable.");
433 vsIncList.
add(vsBinPath +
"\\..\\INCLUDE");
434 splitPathList(vsIncList, includeEnv);
435 for (
int i = 0; i < potentialVSPaths.
getSize(); i++)
436 vsIncList.
add(potentialVSPaths[i] +
"\\VC\\INCLUDE");
439 for (
int i = 0; i < vsIncList.
getSize(); i++)
441 if (vsIncList[i].getLength() && fileExists(vsIncList[i] +
"\\crtdefs.h"))
443 vsIncPath = vsIncList[i];
449 fail(
"Unable to detect Visual Studio include path!\nPlease run VCVARS32.BAT.");
455 printf(
"CUDA binary path: \"%s\"\n", cudaBinPath.
getPtr());
456 printf(
"CUDA include path: \"%s\"\n", cudaIncPath.
getPtr());
458 printf(
"VS include path: \"%s\"\n", vsIncPath.
getPtr());
464 s_nvccCommand =
sprintf(
"set PATH=%s;%s & nvcc.exe -ccbin \"%s\" -I\"%s\" -I\"%s\" -I. -D_CRT_SECURE_NO_DEPRECATE",
476 s_frameworkPath =
"";
477 s_staticCudaBinPath =
"";
478 s_staticOptions =
"";
479 s_staticPreamble =
"";
480 s_staticBinaryFormat =
"";
487 s_cubinCache.reset();
488 s_moduleCache.reset();
496 for (
int i = s_cubinCache.firstSlot(); i != -1; i = s_cubinCache.nextSlot(i))
497 delete s_cubinCache.getSlot(i).value;
498 s_cubinCache.clear();
500 for (
int i = s_moduleCache.firstSlot(); i != -1; i = s_moduleCache.nextSlot(i))
501 delete s_moduleCache.getSlot(i).value;
502 s_moduleCache.clear();
511 DWORD bufferSize = GetEnvironmentVariable(name.
getPtr(),
NULL, 0);
517 char*
buffer =
new char[bufferSize];
519 GetEnvironmentVariable(name.
getPtr(),
buffer, bufferSize);
532 for (
int startIdx = 0; startIdx < value.
getLength();)
534 int endIdx = value.
indexOf(
';', startIdx);
543 startIdx = endIdx + 1;
// Tests `name` via GetFileAttributes(): the result is true exactly when the
// returned attribute word has FILE_ATTRIBUTE_DIRECTORY clear.
// NOTE(review): a failed lookup presumably returns 0xFFFFFFFF (the value
// createCacheDir() compares against), which has the directory bit set and
// would therefore also yield false - confirm against the Win32 documentation.
549 bool CudaCompiler::fileExists(
const String& name)
551 return ((GetFileAttributes(name.
getPtr()) & FILE_ATTRIBUTE_DIRECTORY) == 0);
// Takes an option string `opts`, an option name `tag`, and a flag `hasParam`
// telling whether the option is followed by a parameter; returns a String.
// NOTE(review): the declaration of `res`/`idx` and the step that actually
// drops the matched text are not visible in this view; presumably `res` is a
// working copy of `opts` with the matched option cut out - confirm.
556 String CudaCompiler::removeOption(
const String& opts,
const String& tag,
bool hasParam)
// Try every start position i in `res`.
559 for (
int i = 0; i < res.
getLength(); i++)
// Compare `tag` character by character against res[i...]; the comparison
// stops at the first mismatch or when the end of `res` is reached.
562 for (
int j = 0; match && j < tag.
getLength(); j++)
563 match = (i + j < res.
getLength() && res[i + j] == tag[j]);
// When the option carries a parameter, advance idx to the next space after
// idx so the parameter is covered as well.
568 if (idx != -1 && hasParam)
569 idx = res.
indexOf(
' ', idx + 1);
// Produces the 64-bit hash stored in m_memHash. Each component hash (source,
// options, defines, preamble) is guarded by its own *HashValid flag and is
// only recomputed while that flag is false.
// NOTE(review): the hashing statements themselves, and any early return on
// m_memHashValid, are not visible in this view.
579 U64 CudaCompiler::getMemHash(
void)
584 if (!m_sourceHashValid)
587 m_sourceHashValid =
true;
590 if (!m_optionHashValid)
593 m_optionHashValid =
true;
596 if (!m_defineHashValid)
// Visit every occupied slot of the m_defines table.
599 for (
int i = m_defines.firstSlot(); i != -1; i = m_defines.nextSlot(i))
// Pack two 32-bit values into one 64-bit define hash (b high, c low).
605 m_defineHash = ((
U64)b << 32) | c;
606 m_defineHashValid =
true;
609 if (!m_preambleHashValid)
612 m_preambleHashValid =
true;
// Fold the high and low halves of the define hash into a and b.
619 a += (
U32)(m_defineHash >> 32);
620 b += (
U32)m_defineHash;
// Final result: b in the upper 32 bits, c in the lower 32 bits.
622 m_memHash = ((
U64)b << 32) | c;
623 m_memHashValid =
true;
// Makes sure the directory named by m_cachePath is present. If the attribute
// query returns 0xFFFFFFFF or the path is not a directory, CreateDirectory()
// is attempted; a zero return from it is reported through fail().
629 void CudaCompiler::createCacheDir(
void)
631 DWORD res = GetFileAttributes(m_cachePath.
getPtr());
632 if (res == 0xFFFFFFFF || (res & FILE_ATTRIBUTE_DIRECTORY) == 0)
633 if (CreateDirectory(m_cachePath.
getPtr(),
NULL) == 0)
634 fail(
"Cannot create CudaCompiler cache directory '%s'!", m_cachePath.
getPtr());
// Writes one "#define <key> <value>" line per entry of m_defines to `out`,
// followed by s_staticPreamble and then m_preamble, each terminated by a
// newline.
// NOTE(review): the declaration of `out` is not visible here; presumably it
// is the "defines.inl" file that the nvcc command lines pass via -include -
// confirm.
639 void CudaCompiler::writeDefineFile(
void)
643 for (
int i = m_defines.firstSlot(); i != -1; i = m_defines.nextSlot(i))
644 out.writef(
"#define %s %s\n",
645 m_defines.getSlot(i).key.getPtr(),
646 m_defines.getSlot(i).value.getPtr());
647 out.writef(
"%s\n", s_staticPreamble.
getPtr());
648 out.writef(
"%s\n", m_preamble.
getPtr());
// Writes `firstLine` plus a trailing newline to `out`.
// NOTE(review): the creation of `out` is not visible here; presumably it
// opens the file `name` for writing - confirm. Callers in this file pass the
// log path and the command line about to be executed.
654 void CudaCompiler::initLogFile(
const String& name,
const String& firstLine)
658 out.writef(
"%s\n", firstLine.
getPtr());
// Runs the preprocessing pass and fills in both output parameters:
//   finalOpts - the option string built from s_staticOptions and m_options,
//               passed through fixOptions()
//   cubinFile - a path of the form "<m_cachePath>\<hashB><hashC>.cubin", with
//               both hash values printed as 8 hex digits
// NOTE(review): several statements (hash setup, sprintf arguments, the file
// reader `in`, loop exits) are not visible in this view.
664 void CudaCompiler::runPreprocessor(
String& cubinFile,
String& finalOpts)
// Static options first, then the per-instance options, then normalization.
670 finalOpts += s_staticOptions +
" ";
671 finalOpts += m_options;
672 finalOpts = fixOptions(finalOpts);
// The command uses -E with output to "preprocessed.cu" and force-includes
// "defines.inl"; stderr is appended to the log file.
676 String logFile = m_cachePath +
"\\preprocess.log";
677 String cmd =
sprintf(
"%s -E -o \"%s\\preprocessed.cu\" -include \"%s\\defines.inl\" %s \"%s\" 2>>\"%s\"",
// Start the log with the command line, then execute it; a nonzero exit
// status is recorded as an error together with the log file.
689 initLogFile(logFile, cmd);
690 if (system(cmd.
getPtr()) != 0)
692 setLoggedError(
"CudaCompiler: Preprocessing failed!", logFile);
// NOTE(review): the condition selecting between the static binary format and
// "-cubin" is not visible; presumably "-cubin" is the fallback - confirm.
699 finalOpts += s_staticBinaryFormat;
701 finalOpts +=
"-cubin";
// Walk the lines returned by in.readLine().
713 for (
int lineIdx = 0;; lineIdx++)
715 const char* linePtr = in.readLine();
// Skip leading spaces and tabs.
721 while (*linePtr ==
' ' || *linePtr ==
'\t')
// Ignore empty lines, lines starting with '#', and lines starting with "//".
726 if (*linePtr ==
'\0')
continue;
727 if (*linePtr ==
'#')
continue;
728 if (*linePtr ==
'/' && linePtr[1] ==
'/')
continue;
738 finalOpts = fixOptions(finalOpts);
// Mix the nvcc version hash into the value used for the cache file name.
740 hashB += s_nvccVersionHash;
742 cubinFile =
sprintf(
"%s\\%08x%08x.cubin", m_cachePath.
getPtr(), hashB, hashC);
// Runs the compile command and records an error when it fails. Failure means
// either a nonzero exit status from system() or `cubinFile` not existing
// afterwards.
// NOTE(review): the sprintf arguments are not visible in this view;
// presumably the -o target is `cubinFile` and the options are `finalOpts` -
// confirm.
747 void CudaCompiler::runCompiler(
const String& cubinFile,
const String& finalOpts)
// stderr of the command is appended to "compile.log" in the cache directory.
749 String logFile = m_cachePath +
"\\compile.log";
750 String cmd =
sprintf(
"%s -o \"%s\" -include \"%s\\defines.inl\" %s \"%s\" 2>>\"%s\"",
// Start the log with the command line before running it.
758 initLogFile(logFile, cmd);
759 if (system(cmd.
getPtr()) != 0 || !fileExists(cubinFile))
760 setLoggedError(
"CudaCompiler: Compilation failed!", logFile);
// NOTE(review): the guard around this call is not visible; presumably it is
// only active when SHOW_NVCC_OUTPUT is enabled - confirm.
763 setLoggedError(
"", logFile);
775 S32 smArch = m_overriddenSMArch;
781 opts = removeOption(opts,
"-arch",
true);
782 opts = removeOption(opts,
"--gpu-architecture",
true);
783 opts +=
sprintf(
"-arch sm_%d ", smArch);
791 opts = removeOption(opts,
"-m32",
false);
792 opts = removeOption(opts,
"-m64",
false);
793 opts = removeOption(opts,
"--machine",
true);
806 String CudaCompiler::saveSource(
void)
// Builds an error message that starts with `description` and reads lines via
// in.readLine().
// NOTE(review): the construction of `in` and the final call that stores the
// message are not visible in this view; presumably the contents of `logFile`
// are appended to the message before it is set as the error - confirm.
819 void CudaCompiler::setLoggedError(
const String& description,
const String& logFile)
821 String message = description;
827 const char* linePtr = in.readLine();
String substring(int start, int end) const
virtual int read(void *ptr, int size)
bool endsWith(const String &str) const
const Array< U8 > * compileCubin(bool enablePrints=true, bool autoFail=true)
const char * getPtr(void) const
static int getDriverVersion(void)
String getFileName(void) const
static const String & getFrameworkPath(void)
void include(const String &path)
int indexOf(char chr) const
CudaModule * compile(bool enablePrints=true, bool autoFail=true)
CUdevice int ordinal char int CUdevice dev CUdevprop CUdevice dev CUcontext ctx CUcontext ctx CUcontext pctx CUmodule * module
CUdevice int ordinal char int CUdevice dev CUdevprop CUdevice dev CUcontext ctx CUcontext ctx CUcontext pctx CUmodule const void image CUmodule const void fatCubin CUfunction CUmodule const char name void p CUfunction unsigned int bytes CUtexref pTexRef CUtexref CUarray unsigned int Flags CUtexref int CUaddress_mode am CUtexref unsigned int Flags CUaddress_mode CUtexref int dim CUarray_format int CUtexref hTexRef CUfunction unsigned int numbytes CUfunction int float value CUfunction int CUtexref hTexRef CUfunction int int grid_height CUevent unsigned int Flags CUevent hEvent CUevent hEvent CUstream unsigned int Flags CUstream hStream GLuint bufferobj unsigned int CUdevice dev CUdeviceptr unsigned int CUmodule const char name CUdeviceptr unsigned int bytesize CUdeviceptr dptr void unsigned int bytesize void CUdeviceptr unsigned int ByteCount CUarray unsigned int CUdeviceptr unsigned int ByteCount CUarray unsigned int const void unsigned int ByteCount CUarray unsigned int CUarray unsigned int unsigned int ByteCount void CUarray unsigned int unsigned int CUstream hStream const CUDA_MEMCPY2D pCopy CUdeviceptr const void unsigned int CUstream hStream const CUDA_MEMCPY2D CUstream hStream CUdeviceptr unsigned char unsigned int N CUdeviceptr unsigned int unsigned int N CUdeviceptr unsigned int unsigned short unsigned int unsigned int Height CUarray const CUDA_ARRAY_DESCRIPTOR pAllocateArray CUarray const CUDA_ARRAY3D_DESCRIPTOR pAllocateArray unsigned int CUtexref CUdeviceptr unsigned int bytes CUcontext unsigned int CUdevice device GLenum texture GLenum GLuint buffer
CudaModule * loadDbgCubin(bool enablePrints=true)
String formatInlineCuda(const char *file, int line, const char *code)
void setError(const char *fmt,...)
static void staticDeinit(void)
String compileCubinFile(bool enablePrints=true, bool autoFail=true)
static int getComputeCapability(void)
const String & getError(void)
CUdevice int ordinal char int CUdevice dev CUdevprop CUdevice dev CUcontext ctx CUcontext ctx CUcontext pctx CUmodule const void image CUmodule const void fatCubin CUfunction CUmodule const char name void p CUfunction unsigned int bytes CUtexref pTexRef CUtexref CUarray unsigned int Flags CUtexref int CUaddress_mode am CUtexref unsigned int Flags CUaddress_mode CUtexref int dim CUarray_format int CUtexref hTexRef CUfunction unsigned int numbytes CUfunction int float value CUfunction int CUtexref hTexRef CUfunction int int grid_height CUevent unsigned int Flags CUevent hEvent CUevent hEvent CUstream unsigned int Flags CUstream hStream GLuint bufferobj unsigned int CUdevice dev CUdeviceptr unsigned int CUmodule const char name CUdeviceptr unsigned int bytesize CUdeviceptr dptr void unsigned int bytesize void CUdeviceptr unsigned int ByteCount CUarray unsigned int CUdeviceptr unsigned int ByteCount CUarray unsigned int const void unsigned int ByteCount CUarray unsigned int CUarray unsigned int unsigned int ByteCount void CUarray unsigned int unsigned int CUstream hStream const CUDA_MEMCPY2D pCopy CUdeviceptr const void unsigned int CUstream hStream const CUDA_MEMCPY2D CUstream hStream CUdeviceptr unsigned char unsigned int N CUdeviceptr unsigned int unsigned int N CUdeviceptr unsigned int unsigned short unsigned int unsigned int Height CUarray const CUDA_ARRAY_DESCRIPTOR pAllocateArray CUarray const CUDA_ARRAY3D_DESCRIPTOR pAllocateArray unsigned int CUtexref CUdeviceptr unsigned int bytes CUcontext unsigned int CUdevice device GLenum texture GLenum GLuint buffer GLenum GLuint renderbuffer GLenum GLsizeiptr const GLvoid GLenum usage GLuint shader GLenum type GLsizei const GLuint framebuffers GLsizei const GLuint renderbuffers GLuint v GLuint v GLenum GLenum GLenum GLuint GLint level GLsizei GLuint framebuffers GLuint const GLchar name GLenum GLintptr GLsizeiptr GLvoid data GLuint GLenum GLint param GLuint GLenum GLint param GLhandleARB programObj GLenum GLenum GLsizei 
GLsizei height GLenum GLint GLint GLsizei GLsizei GLsizei GLint GLenum GLenum const GLvoid pixels GLint GLsizei const GLfloat value GLint GLfloat GLfloat v1 GLint GLfloat GLfloat GLfloat v2 GLint GLsizei const GLfloat value GLint GLsizei GLboolean const GLfloat value GLuint program GLuint GLfloat GLfloat GLfloat z GLuint GLint GLenum GLboolean GLsizei const GLvoid pointer GLuint GLuint const GLchar name GLenum GLsizei GLenum GLsizei GLsizei height GLenum GLuint renderbuffer GLenum GLenum GLint params GLuint path
void showModalMessage(const String &msg)
static void flushMemCache(void)
int getLength(void) const
static void staticInit(void)
String sprintf(const char *fmt,...)
static bool isAvailable(void)
U32 hashBits(U32 a, U32 b=FW_HASH_MAGIC, U32 c=0)
CUdevice int ordinal char int CUdevice dev CUdevprop CUdevice dev CUcontext ctx CUcontext ctx CUcontext pctx CUmodule const void image CUmodule const void fatCubin CUfunction CUmodule const char name void p CUfunction unsigned int bytes CUtexref pTexRef CUtexref CUarray unsigned int Flags CUtexref int CUaddress_mode am CUtexref unsigned int Flags CUaddress_mode CUtexref int dim CUarray_format int CUtexref hTexRef CUfunction unsigned int numbytes CUfunction int float value
void printf(const char *fmt,...)
bool startsWith(const String &str) const
U32 hash< String >(const String &value)
CudaModule * compileInlineCuda(const char *file, int line, const char *code)
void setInlineSource(const String &source, const String &origin="")
T set(S idx, const T &item)
#define FW_JENKINS_MIX(a, b, c)
const T * getPtr(S idx=0) const
void fail(const char *fmt,...)
void addOptions(const String &options)
CUdevice int ordinal char int CUdevice dev CUdevprop CUdevice dev CUcontext ctx CUcontext ctx CUcontext pctx CUmodule const void image CUmodule const void fatCubin CUfunction CUmodule const char name void p CUfunction unsigned int bytes CUtexref pTexRef CUtexref CUarray unsigned int Flags CUtexref int CUaddress_mode am CUtexref unsigned int Flags CUaddress_mode CUtexref int dim CUarray_format int CUtexref hTexRef CUfunction unsigned int numbytes CUfunction int float value CUfunction int CUtexref hTexRef CUfunction int int grid_height CUevent unsigned int Flags CUevent hEvent CUevent hEvent CUstream unsigned int Flags CUstream hStream GLuint bufferobj unsigned int CUdevice dev CUdeviceptr unsigned int CUmodule const char name CUdeviceptr unsigned int bytesize CUdeviceptr dptr void unsigned int bytesize void CUdeviceptr unsigned int ByteCount CUarray unsigned int CUdeviceptr unsigned int ByteCount CUarray unsigned int const void unsigned int ByteCount CUarray unsigned int CUarray unsigned int unsigned int ByteCount void CUarray unsigned int unsigned int CUstream hStream const CUDA_MEMCPY2D pCopy CUdeviceptr const void unsigned int CUstream hStream const CUDA_MEMCPY2D CUstream hStream CUdeviceptr unsigned char unsigned int N CUdeviceptr unsigned int unsigned int N CUdeviceptr unsigned int unsigned short unsigned int unsigned int Height CUarray const CUDA_ARRAY_DESCRIPTOR pAllocateArray CUarray const CUDA_ARRAY3D_DESCRIPTOR pAllocateArray unsigned int CUtexref CUdeviceptr unsigned int bytes CUcontext unsigned int CUdevice device GLenum texture GLenum GLuint buffer GLenum GLuint renderbuffer GLenum GLsizeiptr size