added ptxgen

This commit is contained in:
Evghenii
2014-02-20 10:50:17 +01:00
parent dea856b7e3
commit c54a91eab3
2 changed files with 263 additions and 155 deletions

View File

@@ -79,13 +79,13 @@ static std::vector<std::string> lSplitString(const std::string &s, char delim)
static void lUsage(const int ret) static void lUsage(const int ret)
{ {
fprintf(stderr, "\nusage: ptxcc\n"); fprintf(stdout, "\nusage: ptxcc\n");
fprintf(stderr, " [--help]\t\t\t\t This help\n"); fprintf(stdout, " [--help]\t\t\t\t This help\n");
fprintf(stderr, " [--verbose]\t\t\t\t Be verbose\n"); fprintf(stdout, " [--verbose]\t\t\t\t Be verbose\n");
fprintf(stderr, " [--arch={%s}]\t\t\t GPU target architecture\n", "sm_35"); fprintf(stdout, " [--arch={%s}]\t\t\t GPU target architecture\n", "sm_35");
fprintf(stderr, " [-o <name>]\t\t\t\t Output file name\n"); fprintf(stdout, " [-o <name>]\t\t\t\t Output file name\n");
fprintf(stderr, " [-Xnvcc=<arguments>]\t\t Arguments to pass through to \"nvcc\"\n"); fprintf(stdout, " [-Xnvcc=<arguments>]\t\t Arguments to pass through to \"nvcc\"\n");
fprintf(stderr, " \n"); fprintf(stdout, " \n");
exit(ret); exit(ret);
} }

View File

@@ -1,37 +1,29 @@
/* #include <cstdio>
* Copyright 1993-2012 NVIDIA Corporation. All rights reserved. #include <cstdlib>
* #include <cstring>
* Please refer to the NVIDIA end user license agreement (EULA) associated #include <cassert>
* with this source code for terms and conditions that govern your use of #include <iostream>
* this software. Any use, reproduction, disclosure, or distribution of #include <fstream>
* this software and related documentation outside the terms of the EULA #include <sstream>
* is strictly prohibited. #include <string>
* #include <vector>
*/
#include <nvvm.h> #include <nvvm.h>
#include <stdio.h>
#include <string.h>
#include <sys/stat.h> #include <sys/stat.h>
/* Two levels of indirection to stringify LIBDEVICE_MAJOR_VERSION and
* LIBDEVICE_MINOR_VERSION correctly. */
#define getLibDeviceNameForArch(ARCH) \
_getLibDeviceNameForArch(ARCH, \
LIBDEVICE_MAJOR_VERSION, \
LIBDEVICE_MINOR_VERSION)
#define _getLibDeviceNameForArch(ARCH, MAJOR, MINOR) \
__getLibDeviceNameForArch(ARCH, MAJOR, MINOR)
#define __getLibDeviceNameForArch(ARCH, MAJOR, MINOR) \
("/libdevice/libdevice.compute_" #ARCH "." #MAJOR #MINOR ".bc")
#define getLibnvvmHome _getLibnvvmHome(LIBNVVM_HOME) template<typename T>
#define _getLibnvvmHome(NVVM_HOME) __getLibnvvmHome(NVVM_HOME) static std::string lValueToString(const T& value)
#define __getLibnvvmHome(NVVM_HOME) (#NVVM_HOME) {
std::ostringstream oss;
oss << value;
return oss.str();
}
typedef struct stat Stat; typedef struct stat Stat;
#define PTXGENStatus int
enum { enum {
PTXGEN_SUCCESS = 0x0000, PTXGEN_SUCCESS = 0x0000,
PTXGEN_FILE_IO_ERROR = 0x0001, PTXGEN_FILE_IO_ERROR = 0x0001,
@@ -42,54 +34,50 @@ enum {
PTXGEN_LIBNVVM_HOME_UNDEFINED = 0x0020, PTXGEN_LIBNVVM_HOME_UNDEFINED = 0x0020,
PTXGEN_LIBNVVM_VERIFICATION_ERROR = 0x0040 PTXGEN_LIBNVVM_VERIFICATION_ERROR = 0x0040
}; };
#define PTXGENStatus int
static PTXGENStatus getLibDeviceName(int computeArch, char **buffer) static PTXGENStatus getLibDeviceName(const int computeArch, std::string &libDeviceName)
{ {
const char *libnvvmPath = getLibnvvmHome; const char *env = getenv("LIBNVVM_HOME");
const char *libdevice = NULL; #ifdef LIBNVVM_HOME
#define STRINGIFY(x) #x
#define TOSTRING(x) STRINGIFY(x)
const std::string libnvvmPath(env ? env : TOSTRING(LIBNVVM_HOME));
#undef TOSTRING
#undef STRINGIFY
#else
const std::string libnvvmPath(env);
#endif
if (libnvvmPath == NULL) { if (libnvvmPath.empty())
fprintf(stderr, "The environment variable LIBNVVM_HOME undefined\n"); {
fprintf(stderr, "The environment variable LIBNVVM_HOME is undefined\n");
return PTXGEN_LIBNVVM_HOME_UNDEFINED; return PTXGEN_LIBNVVM_HOME_UNDEFINED;
} }
/* Use libdevice for compute_20, if the target is not compute_20, compute_30, /* Use libdevice for compute_20, if the target is not compute_20, compute_30,
* or compute_35. */ * or compute_35. */
switch (computeArch) { const std::string libdevice =
default: std::string("/libdevice/libdevice.compute_") +
libdevice = getLibDeviceNameForArch(20); lValueToString(computeArch)+ "." +
break; lValueToString(LIBDEVICE_MAJOR_VERSION) +
case 30: lValueToString(LIBDEVICE_MINOR_VERSION) +
libdevice = getLibDeviceNameForArch(30); ".bc";
break;
case 35:
libdevice = getLibDeviceNameForArch(35);
break;
}
*buffer = (char *) malloc(strlen(libnvvmPath) + strlen(libdevice) + 1); libDeviceName = libnvvmPath + libdevice;
if (*buffer == NULL) {
fprintf(stderr, "Failed to allocate memory\n");
return PTXGEN_BAD_ALLOC_ERROR;
}
/* Concatenate libnvvmPath and name. */
*buffer = strcat(strcpy(*buffer, libnvvmPath), libdevice);
return PTXGEN_SUCCESS; return PTXGEN_SUCCESS;
} }
static PTXGENStatus addFileToProgram(const char *filename, nvvmProgram prog) static PTXGENStatus addFileToProgram(const std::string &filename, nvvmProgram prog)
{ {
char *buffer; char *buffer;
size_t size; size_t size;
Stat fileStat; Stat fileStat;
/* Open the input file. */ /* Open the input file. */
FILE *f = fopen(filename, "rb"); FILE *f = fopen(filename.c_str(), "rb");
if (f == NULL) { if (f == NULL) {
fprintf(stderr, "Failed to open %s\n", filename); fprintf(stderr, "Failed to open %s\n", filename.c_str());
return PTXGEN_FILE_IO_ERROR; return PTXGEN_FILE_IO_ERROR;
} }
@@ -102,17 +90,17 @@ static PTXGENStatus addFileToProgram(const char *filename, nvvmProgram prog)
} }
size = fread(buffer, 1, fileStat.st_size, f); size = fread(buffer, 1, fileStat.st_size, f);
if (ferror(f)) { if (ferror(f)) {
fprintf(stderr, "Failed to read %s\n", filename); fprintf(stderr, "Failed to read %s\n", filename.c_str());
fclose(f); fclose(f);
free(buffer); free(buffer);
return PTXGEN_FILE_IO_ERROR; return PTXGEN_FILE_IO_ERROR;
} }
fclose(f); fclose(f);
if (nvvmAddModuleToProgram(prog, buffer, size, filename) != NVVM_SUCCESS) { if (nvvmAddModuleToProgram(prog, buffer, size, filename.c_str()) != NVVM_SUCCESS) {
fprintf(stderr, fprintf(stderr,
"Failed to add the module %s to the compilation unit\n", "Failed to add the module %s to the compilation unit\n",
filename); filename.c_str());
free(buffer); free(buffer);
return PTXGEN_LIBNVVM_ERROR; return PTXGEN_LIBNVVM_ERROR;
} }
@@ -121,45 +109,58 @@ static PTXGENStatus addFileToProgram(const char *filename, nvvmProgram prog)
return PTXGEN_SUCCESS; return PTXGEN_SUCCESS;
} }
static PTXGENStatus generatePTX(int numOptions, const char **options, static PTXGENStatus generatePTX(
int numFilenames, const char **filenames, std::vector<std::string> nvvmOptions,
int computeArch) std::vector<std::string> nvvmFiles,
std::ostream &out,
const int computeArch)
{ {
nvvmProgram prog;
PTXGENStatus status; PTXGENStatus status;
nvvmProgram prog;
char *libDeviceName;
int i;
/* Create the compilation unit. */ /* Create the compilation unit. */
if (nvvmCreateProgram(&prog) != NVVM_SUCCESS) { if (nvvmCreateProgram(&prog) != NVVM_SUCCESS)
{
fprintf(stderr, "Failed to create the compilation unit\n"); fprintf(stderr, "Failed to create the compilation unit\n");
return PTXGEN_LIBNVVM_ERROR; return PTXGEN_LIBNVVM_ERROR;
} }
/* Add libdevice. */ /* Add libdevice. */
status = getLibDeviceName(computeArch, &libDeviceName); std::string libDeviceName;
if (status != PTXGEN_SUCCESS) { status = getLibDeviceName(computeArch, libDeviceName);
if (status != PTXGEN_SUCCESS)
{
nvvmDestroyProgram(&prog); nvvmDestroyProgram(&prog);
return status; return status;
} }
status = addFileToProgram(libDeviceName, prog); status = addFileToProgram(libDeviceName, prog);
free(libDeviceName); if (status != PTXGEN_SUCCESS)
if (status != PTXGEN_SUCCESS) { {
fprintf(stderr, "Please double-check LIBNVVM_HOME environmental variable.\n");
nvvmDestroyProgram(&prog); nvvmDestroyProgram(&prog);
return status; return status;
} }
/* Add the module to the compilation unit. */ /* Add the module to the compilation unit. */
for (i = 0; i < numFilenames; ++i) { for (int i = 0; i < (int)nvvmFiles.size(); ++i)
status = addFileToProgram(filenames[i], prog); {
if (status != PTXGEN_SUCCESS) { status = addFileToProgram(nvvmFiles[i], prog);
if (status != PTXGEN_SUCCESS)
{
nvvmDestroyProgram(&prog); nvvmDestroyProgram(&prog);
return status; return status;
} }
} }
const int numOptions = nvvmOptions.size();
std::vector<const char*> options(numOptions);
for (int i = 0; i < numOptions; i++)
options[i] = nvvmOptions[i].c_str();
/* Verify the compilation unit. */ /* Verify the compilation unit. */
if (nvvmVerifyProgram(prog, numOptions, options) != NVVM_SUCCESS) { if (nvvmVerifyProgram(prog, numOptions, &options[0]) != NVVM_SUCCESS)
{
fprintf(stderr, "Failed to verify the compilation unit\n"); fprintf(stderr, "Failed to verify the compilation unit\n");
status |= PTXGEN_LIBNVVM_VERIFICATION_ERROR; status |= PTXGEN_LIBNVVM_VERIFICATION_ERROR;
} }
@@ -167,74 +168,81 @@ static PTXGENStatus generatePTX(int numOptions, const char **options,
/* Print warnings and errors. */ /* Print warnings and errors. */
{ {
size_t logSize; size_t logSize;
char *log; if (nvvmGetProgramLogSize(prog, &logSize) != NVVM_SUCCESS)
if (nvvmGetProgramLogSize(prog, &logSize) != NVVM_SUCCESS) { {
fprintf(stderr, "Failed to get the compilation log size\n"); fprintf(stderr, "Failed to get the compilation log size\n");
status |= PTXGEN_LIBNVVM_ERROR; status |= PTXGEN_LIBNVVM_ERROR;
} else { }
log = (char *) malloc(logSize); else
if (log == NULL) { {
fprintf(stderr, "Failed to allocate memory\n"); std::string log(logSize,0);
status |= PTXGEN_BAD_ALLOC_ERROR; if (nvvmGetProgramLog(prog, &log[0]) != NVVM_SUCCESS)
} else if (nvvmGetProgramLog(prog, log) != NVVM_SUCCESS) { {
fprintf(stderr, "Failed to get the compilation log\n"); fprintf(stderr, "Failed to get the compilation log\n");
status |= PTXGEN_LIBNVVM_ERROR; status |= PTXGEN_LIBNVVM_ERROR;
} else { }
fprintf(stderr, "%s\n", log); else
{
fprintf(stderr, "%s\n", log.c_str());
} }
free(log);
} }
} }
if (status & PTXGEN_LIBNVVM_VERIFICATION_ERROR) { if (status & PTXGEN_LIBNVVM_VERIFICATION_ERROR)
{
nvvmDestroyProgram(&prog); nvvmDestroyProgram(&prog);
return status; return status;
} }
/* Compile the compilation unit. */ /* Compile the compilation unit. */
if (nvvmCompileProgram(prog, numOptions, options) != NVVM_SUCCESS) { if (nvvmCompileProgram(prog, numOptions, &options[0]) != NVVM_SUCCESS)
{
fprintf(stderr, "Failed to generate PTX from the compilation unit\n"); fprintf(stderr, "Failed to generate PTX from the compilation unit\n");
status |= PTXGEN_LIBNVVM_COMPILATION_ERROR; status |= PTXGEN_LIBNVVM_COMPILATION_ERROR;
} else { }
else
{
size_t ptxSize; size_t ptxSize;
char *ptx; if (nvvmGetCompiledResultSize(prog, &ptxSize) != NVVM_SUCCESS)
if (nvvmGetCompiledResultSize(prog, &ptxSize) != NVVM_SUCCESS) { {
fprintf(stderr, "Failed to get the PTX output size\n"); fprintf(stderr, "Failed to get the PTX output size\n");
status |= PTXGEN_LIBNVVM_ERROR; status |= PTXGEN_LIBNVVM_ERROR;
} else { }
ptx = (char *) malloc(ptxSize); else
if (ptx == NULL) { {
fprintf(stderr, "Failed to allocate memory\n"); std::string ptx(ptxSize,0);
status |= PTXGEN_BAD_ALLOC_ERROR; if (nvvmGetCompiledResult(prog, &ptx[0]) != NVVM_SUCCESS)
} else if (nvvmGetCompiledResult(prog, ptx) != NVVM_SUCCESS) { {
fprintf(stderr, "Failed to get the PTX output\n"); fprintf(stderr, "Failed to get the PTX output\n");
status |= PTXGEN_LIBNVVM_ERROR; status |= PTXGEN_LIBNVVM_ERROR;
} else { }
fprintf(stdout, "%s\n", ptx); else
{
out << ptx;
} }
free(ptx);
} }
} }
/* Print warnings and errors. */ /* Print warnings and errors. */
{ {
size_t logSize; size_t logSize;
char *log; if (nvvmGetProgramLogSize(prog, &logSize) != NVVM_SUCCESS)
if (nvvmGetProgramLogSize(prog, &logSize) != NVVM_SUCCESS) { {
fprintf(stderr, "Failed to get the compilation log size\n"); fprintf(stderr, "Failed to get the compilation log size\n");
status |= PTXGEN_LIBNVVM_ERROR; status |= PTXGEN_LIBNVVM_ERROR;
} else { }
log = (char *) malloc(logSize); else
if (log == NULL) { {
fprintf(stderr, "Failed to allocate memory\n"); std::string log(logSize,0);
status |= PTXGEN_BAD_ALLOC_ERROR; if (nvvmGetProgramLog(prog, &log[0]) != NVVM_SUCCESS)
} else if (nvvmGetProgramLog(prog, log) != NVVM_SUCCESS) { {
fprintf(stderr, "Failed to get the compilation log\n"); fprintf(stderr, "Failed to get the compilation log\n");
status |= PTXGEN_LIBNVVM_ERROR; status |= PTXGEN_LIBNVVM_ERROR;
} else { }
fprintf(stderr, "%s\n", log); else
{
fprintf(stderr, "%s\n", log.c_str());
} }
free(log);
} }
} }
@@ -250,51 +258,151 @@ static void showUsage()
" [FILE] could be a .bc file or a .ll file\n"); " [FILE] could be a .bc file or a .ll file\n");
} }
/* Print the ptxgen command-line help to stdout and terminate the process.
 *
 * ret: exit status handed to exit(). The argument parser in this file passes
 *      0 for --help and 1 after reporting an invalid argument.
 *
 * This function never returns: it always ends in exit(ret).
 */
static void lUsage(const int ret)
{
  fprintf(stdout, "\nusage: ptxgen\n");
  fprintf(stdout, " [--help]\t\t This help\n");
  fprintf(stdout, " [--verbose]\t\t Be verbose\n");
  /* sm_35 is the only architecture advertised by the help text. */
  fprintf(stdout, " [--arch={%s}]\t GPU target architecture\n", "sm_35");
  fprintf(stdout, " [-o <name>]\t\t Output file name\n");
  fprintf(stdout, " [-g]\t\t Enable generation of debugging information \n");
  fprintf(stdout, " [--opt=]\t\t Optimization parameters \n");
  fprintf(stdout, " \t\t\t 0 - disable optimizations \n");
  fprintf(stdout, " \t\t\t 3 - default, enable optimizations \n");
  fprintf(stdout, " [--ftz=]\t\t Flush-to-zero mode when performing single-precision floating-point operations\n");
  fprintf(stdout, " \t\t\t 0 - default, preserve denormal values\n");
  fprintf(stdout, " \t\t\t 1 - flush denormal values to zero\n");
  fprintf(stdout, " [--prec-sqrt=]\t Precision mode for single-precision floating-point square root\n");
  fprintf(stdout, " \t\t\t 0 - use a faster approximation\n");
  fprintf(stdout, " \t\t\t 1 - default, use IEEE round-to-nearest mode\n");
  fprintf(stdout, " [--prec-div=]\t Precision mode for single-precision floating-point division and reciprocals\n");
  fprintf(stdout, " \t\t\t 0 - use a faster approximation\n");
  fprintf(stdout, " \t\t\t 1 - default, use IEEE round-to-nearest mode\n");
  fprintf(stdout, " [--fma=]\t\t FMA contraction mode \n");
  fprintf(stdout, " \t\t\t 0 - disable\n");
  fprintf(stdout, " \t\t\t 1 - default, enable\n");
  fprintf(stdout, " [--use_fast_math]\t Make use of fast math. Implies --ftz=1 --prec-div=0 --prec-sqrt=0\n");
  fprintf(stdout, " \n");
  exit(ret);
}
int main(int argc, char *argv[]) int main(int argc, char *argv[])
{ {
PTXGENStatus status = PTXGEN_SUCCESS; int _opt = 3;
int numOptions = 0; int _ftz = 0;
char **options = NULL; int _precSqrt = 1;
int numFilenames = 0; int _precDiv = 1;
char **filenames = NULL; int _fma = 1;
int computeArch = 35; bool _useFastMath = false;
int i; bool _debug = false;
bool _verbose = false;
std::string _arch = "sm_35";
std::string fileIR, filePTX;
/* Process the command-line arguments to extract the libnvvm options and the for (int i = 1; i < argc; ++i)
* input file names. */ {
if (argc == 1) { if (!strcmp(argv[i], "--help"))
showUsage(); lUsage(0);
return PTXGEN_INVALID_USAGE; else if (!strncmp(argv[i], "--arch=", 7))
} _arch = std::string(argv[i]+7);
else if (!strncmp(argv[i], "-g", 2))
options = (char **) malloc((argc ) * sizeof (char *)); _debug = true;
filenames = (char **) malloc((argc - 1) * sizeof (char *)); else if (!strncmp(argv[i], "--verbose", 9))
_verbose = true;
for (i = 1; i < argc; ++i) { else if (!strncmp(argv[i], "--opt=", 6))
if (argv[i][0] == '-') { _opt = atoi(argv[i]+6);
options[numOptions] = argv[i]; else if (!strncmp(argv[i], "--ftz=", 6))
++numOptions; _ftz = atoi(argv[i]+6);
} else { else if (!strncmp(argv[i], "--prec-sqrt=", 12))
filenames[numFilenames] = argv[i]; _precSqrt = atoi(argv[i]+12);
++numFilenames; else if (!strncmp(argv[i], "--prec-div=", 11))
_precDiv = atoi(argv[i]+11);
else if (!strncmp(argv[i], "--fma=", 6))
_fma = atoi(argv[i]+6);
else if (!strncmp(argv[i], "--use_fast_math", 15))
_useFastMath = true;
else if (!strcmp(argv[i], "-o"))
{
if (++i == argc)
{
fprintf(stderr, "No output file specified after -o option.\n");
lUsage(1);
}
filePTX = std::string(argv[i]);
}
else
{
const char * ext = strrchr(argv[i], '.');
if (ext == NULL)
{
fprintf(stderr, " Unknown argument: %s \n", argv[i]);
lUsage(1);
}
else if (strncmp(ext, ".ll", 3) && strncmp(ext, ".bc", 3))
{
fprintf(stderr, " Unkown extension of the input file: %s \n", ext);
lUsage(1);
}
else if (filePTX.empty())
{
fileIR = std::string(argv[i]);
if (filePTX.empty())
{
char * baseName = argv[i];
while (baseName != ext)
filePTX += std::string(baseName++,1);
}
filePTX += ".ptx";
}
} }
} }
static char sm_35opt[] = "-arch=compute_35";
options[numOptions] = sm_35opt; if (fileIR.empty())
numOptions++; {
fprintf(stderr, "ptxgen fatal : No input file specified; use option --help for more information\n");
if (numFilenames == 0) { exit(1);
/* If no input filename is found, then show the usage. */
showUsage();
status = PTXGEN_INVALID_USAGE;
} else {
/* Run libnvvm to generate PTX. */
status = generatePTX(numOptions, (const char **) options,
numFilenames, (const char **) filenames,
computeArch);
} }
free(options); #if 0
free(filenames); fprintf(stderr, "fileIR= %s\n", fileIR.c_str());
return status; fprintf(stderr, "filePTX= %s\n", filePTX.c_str());
fprintf(stderr, "arch= %s\n", _arch.c_str());
fprintf(stderr, "debug= %s\n", _debug ? "true" : "false");
fprintf(stderr, "verbose= %s\n", _verbose ? "true" : "false");
fprintf(stderr, "opt= %d\n", _opt);
fprintf(stderr, "ftz= %d\n", _ftz);
fprintf(stderr, "prec-sqrt= %d\n", _precSqrt);
fprintf(stderr, "prec-div= %d\n", _precDiv);
fprintf(stderr, "fma= %d\n", _fma);
fprintf(stderr, "use_fast_math= %s\n", _useFastMath ? "true" : "false");
#endif
int computeArch = 35;
assert(_arch == std::string("sm_35"));
if (_useFastMath)
{
_ftz = 1;
_precSqrt = _precDiv = 0;
}
std::vector<std::string> nvvmOptions;
nvvmOptions.push_back("-arch=compute_35");
nvvmOptions.push_back("-ftz=" + lValueToString(_ftz));
nvvmOptions.push_back("-prec-sqrt=" + lValueToString(_precSqrt));
nvvmOptions.push_back("-prec-div=" + lValueToString(_precDiv));
nvvmOptions.push_back("-fma=" + lValueToString(_fma));
if (_debug)
nvvmOptions.push_back("-g");
std::vector<std::string> nvvmFiles;
nvvmFiles.push_back(fileIR);
std::ofstream outputPTX(filePTX.c_str());
assert(outputPTX);
const int ret = generatePTX(nvvmOptions, nvvmFiles, outputPTX, computeArch);
return ret;
} }