From c54a91eab3ed10c3d4c0cbe0ad5bff0b1beea61f Mon Sep 17 00:00:00 2001 From: Evghenii Date: Thu, 20 Feb 2014 10:50:17 +0100 Subject: [PATCH] added ptxgen --- examples_ptx/ptxtools/ptxcc.cpp | 14 +- examples_ptx/ptxtools/ptxgen.cpp | 404 ++++++++++++++++++++----------- 2 files changed, 263 insertions(+), 155 deletions(-) diff --git a/examples_ptx/ptxtools/ptxcc.cpp b/examples_ptx/ptxtools/ptxcc.cpp index e6a6942c..f81b2bcf 100644 --- a/examples_ptx/ptxtools/ptxcc.cpp +++ b/examples_ptx/ptxtools/ptxcc.cpp @@ -79,13 +79,13 @@ static std::vector lSplitString(const std::string &s, char delim) static void lUsage(const int ret) { - fprintf(stderr, "\nusage: ptxcc\n"); - fprintf(stderr, " [--help]\t\t\t\t This help\n"); - fprintf(stderr, " [--verbose]\t\t\t\t Be verbose\n"); - fprintf(stderr, " [--arch={%s}]\t\t\t GPU target architecture\n", "sm_35"); - fprintf(stderr, " [-o ]\t\t\t\t Output file name\n"); - fprintf(stderr, " [-Xnvcc=]\t\t Arguments to pass through to \"nvcc\"\n"); - fprintf(stderr, " \n"); + fprintf(stdout, "\nusage: ptxcc\n"); + fprintf(stdout, " [--help]\t\t\t\t This help\n"); + fprintf(stdout, " [--verbose]\t\t\t\t Be verbose\n"); + fprintf(stdout, " [--arch={%s}]\t\t\t GPU target architecture\n", "sm_35"); + fprintf(stdout, " [-o ]\t\t\t\t Output file name\n"); + fprintf(stdout, " [-Xnvcc=]\t\t Arguments to pass through to \"nvcc\"\n"); + fprintf(stdout, " \n"); exit(ret); } diff --git a/examples_ptx/ptxtools/ptxgen.cpp b/examples_ptx/ptxtools/ptxgen.cpp index 09f43b8d..24fbbe5c 100644 --- a/examples_ptx/ptxtools/ptxgen.cpp +++ b/examples_ptx/ptxtools/ptxgen.cpp @@ -1,37 +1,29 @@ -/* - * Copyright 1993-2012 NVIDIA Corporation. All rights reserved. - * - * Please refer to the NVIDIA end user license agreement (EULA) associated - * with this source code for terms and conditions that govern your use of - * this software. Any use, reproduction, disclosure, or distribution of - * this software and related documentation outside the terms of the EULA - * is strictly prohibited. - * - */ +#include +#include +#include +#include +#include +#include +#include +#include +#include #include - -#include -#include #include -/* Two levels of indirection to stringify LIBDEVICE_MAJOR_VERSION and - * LIBDEVICE_MINOR_VERSION correctly. */ -#define getLibDeviceNameForArch(ARCH) \ - _getLibDeviceNameForArch(ARCH, \ - LIBDEVICE_MAJOR_VERSION, \ - LIBDEVICE_MINOR_VERSION) -#define _getLibDeviceNameForArch(ARCH, MAJOR, MINOR) \ - __getLibDeviceNameForArch(ARCH, MAJOR, MINOR) -#define __getLibDeviceNameForArch(ARCH, MAJOR, MINOR) \ - ("/libdevice/libdevice.compute_" #ARCH "." #MAJOR #MINOR ".bc") -#define getLibnvvmHome _getLibnvvmHome(LIBNVVM_HOME) -#define _getLibnvvmHome(NVVM_HOME) __getLibnvvmHome(NVVM_HOME) -#define __getLibnvvmHome(NVVM_HOME) (#NVVM_HOME) +template +static std::string lValueToString(const T& value) +{ + std::ostringstream oss; + oss << value; + return oss.str(); +} typedef struct stat Stat; + +#define PTXGENStatus int enum { PTXGEN_SUCCESS = 0x0000, PTXGEN_FILE_IO_ERROR = 0x0001, @@ -42,54 +34,50 @@ enum { PTXGEN_LIBNVVM_HOME_UNDEFINED = 0x0020, PTXGEN_LIBNVVM_VERIFICATION_ERROR = 0x0040 }; -#define PTXGENStatus int -static PTXGENStatus getLibDeviceName(int computeArch, char **buffer) +static PTXGENStatus getLibDeviceName(const int computeArch, std::string &libDeviceName) { - const char *libnvvmPath = getLibnvvmHome; - const char *libdevice = NULL; + const char *env = getenv("LIBNVVM_HOME"); +#ifdef LIBNVVM_HOME +#define STRINGIFY(x) #x +#define TOSTRING(x) STRINGIFY(x) + const std::string libnvvmPath(env ? env : TOSTRING(LIBNVVM_HOME)); +#undef TOSTRING +#undef STRINGIFY +#else + const std::string libnvvmPath(env); +#endif - if (libnvvmPath == NULL) { - fprintf(stderr, "The environment variable LIBNVVM_HOME undefined\n"); + if (libnvvmPath.empty()) + { + fprintf(stderr, "The environment variable LIBNVVM_HOME is undefined\n"); return PTXGEN_LIBNVVM_HOME_UNDEFINED; } /* Use libdevice for compute_20, if the target is not compute_20, compute_30, * or compute_35. */ - switch (computeArch) { - default: - libdevice = getLibDeviceNameForArch(20); - break; - case 30: - libdevice = getLibDeviceNameForArch(30); - break; - case 35: - libdevice = getLibDeviceNameForArch(35); - break; - } + const std::string libdevice = + std::string("/libdevice/libdevice.compute_") + + lValueToString(computeArch)+ "." + + lValueToString(LIBDEVICE_MAJOR_VERSION) + + lValueToString(LIBDEVICE_MINOR_VERSION) + + ".bc"; - *buffer = (char *) malloc(strlen(libnvvmPath) + strlen(libdevice) + 1); - if (*buffer == NULL) { - fprintf(stderr, "Failed to allocate memory\n"); - return PTXGEN_BAD_ALLOC_ERROR; - } - - /* Concatenate libnvvmPath and name. */ - *buffer = strcat(strcpy(*buffer, libnvvmPath), libdevice); + libDeviceName = libnvvmPath + libdevice; return PTXGEN_SUCCESS; } -static PTXGENStatus addFileToProgram(const char *filename, nvvmProgram prog) +static PTXGENStatus addFileToProgram(const std::string &filename, nvvmProgram prog) { char *buffer; size_t size; Stat fileStat; /* Open the input file. */ - FILE *f = fopen(filename, "rb"); + FILE *f = fopen(filename.c_str(), "rb"); if (f == NULL) { - fprintf(stderr, "Failed to open %s\n", filename); + fprintf(stderr, "Failed to open %s\n", filename.c_str()); return PTXGEN_FILE_IO_ERROR; } @@ -102,17 +90,17 @@ static PTXGENStatus addFileToProgram(const char *filename, nvvmProgram prog) } size = fread(buffer, 1, fileStat.st_size, f); if (ferror(f)) { - fprintf(stderr, "Failed to read %s\n", filename); + fprintf(stderr, "Failed to read %s\n", filename.c_str()); fclose(f); free(buffer); return PTXGEN_FILE_IO_ERROR; } fclose(f); - if (nvvmAddModuleToProgram(prog, buffer, size, filename) != NVVM_SUCCESS) { + if (nvvmAddModuleToProgram(prog, buffer, size, filename.c_str()) != NVVM_SUCCESS) { fprintf(stderr, "Failed to add the module %s to the compilation unit\n", - filename); + filename.c_str()); free(buffer); return PTXGEN_LIBNVVM_ERROR; } @@ -121,45 +109,58 @@ static PTXGENStatus addFileToProgram(const char *filename, nvvmProgram prog) return PTXGEN_SUCCESS; } -static PTXGENStatus generatePTX(int numOptions, const char **options, - int numFilenames, const char **filenames, - int computeArch) +static PTXGENStatus generatePTX( + std::vector nvvmOptions, + std::vector nvvmFiles, + std::ostream &out, + const int computeArch) { + nvvmProgram prog; PTXGENStatus status; - nvvmProgram prog; - char *libDeviceName; - int i; /* Create the compiliation unit. */ - if (nvvmCreateProgram(&prog) != NVVM_SUCCESS) { + if (nvvmCreateProgram(&prog) != NVVM_SUCCESS) + { fprintf(stderr, "Failed to create the compilation unit\n"); return PTXGEN_LIBNVVM_ERROR; } + /* Add libdevice. */ - status = getLibDeviceName(computeArch, &libDeviceName); - if (status != PTXGEN_SUCCESS) { + std::string libDeviceName; + status = getLibDeviceName(computeArch, libDeviceName); + if (status != PTXGEN_SUCCESS) + { nvvmDestroyProgram(&prog); return status; } status = addFileToProgram(libDeviceName, prog); - free(libDeviceName); - if (status != PTXGEN_SUCCESS) { + if (status != PTXGEN_SUCCESS) + { + fprintf(stderr, "Please double-check LIBNVVM_HOME environmental variable.\n"); nvvmDestroyProgram(&prog); return status; } /* Add the module to the compilation unit. */ - for (i = 0; i < numFilenames; ++i) { - status = addFileToProgram(filenames[i], prog); - if (status != PTXGEN_SUCCESS) { + for (int i = 0; i < (int)nvvmFiles.size(); ++i) + { + status = addFileToProgram(nvvmFiles[i], prog); + if (status != PTXGEN_SUCCESS) + { nvvmDestroyProgram(&prog); return status; } } + const int numOptions = nvvmOptions.size(); + std::vector options(numOptions); + for (int i = 0; i < numOptions; i++) + options[i] = nvvmOptions[i].c_str(); + /* Verify the compilation unit. */ - if (nvvmVerifyProgram(prog, numOptions, options) != NVVM_SUCCESS) { + if (nvvmVerifyProgram(prog, numOptions, &options[0]) != NVVM_SUCCESS) + { fprintf(stderr, "Failed to verify the compilation unit\n"); status |= PTXGEN_LIBNVVM_VERIFICATION_ERROR; } @@ -167,74 +168,81 @@ static PTXGENStatus generatePTX(int numOptions, const char **options, /* Print warnings and errors. */ { size_t logSize; - char *log; - if (nvvmGetProgramLogSize(prog, &logSize) != NVVM_SUCCESS) { + if (nvvmGetProgramLogSize(prog, &logSize) != NVVM_SUCCESS) + { fprintf(stderr, "Failed to get the compilation log size\n"); status |= PTXGEN_LIBNVVM_ERROR; - } else { - log = (char *) malloc(logSize); - if (log == NULL) { - fprintf(stderr, "Failed to allocate memory\n"); - status |= PTXGEN_BAD_ALLOC_ERROR; - } else if (nvvmGetProgramLog(prog, log) != NVVM_SUCCESS) { + } + else + { + std::string log(logSize,0); + if (nvvmGetProgramLog(prog, &log[0]) != NVVM_SUCCESS) + { fprintf(stderr, "Failed to get the compilation log\n"); status |= PTXGEN_LIBNVVM_ERROR; - } else { - fprintf(stderr, "%s\n", log); + } + else + { + fprintf(stderr, "%s\n", log.c_str()); } - free(log); } } - if (status & PTXGEN_LIBNVVM_VERIFICATION_ERROR) { + if (status & PTXGEN_LIBNVVM_VERIFICATION_ERROR) + { nvvmDestroyProgram(&prog); return status; } /* Compile the compilation unit. */ - if (nvvmCompileProgram(prog, numOptions, options) != NVVM_SUCCESS) { + if (nvvmCompileProgram(prog, numOptions, &options[0]) != NVVM_SUCCESS) + { fprintf(stderr, "Failed to generate PTX from the compilation unit\n"); status |= PTXGEN_LIBNVVM_COMPILATION_ERROR; - } else { + } + else + { size_t ptxSize; - char *ptx; - if (nvvmGetCompiledResultSize(prog, &ptxSize) != NVVM_SUCCESS) { + if (nvvmGetCompiledResultSize(prog, &ptxSize) != NVVM_SUCCESS) + { fprintf(stderr, "Failed to get the PTX output size\n"); status |= PTXGEN_LIBNVVM_ERROR; - } else { - ptx = (char *) malloc(ptxSize); - if (ptx == NULL) { - fprintf(stderr, "Failed to allocate memory\n"); - status |= PTXGEN_BAD_ALLOC_ERROR; - } else if (nvvmGetCompiledResult(prog, ptx) != NVVM_SUCCESS) { + } + else + { + std::string ptx(ptxSize,0); + if (nvvmGetCompiledResult(prog, &ptx[0]) != NVVM_SUCCESS) + { fprintf(stderr, "Failed to get the PTX output\n"); status |= PTXGEN_LIBNVVM_ERROR; - } else { - fprintf(stdout, "%s\n", ptx); + } + else + { + out << ptx; } - free(ptx); } } /* Print warnings and errors. */ { size_t logSize; - char *log; - if (nvvmGetProgramLogSize(prog, &logSize) != NVVM_SUCCESS) { + if (nvvmGetProgramLogSize(prog, &logSize) != NVVM_SUCCESS) + { fprintf(stderr, "Failed to get the compilation log size\n"); status |= PTXGEN_LIBNVVM_ERROR; - } else { - log = (char *) malloc(logSize); - if (log == NULL) { - fprintf(stderr, "Failed to allocate memory\n"); - status |= PTXGEN_BAD_ALLOC_ERROR; - } else if (nvvmGetProgramLog(prog, log) != NVVM_SUCCESS) { + } + else + { + std::string log(logSize,0); + if (nvvmGetProgramLog(prog, &log[0]) != NVVM_SUCCESS) + { fprintf(stderr, "Failed to get the compilation log\n"); status |= PTXGEN_LIBNVVM_ERROR; - } else { - fprintf(stderr, "%s\n", log); + } + else + { + fprintf(stderr, "%s\n", log.c_str()); } - free(log); } } @@ -250,51 +258,151 @@ static void showUsage() " [FILE] could be a .bc file or a .ll file\n"); } +static void lUsage(const int ret) +{ + fprintf(stdout, "\nusage: ptxgen\n"); + fprintf(stdout, " [--help]\t\t This help\n"); + fprintf(stdout, " [--verbose]\t\t Be verbose\n"); + fprintf(stdout, " [--arch={%s}]\t GPU target architecture\n", "sm_35"); + fprintf(stdout, " [-o ]\t\t Output file name\n"); + fprintf(stdout, " [-g]\t\t Enable generation of debuggin information \n"); + fprintf(stdout, " [--opt=]\t\t Optimization parameters \n"); + fprintf(stdout, " \t\t\t 0 - disable optimizations \n"); + fprintf(stdout, " \t\t\t 3 - defalt, enable optimizations \n"); + fprintf(stdout, " [--ftz=]\t\t Flush-to-zero mode when performsing single-precision floating-point operations\n"); + fprintf(stdout, " \t\t\t 0 - default, preserve denormal values\n"); + fprintf(stdout, " \t\t\t 1 - flush denormal values to zero\n"); + fprintf(stdout, " [--prec-sqrt=]\t Precision mode for single-precision floating-point square root\n"); + fprintf(stdout, " \t\t\t 0 - use a faster approximation\n"); + fprintf(stdout, " \t\t\t 1 - default, use IEEE round-to-nearest mode\n"); + fprintf(stdout, " [--prec-div=]\t Precision mode for single-precision floating-point division and reciprocals\n"); + fprintf(stdout, " \t\t\t 0 - use a faster approximation\n"); + fprintf(stdout, " \t\t\t 1 - default, use IEEE round-to-nearest mode\n"); + fprintf(stdout, " [--fma=]\t\t FMA contraction mode \n"); + fprintf(stdout, " \t\t\t 0 - disable\n"); + fprintf(stdout, " \t\t\t 1 - default, enable\n"); + fprintf(stdout, " [--use_fast_math]\t Make use of fast maih. Implies --ftz=1 --prec-div=0 --prec-sqrt=0\n"); + fprintf(stdout, " \n"); + exit(ret); +} + int main(int argc, char *argv[]) { - PTXGENStatus status = PTXGEN_SUCCESS; - int numOptions = 0; - char **options = NULL; - int numFilenames = 0; - char **filenames = NULL; - int computeArch = 35; - int i; + int _opt = 3; + int _ftz = 0; + int _precSqrt = 1; + int _precDiv = 1; + int _fma = 1; + bool _useFastMath = false; + bool _debug = false; + bool _verbose = false; + std::string _arch = "sm_35"; + std::string fileIR, filePTX; - /* Process the command-line arguments to extract the libnvvm options and the - * input file names. */ - if (argc == 1) { - showUsage(); - return PTXGEN_INVALID_USAGE; - } - - options = (char **) malloc((argc ) * sizeof (char *)); - filenames = (char **) malloc((argc - 1) * sizeof (char *)); - - for (i = 1; i < argc; ++i) { - if (argv[i][0] == '-') { - options[numOptions] = argv[i]; - ++numOptions; - } else { - filenames[numFilenames] = argv[i]; - ++numFilenames; + for (int i = 1; i < argc; ++i) + { + if (!strcmp(argv[i], "--help")) + lUsage(0); + else if (!strncmp(argv[i], "--arch=", 7)) + _arch = std::string(argv[i]+7); + else if (!strncmp(argv[i], "-g", 2)) + _debug = true; + else if (!strncmp(argv[i], "--verbose", 9)) + _verbose = true; + else if (!strncmp(argv[i], "--opt=", 6)) + _opt = atoi(argv[i]+6); + else if (!strncmp(argv[i], "--ftz=", 6)) + _ftz = atoi(argv[i]+6); + else if (!strncmp(argv[i], "--prec-sqrt=", 12)) + _precSqrt = atoi(argv[i]+12); + else if (!strncmp(argv[i], "--prec-div=", 11)) + _precDiv = atoi(argv[i]+11); + else if (!strncmp(argv[i], "--fma=", 6)) + _fma = atoi(argv[i]+6); + else if (!strncmp(argv[i], "--use_fast_math", 15)) + _useFastMath = true; + else if (!strcmp(argv[i], "-o")) + { + if (++i == argc) + { + fprintf(stderr, "No output file specified after -o option.\n"); + lUsage(1); + } + filePTX = std::string(argv[i]); + } + else + { + const char * ext = strrchr(argv[i], '.'); + if (ext == NULL) + { + fprintf(stderr, " Unknown argument: %s \n", argv[i]); + lUsage(1); + } + else if (strncmp(ext, ".ll", 3) && strncmp(ext, ".bc", 3)) + { + fprintf(stderr, " Unkown extension of the input file: %s \n", ext); + lUsage(1); + } + else if (filePTX.empty()) + { + fileIR = std::string(argv[i]); + if (filePTX.empty()) + { + char * baseName = argv[i]; + while (baseName != ext) + filePTX += std::string(baseName++,1); + } + filePTX += ".ptx"; + } } } - static char sm_35opt[] = "-arch=compute_35"; - options[numOptions] = sm_35opt; - numOptions++; - - if (numFilenames == 0) { - /* If no input filename is found, then show the usage. */ - showUsage(); - status = PTXGEN_INVALID_USAGE; - } else { - /* Run libnvvm to generate PTX. */ - status = generatePTX(numOptions, (const char **) options, - numFilenames, (const char **) filenames, - computeArch); + + if (fileIR.empty()) + { + fprintf(stderr, "ptxgen fatal : No input file specified; use option --help for more information\n"); + exit(1); } - free(options); - free(filenames); - return status; +#if 0 + fprintf(stderr, "fileIR= %s\n", fileIR.c_str()); + fprintf(stderr, "filePTX= %s\n", filePTX.c_str()); + fprintf(stderr, "arch= %s\n", _arch.c_str()); + fprintf(stderr, "debug= %s\n", _debug ? "true" : "false"); + fprintf(stderr, "verbose= %s\n", _verbose ? "true" : "false"); + fprintf(stderr, "opt= %d\n", _opt); + fprintf(stderr, "ftz= %d\n", _ftz); + fprintf(stderr, "prec-sqrt= %d\n", _precSqrt); + fprintf(stderr, "prec-div= %d\n", _precDiv); + fprintf(stderr, "fma= %d\n", _fma); + fprintf(stderr, "use_fast_math= %s\n", _useFastMath ? "true" : "false"); +#endif + + int computeArch = 35; + assert(_arch == std::string("sm_35")); + + if (_useFastMath) + { + _ftz = 1; + _precSqrt = _precDiv = 0; + } + + std::vector nvvmOptions; + nvvmOptions.push_back("-arch=compute_35"); + nvvmOptions.push_back("-ftz=" + lValueToString(_ftz)); + nvvmOptions.push_back("-prec-sqrt=" + lValueToString(_precSqrt)); + nvvmOptions.push_back("-prec-div=" + lValueToString(_precDiv)); + nvvmOptions.push_back("-fma=" + lValueToString(_fma)); + if (_debug) + nvvmOptions.push_back("-g"); + + std::vector nvvmFiles; + nvvmFiles.push_back(fileIR); + + std::ofstream outputPTX(filePTX.c_str()); + assert(outputPTX); + + const int ret = generatePTX(nvvmOptions, nvvmFiles, outputPTX, computeArch); + + return ret; } +