diff --git a/examples/portable/aobench/Makefile_ptx b/examples/portable/aobench/Makefile_ptx index fb390eb1..6cc1a378 100644 --- a/examples/portable/aobench/Makefile_ptx +++ b/examples/portable/aobench/Makefile_ptx @@ -2,7 +2,15 @@ PROG=ao ISPC_SRC=ao.ispc CU_SRC=ao.cu CXX_SRC=ao.cpp + +#K80 +#PTXCC_REGMAX=128 +#NVARCH=sm_37 + +#K20/K40 PTXCC_REGMAX=64 +NVARCH=sm_35 + #ISPC_FLAGS= --opt=disable-uniform-control-flow #LLVM_GPU=1 diff --git a/examples/portable/common_ptx.mk b/examples/portable/common_ptx.mk index cfaa0b02..da693275 100644 --- a/examples/portable/common_ptx.mk +++ b/examples/portable/common_ptx.mk @@ -1,21 +1,22 @@ NVCC_SRC=../../util/nvcc_helpers.cu NVCC_OBJS=objs_ptx/nvcc_helpers_nvcc.o +NVARCH ?= sm_35 # CXX=g++ -ffast-math CXXFLAGS=-O3 -I$(CUDATK)/include -Iobjs_ptx/ -D_CUDA_ -I../../util -I../../ # NVCC=nvcc -NVCC_FLAGS+=-O3 -arch=sm_35 -D_CUDA_ -I../../util -Xptxas=-v -Iobjs_ptx/ +NVCC_FLAGS+=-O3 -arch=$(NVARCH) -D_CUDA_ -I../../util -Xptxas=-v -Iobjs_ptx/ ifdef PTXCC_REGMAX NVCC_FLAGS += --maxrregcount=$(PTXCC_REGMAX) endif NVCC_FLAGS+=--use_fast_math # LD=nvcc -LDFLAGS=-lcudart -lcudadevrt -arch=sm_35 +LDFLAGS=-lcudart -lcudadevrt -arch=$(NVARCH) # -PTXCC=$(ISPC_HOME)/ptxtools/ptxcc -PTXCC_FLAGS+= -Xptxas=-v +PTXCC=$(ISPC_HOME)/ptxtools/ptxcc --arch=$(NVARCH) +PTXCC_FLAGS+= -Xptxas=-v ifdef PTXCC_REGMAX PTXCC_FLAGS += -maxrregcount=$(PTXCC_REGMAX) endif @@ -41,13 +42,13 @@ CXX_SRC+=ispc_malloc.cpp CXX_OBJS+=objs_ptx/ispc_malloc_gcc.o PTXGEN = $(ISPC_HOME)/ptxtools/ptxgen -PTXGEN += --use_fast_math +PTXGEN += --use_fast_math --arch=$(NVARCH) #LLVM32=$(HOME)/usr/local/llvm/bin-3.2 #LLVM32DIS=$(LLVM32)/bin/llvm-dis LLC=$(LLVM_ROOT)/bin/llc -LLC_FLAGS=-march=nvptx64 -mcpu=sm_35 +LLC_FLAGS=-march=nvptx64 -mcpu=$(NVARCH) # .SUFFIXES: .bc .o .cu .ll diff --git a/examples/portable/deferred/Makefile_ptx b/examples/portable/deferred/Makefile_ptx index 58385e59..9052db75 100644 --- a/examples/portable/deferred/Makefile_ptx +++ 
b/examples/portable/deferred/Makefile_ptx @@ -2,7 +2,14 @@ PROG=deferred_shading ISPC_SRC=kernels.ispc CU_SRC=kernels.cu CXX_SRC=common.cpp main.cpp + +#K80 +#PTXCC_REGMAX=128 +#NVARCH=sm_37 + +#K20/K40 PTXCC_REGMAX=64 +NVARCH=sm_35 NVVM_GPU=1 #LLVM_GPU=1 diff --git a/examples/portable/mergeSort/Makefile_ptx b/examples/portable/mergeSort/Makefile_ptx index f64581e4..1b2b4917 100644 --- a/examples/portable/mergeSort/Makefile_ptx +++ b/examples/portable/mergeSort/Makefile_ptx @@ -2,10 +2,19 @@ PROG=mergeSort ISPC_SRC=mergeSort.ispc CU_SRC=mergeSort.cu CXX_SRC=mergeSort.cpp mergeSort.cpp -PTXCC_REGMAX=64 #PTXCC_FLAGS= -Xptxas=-O3 #NVCC_FLAGS=-Xptxas=-O0 +#K80 +#PTXCC_REGMAX=128 +#NVARCH=sm_37 + +#K20/K40 +PTXCC_REGMAX=64 +NVARCH=sm_35 + + + LLVM_GPU=1 NVVM_GPU=1 diff --git a/examples/portable/nbody_hermite4/Makefile_ptx b/examples/portable/nbody_hermite4/Makefile_ptx index da8b268b..0c9300d6 100644 --- a/examples/portable/nbody_hermite4/Makefile_ptx +++ b/examples/portable/nbody_hermite4/Makefile_ptx @@ -2,10 +2,18 @@ PROG=hermite4 ISPC_SRC=hermite4.ispc #CU_SRC=hermite4.cu CXX_SRC=hermite4.cpp + +#K80 +#PTXCC_REGMAX=128 +#NVARCH=sm_37 + +#K20/K40 PTXCC_REGMAX=64 +NVARCH=sm_35 + #ISPC_FLAGS= --opt=disable-uniform-control-flow -#LLVM_GPU=1 +# LLVM_GPU=1 NVVM_GPU=1 include ../common_ptx.mk diff --git a/examples/portable/options/Makefile_ptx b/examples/portable/options/Makefile_ptx index 1065eb92..b9262d2a 100644 --- a/examples/portable/options/Makefile_ptx +++ b/examples/portable/options/Makefile_ptx @@ -2,7 +2,16 @@ PROG=options ISPC_SRC=options.ispc CU_SRC=options.cu CXX_SRC=options.cpp + +#K80 +#PTXCC_REGMAX=128 +#NVARCH=sm_37 + +#K20/K40 PTXCC_REGMAX=128 +NVARCH=sm_35 + + #LLVM_GPU=1 diff --git a/examples/portable/radixSort/Makefile_ptx b/examples/portable/radixSort/Makefile_ptx index da7494e4..899f5b3b 100644 --- a/examples/portable/radixSort/Makefile_ptx +++ b/examples/portable/radixSort/Makefile_ptx @@ -4,7 +4,16 @@ ISPC_SRC=radixSort.ispc CU_SRC=radixSort.cu # 
NVCC_FLAGS=-Xptxas=-O1 CXX_SRC=radixSort.cpp radixSort.cpp + +#K80 +#PTXCC_REGMAX=128 +#NVARCH=sm_37 + +#K20/K40 PTXCC_REGMAX=64 +NVARCH=sm_35 + + LLVM_GPU=1 NVVM_GPU=1 diff --git a/examples/portable/rt/Makefile_ptx b/examples/portable/rt/Makefile_ptx index 45eae8c6..8d524bb2 100644 --- a/examples/portable/rt/Makefile_ptx +++ b/examples/portable/rt/Makefile_ptx @@ -2,7 +2,16 @@ PROG=rt ISPC_SRC=rt.ispc CU_SRC=rt.cu CXX_SRC=rt.cpp + +#K20/K40 PTXCC_REGMAX=32 +NVARCH=sm_35 + +#K80 +#PTXCC_REGMAX=64 +#NVARCH=sm_37 + + #LLVM_GPU=1 NVVM_GPU=1 diff --git a/examples/portable/volume_rendering/Makefile_ptx b/examples/portable/volume_rendering/Makefile_ptx index 6aef695a..630a70d2 100644 --- a/examples/portable/volume_rendering/Makefile_ptx +++ b/examples/portable/volume_rendering/Makefile_ptx @@ -2,7 +2,16 @@ PROG=volume ISPC_SRC=volume.ispc CU_SRC=volume.cu CXX_SRC=volume.cpp + +#K80 +#PTXCC_REGMAX=128 +#NVARCH=sm_37 + +#K20/K40 PTXCC_REGMAX=64 +NVARCH=sm_35 + + #LLVM_GPU=1 NVVM_GPU=1 diff --git a/ptxtools/GPUTargets.h b/ptxtools/GPUTargets.h new file mode 100644 index 00000000..a15d4d09 --- /dev/null +++ b/ptxtools/GPUTargets.h @@ -0,0 +1,43 @@ +#pragma once + +// -*- mode: c++ -*- +/* + Copyright (c) 2015, Evghenii Gaburov + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are +met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + * Neither the name of Intel Corporation nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. 
+ + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS + IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A + PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER + OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include <initializer_list> + +namespace GPUTargets +{ + // GPU architectures accepted by --arch; the first entry is the default. + static const auto computeMode = {"sm_35", "sm_37"}; +} diff --git a/ptxtools/Makefile b/ptxtools/Makefile index cc25d5e9..08bf72d1 100644 --- a/ptxtools/Makefile +++ b/ptxtools/Makefile @@ -33,7 +33,7 @@ all: ptxcc ptxgen CXX=clang++ -CXXFLAGS += -O3 --std=c++11 +CXXFLAGS += -O3 --std=c++11 -Wno-deprecated-register CXXFLAGS += -I/opt/local/include LD=clang++ @@ -77,5 +77,5 @@ ptxgen: ptxgen.cpp clean: /bin/rm -f ptxgen ptxcc $(OBJ) ptxgrammar.hh ptxgrammar.cc ptx.cc ptxgrammar.output -$(OBJ): ptxgrammar.cc ptx.cc PTXParser.h PTXLexer.h +$(OBJ): ptxgrammar.cc ptx.cc PTXParser.h PTXLexer.h GPUTargets.h diff --git a/ptxtools/ptxcc.cpp b/ptxtools/ptxcc.cpp index 474ab3ff..49385f69 100644 --- a/ptxtools/ptxcc.cpp +++ b/ptxtools/ptxcc.cpp @@ -1,6 +1,6 @@ // -*- mode: c++ -*- /* - Copyright (c) 2014, Evghenii Gaburov + Copyright (c) 2014-2015, Evghenii Gaburov All rights reserved. 
Redistribution and use in source and binary forms, with or without @@ -43,6 +43,7 @@ met: #include #include #include "PTXParser.h" +#include "GPUTargets.h" /* @@ -117,11 +118,15 @@ static std::vector lSplitString(const std::string &s, char delim) static void lUsage(const int ret) { fprintf(stdout, "\nusage: ptxcc [options] file.ptx \n"); - fprintf(stdout, " [--help]\t\t\t\t This help\n"); - fprintf(stdout, " [--verbose]\t\t\t\t Be verbose\n"); - fprintf(stdout, " [--arch={%s}]\t\t\t GPU target architecture\n", "sm_35"); - fprintf(stdout, " [-o ]\t\t\t\t Output file name\n"); - fprintf(stdout, " [-Xnvcc=]\t\t Arguments to pass through to \"nvcc\"\n"); + fprintf(stdout, " [--help]\t\t\t This help\n"); + fprintf(stdout, " [--verbose]\t\t\t Be verbose\n"); + fprintf(stdout, " [--arch=]\t\t\t GPU target architectures:\n"); + fprintf(stdout, " \t\t\t\t "); + for (const auto& mode : GPUTargets::computeMode) + fprintf(stdout, "%s ", mode); + fprintf(stdout, "\n"); + fprintf(stdout, " [-o ]\t\t\t Output file name\n"); + fprintf(stdout, " [-Xnvcc=]\t Arguments to pass through to \"nvcc\"\n"); fprintf(stdout, " \n"); exit(ret); } @@ -132,7 +137,7 @@ int main(int _argc, char * _argv[]) char *argv[128]; lGetAllArgs(_argc, _argv, argc, argv); - std::string arch="sm_35"; + std::string arch = *GPUTargets::computeMode.begin(); std::string filePTX; std::string fileOBJ; std::string extString = ".ptx"; @@ -195,7 +200,11 @@ int main(int _argc, char * _argv[]) for (int i= 0; i < (int)nvccArgumentList.size(); i++) fprintf(stderr, " arg= %d : %s \n", i, nvccArgumentList[i].c_str()); #endif - assert(arch == std::string("sm_35")); + if (std::find(GPUTargets::computeMode.begin(), GPUTargets::computeMode.end(), arch) == GPUTargets::computeMode.end()) + { + fprintf(stderr, "ptxcc fatal : --arch=%s is not supported; use option --help for more information\n", arch.c_str()); + exit(1); + } if (filePTX.empty()) { fprintf(stderr, "ptxcc fatal : No input file specified; use option --help for more 
information\n"); diff --git a/ptxtools/ptxgen.cpp b/ptxtools/ptxgen.cpp index 7d764d08..23659ea6 100644 --- a/ptxtools/ptxgen.cpp +++ b/ptxtools/ptxgen.cpp @@ -1,6 +1,6 @@ // -*- mode: c++ -*- /* - Copyright (c) 2014, Evghenii Gaburov + Copyright (c) 2014-2015, Evghenii Gaburov All rights reserved. Redistribution and use in source and binary forms, with or without @@ -43,6 +43,8 @@ met: #include #include #include +#include +#include "GPUTargets.h" #include #include @@ -51,9 +53,7 @@ met: template static std::string lValueToString(const T& value) { - std::ostringstream oss; - oss << value; - return oss.str(); + return std::to_string(value); } struct Exception : public std::exception @@ -76,7 +76,7 @@ struct NVVMProg nvvmProgram get() const {return prog; } }; -static std::string getLibDeviceName(const int computeArch) +static std::string getLibDeviceName(int computeArch) { const char *env = getenv("LIBNVVM_HOME"); #ifdef LIBNVVM_HOME @@ -97,6 +97,7 @@ static std::string getLibDeviceName(const int computeArch) /* Use libdevice for compute_20, if the target is not compute_20, compute_30, * or compute_35. */ + if (computeArch == 37) computeArch = 35; const std::string libdevice = std::string("/libdevice/libdevice.compute_") + lValueToString(computeArch)+ "." 
+ @@ -219,7 +220,11 @@ static void lUsage(const int ret) fprintf(stdout, "\nusage: ptxgen [options] file.[ll,bc] \n"); fprintf(stdout, " [--help]\t\t This help\n"); fprintf(stdout, " [--verbose]\t\t Be verbose\n"); - fprintf(stdout, " [--arch={%s}]\t GPU target architecture\n", "sm_35"); + fprintf(stdout, " [--arch=]\t\t GPU target architectures:\n"); + fprintf(stdout, " \t\t\t "); + for (const auto& mode : GPUTargets::computeMode) + fprintf(stdout, "%s ", mode); + fprintf(stdout, "\n"); fprintf(stdout, " [-o ]\t\t Output file name\n"); fprintf(stdout, " [-g]\t\t Enable generation of debuggin information \n"); fprintf(stdout, " [--opt=]\t\t Optimization parameters \n"); @@ -252,7 +257,7 @@ int main(int argc, char *argv[]) bool _useFastMath = false; bool _debug = false; bool _verbose = false; - std::string _arch = "sm_35"; + std::string _arch = *GPUTargets::computeMode.begin(); std::string fileIR, filePTX; for (int i = 1; i < argc; ++i) @@ -333,8 +338,11 @@ int main(int argc, char *argv[]) fprintf(stderr, "use_fast_math= %s\n", _useFastMath ? 
"true" : "false"); #endif - int computeArch = 35; - assert(_arch == std::string("sm_35")); + if (std::find(GPUTargets::computeMode.begin(), GPUTargets::computeMode.end(), _arch) == GPUTargets::computeMode.end()) + { + fprintf(stderr, "ptxgen fatal : --arch=%s is not supported; use option --help for more information\n", _arch.c_str()); + exit(1); + } if (_useFastMath) { @@ -342,8 +350,14 @@ int main(int argc, char *argv[]) _precSqrt = _precDiv = 0; } + /* replace "sm" with "compute" */ + assert(_arch[0] == 's' && _arch[1] == 'm' && _arch[2] == '_'); + const std::string _mode = std::string("compute_") + &_arch[3]; + const int computeArch = atoi(&_arch[3]); + + std::vector nvvmOptions; - nvvmOptions.push_back("-arch=compute_35"); + nvvmOptions.push_back("-arch=" + _mode); nvvmOptions.push_back("-ftz=" + lValueToString(_ftz)); nvvmOptions.push_back("-prec-sqrt=" + lValueToString(_precSqrt)); nvvmOptions.push_back("-prec-div=" + lValueToString(_precDiv)); diff --git a/ptxtools/ptxgrammar.yy b/ptxtools/ptxgrammar.yy index 13120739..61db3fca 100644 --- a/ptxtools/ptxgrammar.yy +++ b/ptxtools/ptxgrammar.yy @@ -1,5 +1,5 @@ /* - Copyright (c) 2014, Evghenii Gaburov + Copyright (c) 2014-2015, Evghenii Gaburov All rights reserved. 
Redistribution and use in source and binary forms, with or without @@ -43,6 +43,8 @@ met: #include #include #include + #include + #include "GPUTargets.h" #define YYERROR_VERBOSE 1 @@ -116,7 +118,13 @@ header: version: TOKEN_VERSION TOKEN_FLOAT { assert($2 >= 3.0); } ;//std::cerr << "Reading PTX version " << $2 << std::endl; }; target: - TOKEN_TARGET TOKEN_STRING { assert(std::string($2) == std::string("sm_35")); } //std::cerr << "Target " << $2 << std::endl; }; + TOKEN_TARGET TOKEN_STRING { + if (std::find(GPUTargets::computeMode.begin(), GPUTargets::computeMode.end(), std::string($2)) == GPUTargets::computeMode.end()) + { + fprintf(stderr, "ptxcc fatal : Found wrong Target=\"%s\" in ptx file\n", $2); + exit(-1); + } + } address_size: TOKEN_ADDRESS_SIZE TOKEN_INT { assert($2 == 64); } //std::cerr << "Address_Size " << $2 << std::endl; };