diff --git a/examples/portable/aobench/Makefile_ptx b/examples/portable/aobench/Makefile_ptx index fb390eb1..ffc5e088 100644 --- a/examples/portable/aobench/Makefile_ptx +++ b/examples/portable/aobench/Makefile_ptx @@ -3,6 +3,9 @@ ISPC_SRC=ao.ispc CU_SRC=ao.cu CXX_SRC=ao.cpp PTXCC_REGMAX=64 +NVARCH=sm_35 +#PTXCC_REGMAX=128 +#NVARCH=sm_37 #ISPC_FLAGS= --opt=disable-uniform-control-flow #LLVM_GPU=1 diff --git a/examples/portable/common_ptx.mk b/examples/portable/common_ptx.mk index cfaa0b02..da693275 100644 --- a/examples/portable/common_ptx.mk +++ b/examples/portable/common_ptx.mk @@ -1,21 +1,22 @@ NVCC_SRC=../../util/nvcc_helpers.cu NVCC_OBJS=objs_ptx/nvcc_helpers_nvcc.o +NVARCH ?= sm_35 # CXX=g++ -ffast-math CXXFLAGS=-O3 -I$(CUDATK)/include -Iobjs_ptx/ -D_CUDA_ -I../../util -I../../ # NVCC=nvcc -NVCC_FLAGS+=-O3 -arch=sm_35 -D_CUDA_ -I../../util -Xptxas=-v -Iobjs_ptx/ +NVCC_FLAGS+=-O3 -arch=$(NVARCH) -D_CUDA_ -I../../util -Xptxas=-v -Iobjs_ptx/ ifdef PTXCC_REGMAX NVCC_FLAGS += --maxrregcount=$(PTXCC_REGMAX) endif NVCC_FLAGS+=--use_fast_math # LD=nvcc -LDFLAGS=-lcudart -lcudadevrt -arch=sm_35 +LDFLAGS=-lcudart -lcudadevrt -arch=$(NVARCH) # -PTXCC=$(ISPC_HOME)/ptxtools/ptxcc -PTXCC_FLAGS+= -Xptxas=-v +PTXCC=$(ISPC_HOME)/ptxtools/ptxcc --arch=$(NVARCH) +PTXCC_FLAGS+= -Xptxas=-v ifdef PTXCC_REGMAX PTXCC_FLAGS += -maxrregcount=$(PTXCC_REGMAX) endif @@ -41,13 +42,13 @@ CXX_SRC+=ispc_malloc.cpp CXX_OBJS+=objs_ptx/ispc_malloc_gcc.o PTXGEN = $(ISPC_HOME)/ptxtools/ptxgen -PTXGEN += --use_fast_math +PTXGEN += --use_fast_math --arch=$(NVARCH) #LLVM32=$(HOME)/usr/local/llvm/bin-3.2 #LLVM32DIS=$(LLVM32)/bin/llvm-dis LLC=$(LLVM_ROOT)/bin/llc -LLC_FLAGS=-march=nvptx64 -mcpu=sm_35 +LLC_FLAGS=-march=nvptx64 -mcpu=$(NVARCH) # .SUFFIXES: .bc .o .cu .ll diff --git a/examples/portable/deferred/Makefile_ptx b/examples/portable/deferred/Makefile_ptx index 58385e59..99f5bc0b 100644 --- a/examples/portable/deferred/Makefile_ptx +++ b/examples/portable/deferred/Makefile_ptx @@ -3,6 +3,7 @@ ISPC_SRC=kernels.ispc CU_SRC=kernels.cu CXX_SRC=common.cpp main.cpp PTXCC_REGMAX=64 +NVARCH=sm_35 NVVM_GPU=1 #LLVM_GPU=1 diff --git a/examples/portable/nbody_hermite4/Makefile_ptx b/examples/portable/nbody_hermite4/Makefile_ptx index da8b268b..9a68c496 100644 --- a/examples/portable/nbody_hermite4/Makefile_ptx +++ b/examples/portable/nbody_hermite4/Makefile_ptx @@ -2,10 +2,11 @@ PROG=hermite4 ISPC_SRC=hermite4.ispc #CU_SRC=hermite4.cu CXX_SRC=hermite4.cpp -PTXCC_REGMAX=64 +PTXCC_REGMAX=128 #ISPC_FLAGS= --opt=disable-uniform-control-flow +NVARCH=sm_37 -#LLVM_GPU=1 +# LLVM_GPU=1 NVVM_GPU=1 include ../common_ptx.mk diff --git a/ptxtools/Makefile b/ptxtools/Makefile index cc25d5e9..46ba73cf 100644 --- a/ptxtools/Makefile +++ b/ptxtools/Makefile @@ -33,7 +33,7 @@ all: ptxcc ptxgen CXX=clang++ -CXXFLAGS += -O3 --std=c++11 +CXXFLAGS += -O3 --std=c++11 -Wno-deprecated-register CXXFLAGS += -I/opt/local/include LD=clang++ diff --git a/ptxtools/ptxcc.cpp b/ptxtools/ptxcc.cpp index 474ab3ff..0aa5d344 100644 --- a/ptxtools/ptxcc.cpp +++ b/ptxtools/ptxcc.cpp @@ -117,11 +117,13 @@ static std::vector lSplitString(const std::string &s, char delim) static void lUsage(const int ret) { fprintf(stdout, "\nusage: ptxcc [options] file.ptx \n"); - fprintf(stdout, " [--help]\t\t\t\t This help\n"); - fprintf(stdout, " [--verbose]\t\t\t\t Be verbose\n"); - fprintf(stdout, " [--arch={%s}]\t\t\t GPU target architecture\n", "sm_35"); - fprintf(stdout, " [-o ]\t\t\t\t Output file name\n"); - fprintf(stdout, " [-Xnvcc=]\t\t Arguments to pass through to \"nvcc\"\n"); + fprintf(stdout, " [--help]\t\t\t This help\n"); + fprintf(stdout, " [--verbose]\t\t\t Be verbose\n"); + fprintf(stdout, " [--arch=]\t\t\t GPU target architecture\n"); + fprintf(stdout, " \t\t\t\t sm_35 - K20, K40, GK110 chip \n"); + fprintf(stdout, " \t\t\t\t sm_37 - K80, GK210 chip \n"); + fprintf(stdout, " [-o ]\t\t\t Output file name\n"); + fprintf(stdout, " [-Xnvcc=]\t Arguments to pass through to \"nvcc\"\n"); fprintf(stdout, " \n"); exit(ret); } @@ -195,7 +197,7 @@ int main(int _argc, char * _argv[]) for (int i= 0; i < (int)nvccArgumentList.size(); i++) fprintf(stderr, " arg= %d : %s \n", i, nvccArgumentList[i].c_str()); #endif - assert(arch == std::string("sm_35")); + assert(arch == std::string("sm_35") || arch == std::string("sm_37")); if (filePTX.empty()) { fprintf(stderr, "ptxcc fatal : No input file specified; use option --help for more information\n"); diff --git a/ptxtools/ptxgen.cpp b/ptxtools/ptxgen.cpp index 7d764d08..c3431c8f 100644 --- a/ptxtools/ptxgen.cpp +++ b/ptxtools/ptxgen.cpp @@ -76,7 +76,7 @@ struct NVVMProg nvvmProgram get() const {return prog; } }; -static std::string getLibDeviceName(const int computeArch) +static std::string getLibDeviceName(int computeArch) { const char *env = getenv("LIBNVVM_HOME"); #ifdef LIBNVVM_HOME @@ -97,6 +97,7 @@ static std::string getLibDeviceName(const int computeArch) /* Use libdevice for compute_20, if the target is not compute_20, compute_30, * or compute_35. */ + if (computeArch == 37) computeArch = 35; const std::string libdevice = std::string("/libdevice/libdevice.compute_") + lValueToString(computeArch)+ "." + @@ -219,7 +220,9 @@ static void lUsage(const int ret) fprintf(stdout, "\nusage: ptxgen [options] file.[ll,bc] \n"); fprintf(stdout, " [--help]\t\t This help\n"); fprintf(stdout, " [--verbose]\t\t Be verbose\n"); - fprintf(stdout, " [--arch={%s}]\t GPU target architecture\n", "sm_35"); + fprintf(stdout, " [--arch=]\t\t GPU target architecture\n"); + fprintf(stdout, " \t\t\t sm_35 - K20, K40, GK110 chip \n"); + fprintf(stdout, " \t\t\t sm_37 - K80, GK210 chip \n"); fprintf(stdout, " [-o ]\t\t Output file name\n"); fprintf(stdout, " [-g]\t\t Enable generation of debuggin information \n"); fprintf(stdout, " [--opt=]\t\t Optimization parameters \n"); @@ -334,7 +337,10 @@ int main(int argc, char *argv[]) #endif int computeArch = 35; - assert(_arch == std::string("sm_35")); + assert(_arch == std::string("sm_35") || _arch == std::string("sm_37")); + + if (_arch == std::string("sm_37")) + computeArch = 37; if (_useFastMath) { @@ -343,7 +349,7 @@ int main(int argc, char *argv[]) } std::vector nvvmOptions; - nvvmOptions.push_back("-arch=compute_35"); + nvvmOptions.push_back("-arch=compute_"+std::to_string(computeArch)); nvvmOptions.push_back("-ftz=" + lValueToString(_ftz)); nvvmOptions.push_back("-prec-sqrt=" + lValueToString(_precSqrt)); nvvmOptions.push_back("-prec-div=" + lValueToString(_precDiv)); diff --git a/ptxtools/ptxgrammar.yy b/ptxtools/ptxgrammar.yy index 13120739..3cdc3f36 100644 --- a/ptxtools/ptxgrammar.yy +++ b/ptxtools/ptxgrammar.yy @@ -116,7 +116,11 @@ header: version: TOKEN_VERSION TOKEN_FLOAT { assert($2 >= 3.0); } ;//std::cerr << "Reading PTX version " << $2 << std::endl; }; target: - TOKEN_TARGET TOKEN_STRING { assert(std::string($2) == std::string("sm_35")); } //std::cerr << "Target " << $2 << std::endl; }; + TOKEN_TARGET TOKEN_STRING { + assert( + std::string($2) == std::string("sm_35") + || std::string($2) == std::string("sm_37") + ); } //std::cerr << "Target " << $2 << std::endl; }; address_size: TOKEN_ADDRESS_SIZE TOKEN_INT { assert($2 == 64); } //std::cerr << "Address_Size " << $2 << std::endl; };