From bf3b15b74482d2898ebff17e45cf150e5c6ff3d1 Mon Sep 17 00:00:00 2001 From: Evghenii Gaburov Date: Sat, 21 Feb 2015 14:28:47 +0100 Subject: [PATCH 1/2] added support for K80/sm_37 --- examples/portable/aobench/Makefile_ptx | 3 +++ examples/portable/common_ptx.mk | 13 +++++++------ examples/portable/deferred/Makefile_ptx | 1 + examples/portable/nbody_hermite4/Makefile_ptx | 5 +++-- ptxtools/Makefile | 2 +- ptxtools/ptxcc.cpp | 14 ++++++++------ ptxtools/ptxgen.cpp | 14 ++++++++++---- ptxtools/ptxgrammar.yy | 6 +++++- 8 files changed, 38 insertions(+), 20 deletions(-) diff --git a/examples/portable/aobench/Makefile_ptx b/examples/portable/aobench/Makefile_ptx index fb390eb1..ffc5e088 100644 --- a/examples/portable/aobench/Makefile_ptx +++ b/examples/portable/aobench/Makefile_ptx @@ -3,6 +3,9 @@ ISPC_SRC=ao.ispc CU_SRC=ao.cu CXX_SRC=ao.cpp PTXCC_REGMAX=64 +NVARCH=sm_35 +#PTXCC_REGMAX=128 +#NVARCH=sm_37 #ISPC_FLAGS= --opt=disable-uniform-control-flow #LLVM_GPU=1 diff --git a/examples/portable/common_ptx.mk b/examples/portable/common_ptx.mk index cfaa0b02..da693275 100644 --- a/examples/portable/common_ptx.mk +++ b/examples/portable/common_ptx.mk @@ -1,21 +1,22 @@ NVCC_SRC=../../util/nvcc_helpers.cu NVCC_OBJS=objs_ptx/nvcc_helpers_nvcc.o +NVARCH ?= sm_35 # CXX=g++ -ffast-math CXXFLAGS=-O3 -I$(CUDATK)/include -Iobjs_ptx/ -D_CUDA_ -I../../util -I../../ # NVCC=nvcc -NVCC_FLAGS+=-O3 -arch=sm_35 -D_CUDA_ -I../../util -Xptxas=-v -Iobjs_ptx/ +NVCC_FLAGS+=-O3 -arch=$(NVARCH) -D_CUDA_ -I../../util -Xptxas=-v -Iobjs_ptx/ ifdef PTXCC_REGMAX NVCC_FLAGS += --maxrregcount=$(PTXCC_REGMAX) endif NVCC_FLAGS+=--use_fast_math # LD=nvcc -LDFLAGS=-lcudart -lcudadevrt -arch=sm_35 +LDFLAGS=-lcudart -lcudadevrt -arch=$(NVARCH) # -PTXCC=$(ISPC_HOME)/ptxtools/ptxcc -PTXCC_FLAGS+= -Xptxas=-v +PTXCC=$(ISPC_HOME)/ptxtools/ptxcc --arch=$(NVARCH) +PTXCC_FLAGS+= -Xptxas=-v ifdef PTXCC_REGMAX PTXCC_FLAGS += -maxrregcount=$(PTXCC_REGMAX) endif @@ -41,13 +42,13 @@ CXX_SRC+=ispc_malloc.cpp 
CXX_OBJS+=objs_ptx/ispc_malloc_gcc.o PTXGEN = $(ISPC_HOME)/ptxtools/ptxgen -PTXGEN += --use_fast_math +PTXGEN += --use_fast_math --arch=$(NVARCH) #LLVM32=$(HOME)/usr/local/llvm/bin-3.2 #LLVM32DIS=$(LLVM32)/bin/llvm-dis LLC=$(LLVM_ROOT)/bin/llc -LLC_FLAGS=-march=nvptx64 -mcpu=sm_35 +LLC_FLAGS=-march=nvptx64 -mcpu=$(NVARCH) # .SUFFIXES: .bc .o .cu .ll diff --git a/examples/portable/deferred/Makefile_ptx b/examples/portable/deferred/Makefile_ptx index 58385e59..99f5bc0b 100644 --- a/examples/portable/deferred/Makefile_ptx +++ b/examples/portable/deferred/Makefile_ptx @@ -3,6 +3,7 @@ ISPC_SRC=kernels.ispc CU_SRC=kernels.cu CXX_SRC=common.cpp main.cpp PTXCC_REGMAX=64 +NVARCH=sm_35 NVVM_GPU=1 #LLVM_GPU=1 diff --git a/examples/portable/nbody_hermite4/Makefile_ptx b/examples/portable/nbody_hermite4/Makefile_ptx index da8b268b..9a68c496 100644 --- a/examples/portable/nbody_hermite4/Makefile_ptx +++ b/examples/portable/nbody_hermite4/Makefile_ptx @@ -2,10 +2,11 @@ PROG=hermite4 ISPC_SRC=hermite4.ispc #CU_SRC=hermite4.cu CXX_SRC=hermite4.cpp -PTXCC_REGMAX=64 +PTXCC_REGMAX=128 #ISPC_FLAGS= --opt=disable-uniform-control-flow +NVARCH=sm_37 -#LLVM_GPU=1 +# LLVM_GPU=1 NVVM_GPU=1 include ../common_ptx.mk diff --git a/ptxtools/Makefile b/ptxtools/Makefile index cc25d5e9..46ba73cf 100644 --- a/ptxtools/Makefile +++ b/ptxtools/Makefile @@ -33,7 +33,7 @@ all: ptxcc ptxgen CXX=clang++ -CXXFLAGS += -O3 --std=c++11 +CXXFLAGS += -O3 --std=c++11 -Wno-deprecated-register CXXFLAGS += -I/opt/local/include LD=clang++ diff --git a/ptxtools/ptxcc.cpp b/ptxtools/ptxcc.cpp index 474ab3ff..0aa5d344 100644 --- a/ptxtools/ptxcc.cpp +++ b/ptxtools/ptxcc.cpp @@ -117,11 +117,13 @@ static std::vector lSplitString(const std::string &s, char delim) static void lUsage(const int ret) { fprintf(stdout, "\nusage: ptxcc [options] file.ptx \n"); - fprintf(stdout, " [--help]\t\t\t\t This help\n"); - fprintf(stdout, " [--verbose]\t\t\t\t Be verbose\n"); - fprintf(stdout, " [--arch={%s}]\t\t\t GPU target 
architecture\n", "sm_35"); - fprintf(stdout, " [-o ]\t\t\t\t Output file name\n"); - fprintf(stdout, " [-Xnvcc=]\t\t Arguments to pass through to \"nvcc\"\n"); + fprintf(stdout, " [--help]\t\t\t This help\n"); + fprintf(stdout, " [--verbose]\t\t\t Be verbose\n"); + fprintf(stdout, " [--arch=]\t\t\t GPU target architecture\n"); + fprintf(stdout, " \t\t\t\t sm_35 - K20, K40, GK110 chip \n"); + fprintf(stdout, " \t\t\t\t sm_37 - K80, GK210 chip \n"); + fprintf(stdout, " [-o ]\t\t\t Output file name\n"); + fprintf(stdout, " [-Xnvcc=]\t Arguments to pass through to \"nvcc\"\n"); fprintf(stdout, " \n"); exit(ret); } @@ -195,7 +197,7 @@ int main(int _argc, char * _argv[]) for (int i= 0; i < (int)nvccArgumentList.size(); i++) fprintf(stderr, " arg= %d : %s \n", i, nvccArgumentList[i].c_str()); #endif - assert(arch == std::string("sm_35")); + assert(arch == std::string("sm_35") || arch == std::string("sm_37")); if (filePTX.empty()) { fprintf(stderr, "ptxcc fatal : No input file specified; use option --help for more information\n"); diff --git a/ptxtools/ptxgen.cpp b/ptxtools/ptxgen.cpp index 7d764d08..c3431c8f 100644 --- a/ptxtools/ptxgen.cpp +++ b/ptxtools/ptxgen.cpp @@ -76,7 +76,7 @@ struct NVVMProg nvvmProgram get() const {return prog; } }; -static std::string getLibDeviceName(const int computeArch) +static std::string getLibDeviceName(int computeArch) { const char *env = getenv("LIBNVVM_HOME"); #ifdef LIBNVVM_HOME @@ -97,6 +97,7 @@ static std::string getLibDeviceName(const int computeArch) /* Use libdevice for compute_20, if the target is not compute_20, compute_30, * or compute_35. */ + if (computeArch == 37) computeArch = 35; const std::string libdevice = std::string("/libdevice/libdevice.compute_") + lValueToString(computeArch)+ "." 
+ @@ -219,7 +220,9 @@ static void lUsage(const int ret) fprintf(stdout, "\nusage: ptxgen [options] file.[ll,bc] \n"); fprintf(stdout, " [--help]\t\t This help\n"); fprintf(stdout, " [--verbose]\t\t Be verbose\n"); - fprintf(stdout, " [--arch={%s}]\t GPU target architecture\n", "sm_35"); + fprintf(stdout, " [--arch=]\t\t GPU target architecture\n"); + fprintf(stdout, " \t\t\t sm_35 - K20, K40, GK110 chip \n"); + fprintf(stdout, " \t\t\t sm_37 - K80, GK210 chip \n"); fprintf(stdout, " [-o ]\t\t Output file name\n"); fprintf(stdout, " [-g]\t\t Enable generation of debuggin information \n"); fprintf(stdout, " [--opt=]\t\t Optimization parameters \n"); @@ -334,7 +337,10 @@ int main(int argc, char *argv[]) #endif int computeArch = 35; - assert(_arch == std::string("sm_35")); + assert(_arch == std::string("sm_35") || _arch == std::string("sm_37")); + + if (_arch == std::string("sm_37")) + computeArch = 37; if (_useFastMath) { @@ -343,7 +349,7 @@ int main(int argc, char *argv[]) } std::vector nvvmOptions; - nvvmOptions.push_back("-arch=compute_35"); + nvvmOptions.push_back("-arch=compute_"+std::to_string(computeArch)); nvvmOptions.push_back("-ftz=" + lValueToString(_ftz)); nvvmOptions.push_back("-prec-sqrt=" + lValueToString(_precSqrt)); nvvmOptions.push_back("-prec-div=" + lValueToString(_precDiv)); diff --git a/ptxtools/ptxgrammar.yy b/ptxtools/ptxgrammar.yy index 13120739..3cdc3f36 100644 --- a/ptxtools/ptxgrammar.yy +++ b/ptxtools/ptxgrammar.yy @@ -116,7 +116,11 @@ header: version: TOKEN_VERSION TOKEN_FLOAT { assert($2 >= 3.0); } ;//std::cerr << "Reading PTX version " << $2 << std::endl; }; target: - TOKEN_TARGET TOKEN_STRING { assert(std::string($2) == std::string("sm_35")); } //std::cerr << "Target " << $2 << std::endl; }; + TOKEN_TARGET TOKEN_STRING { + assert( + std::string($2) == std::string("sm_35") + || std::string($2) == std::string("sm_37") + ); } //std::cerr << "Target " << $2 << std::endl; }; address_size: TOKEN_ADDRESS_SIZE TOKEN_INT { assert($2 == 64); } 
//std::cerr << "Address_Size " << $2 << std::endl; }; From 66f306f3256bff97ea08b9c9044fb7b0c627ecf5 Mon Sep 17 00:00:00 2001 From: Evghenii Gaburov Date: Sun, 22 Feb 2015 12:17:37 +0100 Subject: [PATCH 2/2] added support for multiple architectures. right now, support is tested only for sm_35 and sm_37 --- ptxtools/GPUTargets.h | 40 ++++++++++++++++++++++++++++++++++++++++ ptxtools/ptxcc.cpp | 19 +++++++++++++------ ptxtools/ptxgen.cpp | 36 ++++++++++++++++++++++-------------- ptxtools/ptxgrammar.yy | 14 +++++++++----- 4 files changed, 84 insertions(+), 25 deletions(-) create mode 100644 ptxtools/GPUTargets.h diff --git a/ptxtools/GPUTargets.h b/ptxtools/GPUTargets.h new file mode 100644 index 00000000..8d73d26b --- /dev/null +++ b/ptxtools/GPUTargets.h @@ -0,0 +1,40 @@ +#pragma once + +// -*- mode: c++ -*- +/* + Copyright (c) 2015, Evghenii Gaburov + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are +met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + * Neither the name of Intel Corporation nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS + IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A + PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER + OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +namespace GPUTargets +{ + static std::vector computeMode = {"sm_35", "sm_37"}; +} diff --git a/ptxtools/ptxcc.cpp b/ptxtools/ptxcc.cpp index 0aa5d344..2dad25d2 100644 --- a/ptxtools/ptxcc.cpp +++ b/ptxtools/ptxcc.cpp @@ -1,6 +1,6 @@ // -*- mode: c++ -*- /* - Copyright (c) 2014, Evghenii Gaburov + Copyright (c) 2014-2015, Evghenii Gaburov All rights reserved. Redistribution and use in source and binary forms, with or without @@ -43,6 +43,7 @@ met: #include #include #include "PTXParser.h" +#include "GPUTargets.h" /* @@ -119,9 +120,11 @@ static void lUsage(const int ret) fprintf(stdout, "\nusage: ptxcc [options] file.ptx \n"); fprintf(stdout, " [--help]\t\t\t This help\n"); fprintf(stdout, " [--verbose]\t\t\t Be verbose\n"); - fprintf(stdout, " [--arch=]\t\t\t GPU target architecture\n"); - fprintf(stdout, " \t\t\t\t sm_35 - K20, K40, GK110 chip \n"); - fprintf(stdout, " \t\t\t\t sm_37 - K80, GK210 chip \n"); + fprintf(stdout, " [--arch=]\t\t\t GPU target architectures:\n"); + fprintf(stdout, " \t\t\t\t "); + for (const auto& mode : GPUTargets::computeMode) + fprintf(stdout, "%s ", mode.c_str()); + fprintf(stdout, "\n"); fprintf(stdout, " [-o ]\t\t\t Output file name\n"); fprintf(stdout, " [-Xnvcc=]\t Arguments to pass through to \"nvcc\"\n"); fprintf(stdout, " \n"); @@ -134,7 +137,7 @@ int main(int _argc, char * _argv[]) char *argv[128]; lGetAllArgs(_argc, _argv, argc, argv); - std::string arch="sm_35"; + std::string arch = 
GPUTargets::computeMode.front(); std::string filePTX; std::string fileOBJ; std::string extString = ".ptx"; @@ -197,7 +200,11 @@ int main(int _argc, char * _argv[]) for (int i= 0; i < (int)nvccArgumentList.size(); i++) fprintf(stderr, " arg= %d : %s \n", i, nvccArgumentList[i].c_str()); #endif - assert(arch == std::string("sm_35") || arch == std::string("sm_37")); + if (std::find(GPUTargets::computeMode.begin(), GPUTargets::computeMode.end(), arch) == GPUTargets::computeMode.end()) + { + fprintf(stderr, "ptxcc fatal : --arch=%s is not supported; use option --help for more information\n", arch.c_str()); + exit(1); + } if (filePTX.empty()) { fprintf(stderr, "ptxcc fatal : No input file specified; use option --help for more information\n"); diff --git a/ptxtools/ptxgen.cpp b/ptxtools/ptxgen.cpp index c3431c8f..31e17f78 100644 --- a/ptxtools/ptxgen.cpp +++ b/ptxtools/ptxgen.cpp @@ -1,6 +1,6 @@ // -*- mode: c++ -*- /* - Copyright (c) 2014, Evghenii Gaburov + Copyright (c) 2014-2015, Evghenii Gaburov All rights reserved. 
Redistribution and use in source and binary forms, with or without @@ -43,6 +43,8 @@ met: #include #include #include +#include +#include "GPUTargets.h" #include #include @@ -51,9 +53,7 @@ met: template static std::string lValueToString(const T& value) { - std::ostringstream oss; - oss << value; - return oss.str(); + return std::to_string(value); } struct Exception : public std::exception @@ -220,9 +220,11 @@ static void lUsage(const int ret) fprintf(stdout, "\nusage: ptxgen [options] file.[ll,bc] \n"); fprintf(stdout, " [--help]\t\t This help\n"); fprintf(stdout, " [--verbose]\t\t Be verbose\n"); - fprintf(stdout, " [--arch=]\t\t GPU target architecture\n"); - fprintf(stdout, " \t\t\t sm_35 - K20, K40, GK110 chip \n"); - fprintf(stdout, " \t\t\t sm_37 - K80, GK210 chip \n"); + fprintf(stdout, " [--arch=]\t\t\t GPU target architectures:\n"); + fprintf(stdout, " \t\t\t\t "); + for (const auto& mode : GPUTargets::computeMode) + fprintf(stdout, "%s ", mode.c_str()); + fprintf(stdout, "\n"); fprintf(stdout, " [-o ]\t\t Output file name\n"); fprintf(stdout, " [-g]\t\t Enable generation of debuggin information \n"); fprintf(stdout, " [--opt=]\t\t Optimization parameters \n"); @@ -255,7 +257,7 @@ int main(int argc, char *argv[]) bool _useFastMath = false; bool _debug = false; bool _verbose = false; - std::string _arch = "sm_35"; + std::string _arch = GPUTargets::computeMode.front(); std::string fileIR, filePTX; for (int i = 1; i < argc; ++i) @@ -336,11 +338,11 @@ int main(int argc, char *argv[]) fprintf(stderr, "use_fast_math= %s\n", _useFastMath ? 
"true" : "false"); #endif - int computeArch = 35; - assert(_arch == std::string("sm_35") || _arch == std::string("sm_37")); - - if (_arch == std::string("sm_37")) - computeArch = 37; + if (std::find(GPUTargets::computeMode.begin(), GPUTargets::computeMode.end(), _arch) == GPUTargets::computeMode.end()) + { + fprintf(stderr, "ptxgen fatal : --arch=%s is not supported; use option --help for more information\n", _arch.c_str()); + exit(1); + } if (_useFastMath) { @@ -348,8 +350,14 @@ int main(int argc, char *argv[]) _precSqrt = _precDiv = 0; } + /* replace "sm" with "compute" */ + assert(_arch[0] == 's' && _arch[1] == 'm' && _arch[2] == '_'); + const std::string _mode = std::string("compute_") + &_arch[3]; + const int computeArch = atoi(&_arch[3]); + + std::vector nvvmOptions; - nvvmOptions.push_back("-arch=compute_"+std::to_string(computeArch)); + nvvmOptions.push_back("-arch=" + _mode); nvvmOptions.push_back("-ftz=" + lValueToString(_ftz)); nvvmOptions.push_back("-prec-sqrt=" + lValueToString(_precSqrt)); nvvmOptions.push_back("-prec-div=" + lValueToString(_precDiv)); diff --git a/ptxtools/ptxgrammar.yy b/ptxtools/ptxgrammar.yy index 3cdc3f36..61db3fca 100644 --- a/ptxtools/ptxgrammar.yy +++ b/ptxtools/ptxgrammar.yy @@ -1,5 +1,5 @@ /* - Copyright (c) 2014, Evghenii Gaburov + Copyright (c) 2014-2015, Evghenii Gaburov All rights reserved. 
Redistribution and use in source and binary forms, with or without @@ -43,6 +43,8 @@ met: #include #include #include + #include + #include "GPUTargets.h" #define YYERROR_VERBOSE 1 @@ -117,10 +119,12 @@ version: TOKEN_VERSION TOKEN_FLOAT { assert($2 >= 3.0); } ;//std::cerr << "Reading PTX version " << $2 << std::endl; }; target: TOKEN_TARGET TOKEN_STRING { - assert( - std::string($2) == std::string("sm_35") - || std::string($2) == std::string("sm_37") - ); } //std::cerr << "Target " << $2 << std::endl; }; + if (std::find(GPUTargets::computeMode.begin(), GPUTargets::computeMode.end(), std::string($2)) == GPUTargets::computeMode.end()) + { + fprintf(stderr, "ptxcc fatal : Found wrong Target=\"%s\" in ptx file\n", $2); + exit(-1); + } + } address_size: TOKEN_ADDRESS_SIZE TOKEN_INT { assert($2 == 64); } //std::cerr << "Address_Size " << $2 << std::endl; };