Added support for the K80 GPU (sm_37, GK210 chip)
This commit is contained in:
@@ -3,6 +3,9 @@ ISPC_SRC=ao.ispc
|
||||
CU_SRC=ao.cu
|
||||
CXX_SRC=ao.cpp
|
||||
PTXCC_REGMAX=64
|
||||
NVARCH=sm_35
|
||||
#PTXCC_REGMAX=128
|
||||
#NVARCH=sm_37
|
||||
#ISPC_FLAGS= --opt=disable-uniform-control-flow
|
||||
|
||||
#LLVM_GPU=1
|
||||
|
||||
@@ -1,21 +1,22 @@
|
||||
NVCC_SRC=../../util/nvcc_helpers.cu
|
||||
NVCC_OBJS=objs_ptx/nvcc_helpers_nvcc.o
|
||||
NVARCH ?= sm_35
|
||||
#
|
||||
CXX=g++ -ffast-math
|
||||
CXXFLAGS=-O3 -I$(CUDATK)/include -Iobjs_ptx/ -D_CUDA_ -I../../util -I../../
|
||||
#
|
||||
NVCC=nvcc
|
||||
NVCC_FLAGS+=-O3 -arch=sm_35 -D_CUDA_ -I../../util -Xptxas=-v -Iobjs_ptx/
|
||||
NVCC_FLAGS+=-O3 -arch=$(NVARCH) -D_CUDA_ -I../../util -Xptxas=-v -Iobjs_ptx/
|
||||
ifdef PTXCC_REGMAX
|
||||
NVCC_FLAGS += --maxrregcount=$(PTXCC_REGMAX)
|
||||
endif
|
||||
NVCC_FLAGS+=--use_fast_math
|
||||
#
|
||||
LD=nvcc
|
||||
LDFLAGS=-lcudart -lcudadevrt -arch=sm_35
|
||||
LDFLAGS=-lcudart -lcudadevrt -arch=$(NVARCH)
|
||||
#
|
||||
PTXCC=$(ISPC_HOME)/ptxtools/ptxcc
|
||||
PTXCC_FLAGS+= -Xptxas=-v
|
||||
PTXCC=$(ISPC_HOME)/ptxtools/ptxcc --arch=$(NVARCH)
|
||||
PTXCC_FLAGS+= -Xptxas=-v
|
||||
ifdef PTXCC_REGMAX
|
||||
PTXCC_FLAGS += -maxrregcount=$(PTXCC_REGMAX)
|
||||
endif
|
||||
@@ -41,13 +42,13 @@ CXX_SRC+=ispc_malloc.cpp
|
||||
CXX_OBJS+=objs_ptx/ispc_malloc_gcc.o
|
||||
|
||||
PTXGEN = $(ISPC_HOME)/ptxtools/ptxgen
|
||||
PTXGEN += --use_fast_math
|
||||
PTXGEN += --use_fast_math --arch=$(NVARCH)
|
||||
|
||||
#LLVM32=$(HOME)/usr/local/llvm/bin-3.2
|
||||
#LLVM32DIS=$(LLVM32)/bin/llvm-dis
|
||||
|
||||
LLC=$(LLVM_ROOT)/bin/llc
|
||||
LLC_FLAGS=-march=nvptx64 -mcpu=sm_35
|
||||
LLC_FLAGS=-march=nvptx64 -mcpu=$(NVARCH)
|
||||
|
||||
# .SUFFIXES: .bc .o .cu .ll
|
||||
|
||||
|
||||
@@ -3,6 +3,7 @@ ISPC_SRC=kernels.ispc
|
||||
CU_SRC=kernels.cu
|
||||
CXX_SRC=common.cpp main.cpp
|
||||
PTXCC_REGMAX=64
|
||||
NVARCH=sm_35
|
||||
|
||||
NVVM_GPU=1
|
||||
#LLVM_GPU=1
|
||||
|
||||
@@ -2,10 +2,11 @@ PROG=hermite4
|
||||
ISPC_SRC=hermite4.ispc
|
||||
#CU_SRC=hermite4.cu
|
||||
CXX_SRC=hermite4.cpp
|
||||
PTXCC_REGMAX=64
|
||||
PTXCC_REGMAX=128
|
||||
#ISPC_FLAGS= --opt=disable-uniform-control-flow
|
||||
NVARCH=sm_37
|
||||
|
||||
#LLVM_GPU=1
|
||||
# LLVM_GPU=1
|
||||
NVVM_GPU=1
|
||||
|
||||
include ../common_ptx.mk
|
||||
|
||||
@@ -33,7 +33,7 @@
|
||||
all: ptxcc ptxgen
|
||||
|
||||
CXX=clang++
|
||||
CXXFLAGS += -O3 --std=c++11
|
||||
CXXFLAGS += -O3 --std=c++11 -Wno-deprecated-register
|
||||
CXXFLAGS += -I/opt/local/include
|
||||
|
||||
LD=clang++
|
||||
|
||||
@@ -117,11 +117,13 @@ static std::vector<std::string> lSplitString(const std::string &s, char delim)
|
||||
static void lUsage(const int ret)
|
||||
{
|
||||
fprintf(stdout, "\nusage: ptxcc [options] file.ptx \n");
|
||||
fprintf(stdout, " [--help]\t\t\t\t This help\n");
|
||||
fprintf(stdout, " [--verbose]\t\t\t\t Be verbose\n");
|
||||
fprintf(stdout, " [--arch={%s}]\t\t\t GPU target architecture\n", "sm_35");
|
||||
fprintf(stdout, " [-o <name>]\t\t\t\t Output file name\n");
|
||||
fprintf(stdout, " [-Xnvcc=<arguments>]\t\t Arguments to pass through to \"nvcc\"\n");
|
||||
fprintf(stdout, " [--help]\t\t\t This help\n");
|
||||
fprintf(stdout, " [--verbose]\t\t\t Be verbose\n");
|
||||
fprintf(stdout, " [--arch=]\t\t\t GPU target architecture\n");
|
||||
fprintf(stdout, " \t\t\t\t sm_35 - K20, K40, GK110 chip \n");
|
||||
fprintf(stdout, " \t\t\t\t sm_37 - K80, GK210 chip \n");
|
||||
fprintf(stdout, " [-o <name>]\t\t\t Output file name\n");
|
||||
fprintf(stdout, " [-Xnvcc=<arguments>]\t Arguments to pass through to \"nvcc\"\n");
|
||||
fprintf(stdout, " \n");
|
||||
exit(ret);
|
||||
}
|
||||
@@ -195,7 +197,7 @@ int main(int _argc, char * _argv[])
|
||||
for (int i= 0; i < (int)nvccArgumentList.size(); i++)
|
||||
fprintf(stderr, " arg= %d : %s \n", i, nvccArgumentList[i].c_str());
|
||||
#endif
|
||||
assert(arch == std::string("sm_35"));
|
||||
assert(arch == std::string("sm_35") || arch == std::string("sm_37"));
|
||||
if (filePTX.empty())
|
||||
{
|
||||
fprintf(stderr, "ptxcc fatal : No input file specified; use option --help for more information\n");
|
||||
|
||||
@@ -76,7 +76,7 @@ struct NVVMProg
|
||||
nvvmProgram get() const {return prog; }
|
||||
};
|
||||
|
||||
static std::string getLibDeviceName(const int computeArch)
|
||||
static std::string getLibDeviceName(int computeArch)
|
||||
{
|
||||
const char *env = getenv("LIBNVVM_HOME");
|
||||
#ifdef LIBNVVM_HOME
|
||||
@@ -97,6 +97,7 @@ static std::string getLibDeviceName(const int computeArch)
|
||||
|
||||
/* Use libdevice for compute_20, if the target is not compute_20, compute_30,
|
||||
* or compute_35. */
|
||||
if (computeArch == 37) computeArch = 35;
|
||||
const std::string libdevice =
|
||||
std::string("/libdevice/libdevice.compute_") +
|
||||
lValueToString(computeArch)+ "." +
|
||||
@@ -219,7 +220,9 @@ static void lUsage(const int ret)
|
||||
fprintf(stdout, "\nusage: ptxgen [options] file.[ll,bc] \n");
|
||||
fprintf(stdout, " [--help]\t\t This help\n");
|
||||
fprintf(stdout, " [--verbose]\t\t Be verbose\n");
|
||||
fprintf(stdout, " [--arch={%s}]\t GPU target architecture\n", "sm_35");
|
||||
fprintf(stdout, " [--arch=]\t\t GPU target architecture\n");
|
||||
fprintf(stdout, " \t\t\t sm_35 - K20, K40, GK110 chip \n");
|
||||
fprintf(stdout, " \t\t\t sm_37 - K80, GK210 chip \n");
|
||||
fprintf(stdout, " [-o <name>]\t\t Output file name\n");
|
||||
fprintf(stdout, " [-g]\t\t Enable generation of debuggin information \n");
|
||||
fprintf(stdout, " [--opt=]\t\t Optimization parameters \n");
|
||||
@@ -334,7 +337,10 @@ int main(int argc, char *argv[])
|
||||
#endif
|
||||
|
||||
int computeArch = 35;
|
||||
assert(_arch == std::string("sm_35"));
|
||||
assert(_arch == std::string("sm_35") || _arch == std::string("sm_37"));
|
||||
|
||||
if (_arch == std::string("sm_37"))
|
||||
computeArch = 37;
|
||||
|
||||
if (_useFastMath)
|
||||
{
|
||||
@@ -343,7 +349,7 @@ int main(int argc, char *argv[])
|
||||
}
|
||||
|
||||
std::vector<std::string> nvvmOptions;
|
||||
nvvmOptions.push_back("-arch=compute_35");
|
||||
nvvmOptions.push_back("-arch=compute_"+std::to_string(computeArch));
|
||||
nvvmOptions.push_back("-ftz=" + lValueToString(_ftz));
|
||||
nvvmOptions.push_back("-prec-sqrt=" + lValueToString(_precSqrt));
|
||||
nvvmOptions.push_back("-prec-div=" + lValueToString(_precDiv));
|
||||
|
||||
@@ -116,7 +116,11 @@ header:
|
||||
version:
|
||||
TOKEN_VERSION TOKEN_FLOAT { assert($2 >= 3.0); } ;//std::cerr << "Reading PTX version " << $2 << std::endl; };
|
||||
target:
|
||||
TOKEN_TARGET TOKEN_STRING { assert(std::string($2) == std::string("sm_35")); } //std::cerr << "Target " << $2 << std::endl; };
|
||||
TOKEN_TARGET TOKEN_STRING {
|
||||
assert(
|
||||
std::string($2) == std::string("sm_35")
|
||||
|| std::string($2) == std::string("sm_37")
|
||||
); } //std::cerr << "Target " << $2 << std::endl; };
|
||||
address_size:
|
||||
TOKEN_ADDRESS_SIZE TOKEN_INT { assert($2 == 64); } //std::cerr << "Address_Size " << $2 << std::endl; };
|
||||
|
||||
|
||||
Reference in New Issue
Block a user