optimization fix

This commit is contained in:
Evghenii
2014-01-29 13:34:45 +01:00
parent 97253354ac
commit 36ee8911b4
2 changed files with 3 additions and 1 deletions

View File

@@ -5,7 +5,7 @@ CXX=g++ -ffast-math
# Host compiler flags: optimize, define _CUDA_, and add the CUDA toolkit
# headers and objs_gpu/ to the include search path.
CXXFLAGS=-O3 -I$(CUDATK)/include -Iobjs_gpu/ -D_CUDA_
#
NVCC=nvcc
# Append (+=) instead of assigning (=): a plain assignment here would discard
# any NVCC_FLAGS value set before this fragment is read, and assigning and
# then appending the same list would pass every flag to nvcc twice.
NVCC_FLAGS+=-O3 -arch=sm_35 -D_CUDA_ -I../ -Xptxas=-v -Iobjs_gpu/
# Optional register cap: only added when PTXCC_REGMAX is defined.
ifdef PTXCC_REGMAX
NVCC_FLAGS += --maxrregcount=$(PTXCC_REGMAX)
endif

View File

@@ -1,6 +1,8 @@
# Build inputs for the radixSort example.
PROG=radixSort
ISPC_SRC=radixSort.ispc
CU_SRC=radixSort.cu
# Forward -O1 to ptxas through nvcc for this example.
NVCC_FLAGS=-Xptxas=-O1
# List each C++ source once; a repeated entry is at best redundant and can
# yield duplicate objects on the link line, depending on how the rules that
# consume CXX_SRC are written.
CXX_SRC=radixSort.cpp
# NOTE(review): presumably read by the shared GPU makefile as a register
# limit for nvcc; confirm against the makefile that consumes it.
PTXCC_REGMAX=64