diff --git a/examples_ptx/common_gpu.mk b/examples_ptx/common_gpu.mk
index a4f5fbbc..adc806bc 100644
--- a/examples_ptx/common_gpu.mk
+++ b/examples_ptx/common_gpu.mk
@@ -5,7 +5,7 @@ CXX=g++
 CXXFLAGS=-O3 -I$(CUDATK)/include -Iobjs_gpu/ -D_CUDA_
 #
 NVCC=nvcc
-NVCC_FLAGS=-O3 -arch=sm_35 -D_CUDA_
+NVCC_FLAGS=-O3 -arch=sm_35 -D_CUDA_ -I../
 ifdef PTXCC_REGMAX
 NVCC_FLAGS += --maxrregcount=$(PTXCC_REGMAX)
 endif
diff --git a/examples_ptx/cuda_helpers.cuh b/examples_ptx/cuda_helpers.cuh
new file mode 100644
index 00000000..1a3ff226
--- /dev/null
+++ b/examples_ptx/cuda_helpers.cuh
@@ -0,0 +1,23 @@
+#pragma once
+
+// Helper macros that express program/task indexing in terms of CUDA built-ins.
+// Layout implied by the definitions below: threads are grouped in 32s
+// (programIndex = lane within the group) and every block holds 4 such groups,
+// i.e. 128 threads along x; each group is one task along dimension 0.
+
+#define programCount 32
+#define programIndex (threadIdx.x & 31)
+#define taskIndex0 (blockIdx.x*4 + (threadIdx.x >> 5))
+#define taskCount0 (gridDim.x*4)
+#define taskIndex1 (blockIdx.y)
+#define taskCount1 (gridDim.y)
+#define warpIdx (threadIdx.x >> 5)
+
+// launch(ntx,nty,ntz,func)(args...): device-side launch of `func`, issued only by
+// lane 0 of each 32-thread group. The grid is ceil(ntx/4) x nty x ntz blocks of
+// 128 threads so that it matches the 4-groups-per-block indexing above; because
+// of the round-up, taskCount0 can exceed ntx and kernels must bounds-check
+// taskIndex0. Expands to a bare `if`, so wrap uses in braces next to an `else`.
+// NOTE(review): the launch configuration was lost in this patch ("func<<>>") and
+// has been reconstructed from the index macros -- confirm against the original.
+#define launch(ntx,nty,ntz,func) if (programIndex==0) func<<<dim3(((ntx)+4-1)/4,nty,ntz),128>>>