diff --git a/examples_ptx/stencil/Makefile_gpu b/examples_ptx/stencil/Makefile_gpu index 5b6e4cbf..a31052ca 100644 --- a/examples_ptx/stencil/Makefile_gpu +++ b/examples_ptx/stencil/Makefile_gpu @@ -2,7 +2,7 @@ PROG=stencil ISPC_SRC=stencil.ispc CU_SRC=stencil.cu CXX_SRC=stencil.cpp stencil_serial.cpp -PTXCC_REGMAX=128 +PTXCC_REGMAX=92 LLVM_GPU=1 NVVM_GPU=1 diff --git a/examples_ptx/stencil/stencil.cu b/examples_ptx/stencil/stencil.cu index 28e9bbcf..c5847cb8 100644 --- a/examples_ptx/stencil/stencil.cu +++ b/examples_ptx/stencil/stencil.cu @@ -22,7 +22,7 @@ stencil_step( int x0, int x1, const int x = xb + programIndex; int index = (z * Nxy) + (y * Nx) + x; -#define A_cur(x, y, z) __ldg(&Ain[index + (x) + ((y) * Nx) + ((z) * Nxy)]) +#define A_cur(x, y, z) *(&Ain[index + (x) + ((y) * Nx) + ((z) * Nxy)]) #define A_next(x, y, z) Aout[index + (x) + ((y) * Nx) + ((z) * Nxy)] double div = coef0 * A_cur(0, 0, 0) + @@ -38,7 +38,7 @@ stencil_step( int x0, int x1, if (x < x1) A_next(0, 0, 0) = 2.0 * A_cur(0, 0, 0) - A_next(0, 0, 0) + - __ldg(&vsq[index]) * div; + *(&vsq[index]) * div; } } diff --git a/examples_ptx/stencil/stencil.ispc b/examples_ptx/stencil/stencil.ispc index c4746868..05b86a7e 100644 --- a/examples_ptx/stencil/stencil.ispc +++ b/examples_ptx/stencil/stencil.ispc @@ -100,9 +100,15 @@ stencil_step(uniform int x0, uniform int x1, } } +#ifdef __NVPTX__ #define SPANX 32 -#define SPANY 2 -#define SPANZ 4 +#define SPANY 4 +#define SPANZ 8 +#else +#define SPANX 64 +#define SPANY 4 +#define SPANZ 8 +#endif static task void stencil_step_task(uniform int x0, uniform int x1,