From 06313e0ec396c03ba557b1006c30464260a800b9 Mon Sep 17 00:00:00 2001 From: Evghenii Date: Wed, 22 Jan 2014 22:12:51 +0100 Subject: [PATCH] exclusive_scan_and is supported, but must be called outside if-statements. in principle others must do the same --- builtins/target-nvptx.ll | 9 ++++----- examples_ptx/ptxcc/ptxcc | 2 +- tests/exclusive-scan-and-1.ispc | 8 ++++++++ tests/exclusive-scan-and-2.ispc | 10 ++++++++++ 4 files changed, 23 insertions(+), 6 deletions(-) diff --git a/builtins/target-nvptx.ll b/builtins/target-nvptx.ll index d631f35d..840134c8 100644 --- a/builtins/target-nvptx.ll +++ b/builtins/target-nvptx.ll @@ -1210,14 +1210,13 @@ define <1 x i32> @__exclusive_scan_and_i32(<1 x i32>, <1 x i1>) nounwind readnon { %shft = tail call <1 x i32> @__shift_i32(<1 x i32> %0, i32 -1) %v0 = extractelement <1 x i32> %shft, i32 0 - %m0 = extractelement <1 x i1 > %1, i32 0 + %mask = extractelement <1 x i1 > %1, i32 0 + %v1 = select i1 %mask, i32 %v0, i32 -1 %tid = tail call i32 @__tid_x() %lane = and i32 %tid, 31 - %m1 = icmp eq i32 %lane, 0 - - %mask = and i1 %m0, %m1 - %v = select i1 %mask, i32 %v0, i32 -1 + %c = icmp eq i32 %lane, 0 + %v = select i1 %c, i32 -1, i32 %v1 %s1 = tail call i32 @__shfl_scan_and_step_i32(i32 %v, i32 1); %s2 = tail call i32 @__shfl_scan_and_step_i32(i32 %s1, i32 2); diff --git a/examples_ptx/ptxcc/ptxcc b/examples_ptx/ptxcc/ptxcc index 0f7e384e..ae0fca91 100755 --- a/examples_ptx/ptxcc/ptxcc +++ b/examples_ptx/ptxcc/ptxcc @@ -10,7 +10,7 @@ DEPTX=dePTX NVCC=nvcc $DEPTX < $PTXSRC > $PTXCU && -$NVCC -arch=sm_35 -dc $NVCCPARM -dryrun $PTXCU 2>&1 | \ +$NVCC -arch=sm_35 -G -dc $NVCCPARM -dryrun $PTXCU 2>&1 | \ sed 's/\#\$//g'| \ awk '{ if ($1 == "LIBRARIES=") print $1$2; else if ($1 == "cicc") print "cp '$PTXSRC'", $NF; else print $0 }' > $PTXSH && sh $PTXSH diff --git a/tests/exclusive-scan-and-1.ispc b/tests/exclusive-scan-and-1.ispc index 31347b47..525d0c2a 100644 --- a/tests/exclusive-scan-and-1.ispc +++ b/tests/exclusive-scan-and-1.ispc 
@@ -4,9 +4,17 @@ export uniform int width() { return programCount; } export void f_f(uniform float RET[], uniform float aFOO[]) { RET[programIndex] = -1; int32 a = (programIndex & 1) ? 0xff : 0; +#if 0 if (programIndex & 1) { RET[programIndex] = exclusive_scan_and(a); } +#else + const bool mask = programIndex & 1; + const float res = exclusive_scan_and(mask ? a : -1); + if (mask) { + RET[programIndex] = res; + } +#endif } diff --git a/tests/exclusive-scan-and-2.ispc b/tests/exclusive-scan-and-2.ispc index b742a91e..7fd69648 100644 --- a/tests/exclusive-scan-and-2.ispc +++ b/tests/exclusive-scan-and-2.ispc @@ -4,9 +4,19 @@ export uniform int width() { return programCount; } export void f_f(uniform float RET[], uniform float aFOO[]) { RET[programIndex] = -1; int32 a = ~(1ul << programIndex); +#if 0 if ((programIndex < 32) && (programIndex & 1) == 0) { RET[programIndex] = exclusive_scan_and(a); } +#else + const bool mask = ((programIndex < 32) && (programIndex & 1) == 0); + const float res = exclusive_scan_and(mask ? a : -1); + if (mask) + { + RET[programIndex] = res; + } +#endif + }