exclusive_scan_and is supported, but must be called outside if-statements. In principle, the others must do the same.

This commit is contained in:
Evghenii
2014-01-22 22:12:51 +01:00
parent 08d78e6be5
commit 06313e0ec3
4 changed files with 23 additions and 6 deletions

View File

@@ -1210,14 +1210,13 @@ define <1 x i32> @__exclusive_scan_and_i32(<1 x i32>, <1 x i1>) nounwind readnon
{ {
%shft = tail call <1 x i32> @__shift_i32(<1 x i32> %0, i32 -1) %shft = tail call <1 x i32> @__shift_i32(<1 x i32> %0, i32 -1)
%v0 = extractelement <1 x i32> %shft, i32 0 %v0 = extractelement <1 x i32> %shft, i32 0
%m0 = extractelement <1 x i1 > %1, i32 0 %mask = extractelement <1 x i1 > %1, i32 0
%v1 = select i1 %mask, i32 %v0, i32 -1
%tid = tail call i32 @__tid_x() %tid = tail call i32 @__tid_x()
%lane = and i32 %tid, 31 %lane = and i32 %tid, 31
%m1 = icmp eq i32 %lane, 0 %c = icmp eq i32 %lane, 0
%v = select i1 %c, i32 -1, i32 %v1
%mask = and i1 %m0, %m1
%v = select i1 %mask, i32 %v0, i32 -1
%s1 = tail call i32 @__shfl_scan_and_step_i32(i32 %v, i32 1); %s1 = tail call i32 @__shfl_scan_and_step_i32(i32 %v, i32 1);
%s2 = tail call i32 @__shfl_scan_and_step_i32(i32 %s1, i32 2); %s2 = tail call i32 @__shfl_scan_and_step_i32(i32 %s1, i32 2);

View File

@@ -10,7 +10,7 @@ DEPTX=dePTX
NVCC=nvcc NVCC=nvcc
$DEPTX < $PTXSRC > $PTXCU && $DEPTX < $PTXSRC > $PTXCU &&
$NVCC -arch=sm_35 -dc $NVCCPARM -dryrun $PTXCU 2>&1 | \ $NVCC -arch=sm_35 -G -dc $NVCCPARM -dryrun $PTXCU 2>&1 | \
sed 's/\#\$//g'| \ sed 's/\#\$//g'| \
awk '{ if ($1 == "LIBRARIES=") print $1$2; else if ($1 == "cicc") print "cp '$PTXSRC'", $NF; else print $0 }' > $PTXSH && awk '{ if ($1 == "LIBRARIES=") print $1$2; else if ($1 == "cicc") print "cp '$PTXSRC'", $NF; else print $0 }' > $PTXSH &&
sh $PTXSH sh $PTXSH

View File

@@ -4,9 +4,17 @@ export uniform int width() { return programCount; }
export void f_f(uniform float RET[], uniform float aFOO[]) { export void f_f(uniform float RET[], uniform float aFOO[]) {
RET[programIndex] = -1; RET[programIndex] = -1;
int32 a = (programIndex & 1) ? 0xff : 0; int32 a = (programIndex & 1) ? 0xff : 0;
#if 0
if (programIndex & 1) { if (programIndex & 1) {
RET[programIndex] = exclusive_scan_and(a); RET[programIndex] = exclusive_scan_and(a);
} }
#else
const bool mask = programIndex & 1;
const float res = exclusive_scan_and(mask ? a : -1);
if (mask) {
RET[programIndex] = res;
}
#endif
} }

View File

@@ -4,9 +4,19 @@ export uniform int width() { return programCount; }
export void f_f(uniform float RET[], uniform float aFOO[]) { export void f_f(uniform float RET[], uniform float aFOO[]) {
RET[programIndex] = -1; RET[programIndex] = -1;
int32 a = ~(1ul << programIndex); int32 a = ~(1ul << programIndex);
#if 0
if ((programIndex < 32) && (programIndex & 1) == 0) { if ((programIndex < 32) && (programIndex & 1) == 0) {
RET[programIndex] = exclusive_scan_and(a); RET[programIndex] = exclusive_scan_and(a);
} }
#else
const bool mask = ((programIndex < 32) && (programIndex & 1) == 0);
const float res = exclusive_scan_and(mask ? a : -1);
if (mask)
{
RET[programIndex] = res;
}
#endif
} }