exclusive_scan_and is supported, but must be called outside if-statements. In principle, the others must do the same.
This commit is contained in:
@@ -1210,14 +1210,13 @@ define <1 x i32> @__exclusive_scan_and_i32(<1 x i32>, <1 x i1>) nounwind readnon
|
|||||||
{
|
{
|
||||||
%shft = tail call <1 x i32> @__shift_i32(<1 x i32> %0, i32 -1)
|
%shft = tail call <1 x i32> @__shift_i32(<1 x i32> %0, i32 -1)
|
||||||
%v0 = extractelement <1 x i32> %shft, i32 0
|
%v0 = extractelement <1 x i32> %shft, i32 0
|
||||||
%m0 = extractelement <1 x i1 > %1, i32 0
|
%mask = extractelement <1 x i1 > %1, i32 0
|
||||||
|
%v1 = select i1 %mask, i32 %v0, i32 -1
|
||||||
|
|
||||||
%tid = tail call i32 @__tid_x()
|
%tid = tail call i32 @__tid_x()
|
||||||
%lane = and i32 %tid, 31
|
%lane = and i32 %tid, 31
|
||||||
%m1 = icmp eq i32 %lane, 0
|
%c = icmp eq i32 %lane, 0
|
||||||
|
%v = select i1 %c, i32 -1, i32 %v1
|
||||||
%mask = and i1 %m0, %m1
|
|
||||||
%v = select i1 %mask, i32 %v0, i32 -1
|
|
||||||
|
|
||||||
%s1 = tail call i32 @__shfl_scan_and_step_i32(i32 %v, i32 1);
|
%s1 = tail call i32 @__shfl_scan_and_step_i32(i32 %v, i32 1);
|
||||||
%s2 = tail call i32 @__shfl_scan_and_step_i32(i32 %s1, i32 2);
|
%s2 = tail call i32 @__shfl_scan_and_step_i32(i32 %s1, i32 2);
|
||||||
|
|||||||
@@ -10,7 +10,7 @@ DEPTX=dePTX
|
|||||||
NVCC=nvcc
|
NVCC=nvcc
|
||||||
|
|
||||||
$DEPTX < $PTXSRC > $PTXCU &&
|
$DEPTX < $PTXSRC > $PTXCU &&
|
||||||
$NVCC -arch=sm_35 -dc $NVCCPARM -dryrun $PTXCU 2>&1 | \
|
$NVCC -arch=sm_35 -G -dc $NVCCPARM -dryrun $PTXCU 2>&1 | \
|
||||||
sed 's/\#\$//g'| \
|
sed 's/\#\$//g'| \
|
||||||
awk '{ if ($1 == "LIBRARIES=") print $1$2; else if ($1 == "cicc") print "cp '$PTXSRC'", $NF; else print $0 }' > $PTXSH &&
|
awk '{ if ($1 == "LIBRARIES=") print $1$2; else if ($1 == "cicc") print "cp '$PTXSRC'", $NF; else print $0 }' > $PTXSH &&
|
||||||
sh $PTXSH
|
sh $PTXSH
|
||||||
|
|||||||
@@ -4,9 +4,17 @@ export uniform int width() { return programCount; }
|
|||||||
export void f_f(uniform float RET[], uniform float aFOO[]) {
|
export void f_f(uniform float RET[], uniform float aFOO[]) {
|
||||||
RET[programIndex] = -1;
|
RET[programIndex] = -1;
|
||||||
int32 a = (programIndex & 1) ? 0xff : 0;
|
int32 a = (programIndex & 1) ? 0xff : 0;
|
||||||
|
#if 0
|
||||||
if (programIndex & 1) {
|
if (programIndex & 1) {
|
||||||
RET[programIndex] = exclusive_scan_and(a);
|
RET[programIndex] = exclusive_scan_and(a);
|
||||||
}
|
}
|
||||||
|
#else
|
||||||
|
const bool mask = programIndex & 1;
|
||||||
|
const float res = exclusive_scan_and(mask ? a : -1);
|
||||||
|
if (mask) {
|
||||||
|
RET[programIndex] = res;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -4,9 +4,19 @@ export uniform int width() { return programCount; }
|
|||||||
export void f_f(uniform float RET[], uniform float aFOO[]) {
|
export void f_f(uniform float RET[], uniform float aFOO[]) {
|
||||||
RET[programIndex] = -1;
|
RET[programIndex] = -1;
|
||||||
int32 a = ~(1ul << programIndex);
|
int32 a = ~(1ul << programIndex);
|
||||||
|
#if 0
|
||||||
if ((programIndex < 32) && (programIndex & 1) == 0) {
|
if ((programIndex < 32) && (programIndex & 1) == 0) {
|
||||||
RET[programIndex] = exclusive_scan_and(a);
|
RET[programIndex] = exclusive_scan_and(a);
|
||||||
}
|
}
|
||||||
|
#else
|
||||||
|
const bool mask = ((programIndex < 32) && (programIndex & 1) == 0);
|
||||||
|
const float res = exclusive_scan_and(mask ? a : -1);
|
||||||
|
if (mask)
|
||||||
|
{
|
||||||
|
RET[programIndex] = res;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user