packed_store_active2

This commit is contained in:
Ilia Filippov
2013-12-10 20:39:24 +04:00
parent 961116f4d5
commit 473f1cb4d2
7 changed files with 128 additions and 0 deletions

View File

@@ -488,6 +488,7 @@ lSetInternalFunctions(llvm::Module *module) {
"__num_cores",
"__packed_load_active",
"__packed_store_active",
"__packed_store_active2",
"__popcnt_int32",
"__popcnt_int64",
"__prefetch_read_uniform_1",

View File

@@ -3815,6 +3815,51 @@ loopend:
done:
ret i32 %nextoffset
}
;; __packed_store_active2: write the i32 lane values of vals to consecutive
;; slots starting at startptr and return how far the output position advanced.
;;
;; Fast path: if the mask is a compile time constant and its movmsk value
;; equals ALL_ON_MASK, the whole vector is written with one store and WIDTH
;; is returned.
;; General path: a scalar loop visits every lane. Each lane value is stored
;; unconditionally at the current output position, and the position is then
;; updated by subtracting the mask element of that lane.
;; NOTE(review): this presumably relies on mask elements being 0 for inactive
;; lanes and -1 for active lanes, so that only active lanes advance the
;; position and a value from an inactive lane is overwritten by the next
;; store - confirm for every MASK type.
;; Because the stores are unconditional, a slot past the last packed value
;; may be overwritten with a value from an inactive lane.
define MASK @__packed_store_active2(i32 * %startptr, <WIDTH x i32> %vals,
<WIDTH x MASK> %full_mask) nounwind alwaysinline {
entry:
;; Collapse the mask to an integer and ask whether it is known at compile time.
%mask = call i64 @__movmsk(<WIDTH x MASK> %full_mask)
%mask_known = call i1 @__is_compile_time_constant_mask(<WIDTH x MASK> %full_mask)
br i1 %mask_known, label %known_mask, label %unknown_mask
known_mask:
;; A known mask only helps when it is all on. Anything else uses the loop.
%allon = icmp eq i64 %mask, ALL_ON_MASK
br i1 %allon, label %all_on, label %unknown_mask
all_on:
;; Every lane is active: one vector store with i32 alignment, WIDTH values written.
%vecptr = bitcast i32 *%startptr to <WIDTH x i32> *
store <WIDTH x i32> %vals, <WIDTH x i32> * %vecptr, align 4
ret MASK WIDTH
unknown_mask:
br label %loop
loop:
;; offset is the current output position, i is the lane being processed.
%offset = phi MASK [ 0, %unknown_mask ], [ %ch_offset, %loop ]
%i = phi i32 [ 0, %unknown_mask ], [ %ch_i, %loop ]
%storeval = extractelement <WIDTH x i32> %vals, i32 %i
;; The offset is in the range 0 to WIDTH-1 here, so zero extension and sign
;; extension give the same result. Zero extension is used because it is free.
;; When MASK is already i64 no extension is needed and the offset is used
;; directly.
ifelse(MASK, `i64',
` %storeptr = getelementptr i32 *%startptr, MASK %offset',
` %offset1 = zext MASK %offset to i64
%storeptr = getelementptr i32 *%startptr, i64 %offset1')
;; The store is not guarded by the mask. See the header note.
store i32 %storeval, i32 *%storeptr
;; Advance the output position by the negated mask element of this lane.
%mull_mask = extractelement <WIDTH x MASK> %full_mask, i32 %i
%ch_offset = sub MASK %offset, %mull_mask
; are we done yet?
%ch_i = add i32 %i, 1
%test = icmp ne i32 %ch_i, WIDTH
br i1 %test, label %loop, label %done
done:
;; The final output position is the return value.
ret MASK %ch_offset
}
')
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

View File

@@ -1209,6 +1209,13 @@ packed_store_active(uniform unsigned int a[],
return __packed_store_active(a, vals, (UIntMaskType)__mask);
}
// Unsigned overload: hands the destination array, the per-lane values and
// __mask (cast to UIntMaskType) to the __packed_store_active2 builtin and
// returns the builtin's result.
static inline uniform int
packed_store_active2(uniform unsigned int a[], unsigned int vals) {
    uniform int advanced = __packed_store_active2(a, vals, (UIntMaskType)__mask);
    return advanced;
}
static inline uniform int
packed_load_active(uniform int a[], varying int * uniform vals) {
return __packed_load_active(a, vals, (IntMaskType)__mask);
@@ -1219,6 +1226,12 @@ packed_store_active(uniform int a[], int vals) {
return __packed_store_active(a, vals, (IntMaskType)__mask);
}
// Signed overload: hands the destination array, the per-lane values and
// __mask (cast to IntMaskType) to the __packed_store_active2 builtin and
// returns the builtin's result.
static inline uniform int
packed_store_active2(uniform int a[], int vals) {
    uniform int advanced = __packed_store_active2(a, vals, (IntMaskType)__mask);
    return advanced;
}
///////////////////////////////////////////////////////////////////////////
// System information

View File

@@ -0,0 +1,16 @@
// Returns programCount.
export uniform int width() {
    return programCount;
}
// Calls packed_store_active2 outside any varying conditional, writing the
// lane values into a zero-filled buffer starting at slot 2, then copies the
// first programCount slots of the buffer to RET.
export void f_f(uniform float RET[], uniform float aFOO[]) {
    float laneVal = aFOO[programIndex];

    // Two leading slots plus one slot per program instance, all cleared.
    uniform int packed[2+programCount];
    for (uniform int slot = 0; slot < 2+programCount; ++slot) {
        packed[slot] = 0;
    }

    packed_store_active2(&packed[2], laneVal);

    RET[programIndex] = packed[programIndex];
}
// Expected output: programIndex-1 per lane, with the first two entries
// forced to zero afterwards.
export void result(uniform float RET[]) {
    RET[programIndex] = programIndex-1;
    RET[1] = 0;
    RET[0] = 0;
}

View File

@@ -0,0 +1,21 @@
// Returns programCount.
export uniform int width() {
    return programCount;
}
// Packs the lane values whose integer part is odd into a zero-filled buffer
// starting at slot 2, clears the slot just past the packed run, and copies
// the first programCount slots of the buffer to RET.
export void f_f(uniform float RET[], uniform float aFOO[]) {
    float a = aFOO[programIndex];
    uniform int pack[2+programCount];
    // Start from zero so the value is defined even if the conditional store
    // below never runs.
    uniform int number = 0;
    for (uniform int i = 0; i < 2+programCount; ++i)
        pack[i] = 0;
    if ((int)a & 1)
        number = packed_store_active2(&pack[2], a);
    // The slot right after the packed run may hold a leftover value from an
    // inactive lane, so it is reset. When every lane was stored
    // (number == programCount, e.g. a single-lane target) that slot would be
    // one past the end of pack, and there is nothing to reset.
    if (number < programCount)
        pack[2+number] = 0;
    RET[programIndex] = pack[programIndex];
}
// Expected output: zero everywhere except entries 2 .. 2+programCount/2-1,
// which hold the odd numbers 1, 3, 5, ... in order.
export void result(uniform float RET[]) {
    RET[programIndex] = 0;
    for (uniform int slot = 2; slot < 2+programCount/2; ++slot) {
        // slot 2 -> 1, slot 3 -> 3, slot 4 -> 5, ...
        RET[slot] = 2*slot - 3;
    }
}

View File

@@ -0,0 +1,17 @@
// Returns programCount.
export uniform int width() {
    return programCount;
}
// Checks the return value of packed_store_active2: the call is made only for
// lanes whose integer value is odd, and the returned count is broadcast to
// every entry of RET.
export void f_f(uniform float RET[], uniform float aFOO[]) {
    float laneVal = aFOO[programIndex];

    uniform int packed[2+programCount];
    for (uniform int slot = 0; slot < 2+programCount; ++slot) {
        packed[slot] = 0;
    }

    uniform int stored = 0;
    if ((int)laneVal & 1) {
        stored += packed_store_active2(&packed[2], laneVal);
    }

    RET[programIndex] = stored;
}
// Expected output: programCount/2 in every entry, except that a
// single-lane target expects 1.
export void result(uniform float RET[]) {
    uniform int expected = programCount/2;
    if (programCount == 1) {
        expected = 1;
    }
    RET[programIndex] = expected;
}

15
tests/packed-store2.ispc Normal file
View File

@@ -0,0 +1,15 @@
// Returns programCount.
export uniform int width() {
    return programCount;
}
// Exercises the unsigned overload of packed_store_active2: the lane values
// are converted to unsigned int and stored into a zero-filled buffer of
// programCount entries, which is then copied to RET.
export void f_f(uniform float RET[], uniform float aFOO[]) {
    float laneVal = aFOO[programIndex];

    uniform unsigned int packed[programCount];
    for (uniform int slot = 0; slot < programCount; ++slot) {
        packed[slot] = 0;
    }

    packed_store_active2(packed, (unsigned int)laneVal);

    RET[programIndex] = packed[programIndex];
}
// Expected output: each entry holds its one-based lane number.
export void result(uniform float RET[]) {
    RET[programIndex] = programIndex + 1;
}