packed_store_active2
This commit is contained in:
@@ -488,6 +488,7 @@ lSetInternalFunctions(llvm::Module *module) {
|
||||
"__num_cores",
|
||||
"__packed_load_active",
|
||||
"__packed_store_active",
|
||||
"__packed_store_active2",
|
||||
"__popcnt_int32",
|
||||
"__popcnt_int64",
|
||||
"__prefetch_read_uniform_1",
|
||||
|
||||
@@ -3815,6 +3815,51 @@ loopend:
|
||||
done:
|
||||
ret i32 %nextoffset
|
||||
}
|
||||
|
||||
;; Packed store of the values from the active lanes to consecutive i32
;; slots starting at %startptr. The value returned is the final offset,
;; which grows by one for each lane whose mask element is -1.
;; The loop has no branch on the mask. Every lane stores its value at the
;; current offset and only active lanes advance the offset, so a value
;; from an inactive lane is either overwritten by the next active lane or
;; left in the slot right after the packed values.
define MASK @__packed_store_active2(i32 * %startptr, <WIDTH x i32> %vals,
|
||||
<WIDTH x MASK> %full_mask) nounwind alwaysinline {
|
||||
entry:
|
||||
%mask = call i64 @__movmsk(<WIDTH x MASK> %full_mask)
|
||||
%mask_known = call i1 @__is_compile_time_constant_mask(<WIDTH x MASK> %full_mask)
|
||||
br i1 %mask_known, label %known_mask, label %unknown_mask
|
||||
|
||||
known_mask:
|
||||
%allon = icmp eq i64 %mask, ALL_ON_MASK
|
||||
br i1 %allon, label %all_on, label %unknown_mask
|
||||
|
||||
;; The mask is a compile time constant with all lanes on. Store the whole
;; vector with a single store that assumes only 4 byte alignment.
all_on:
|
||||
%vecptr = bitcast i32 *%startptr to <WIDTH x i32> *
|
||||
store <WIDTH x i32> %vals, <WIDTH x i32> * %vecptr, align 4
|
||||
ret MASK WIDTH
|
||||
|
||||
unknown_mask:
|
||||
br label %loop
|
||||
|
||||
;; %offset is the position of the next packed slot and %i is the lane
;; being visited. The loop always runs over all of the lanes.
loop:
|
||||
%offset = phi MASK [ 0, %unknown_mask ], [ %ch_offset, %loop ]
|
||||
%i = phi i32 [ 0, %unknown_mask ], [ %ch_i, %loop ]
|
||||
%storeval = extractelement <WIDTH x i32> %vals, i32 %i
|
||||
|
||||
;; The offset is in the range 0 to WIDTH-1 here, so it does not matter whether
|
||||
;; we zero extend or sign extend it, and zero extension is free. Do nothing
|
||||
;; for an i64 MASK, as the offset is already an i64 value.
|
||||
ifelse(MASK, `i64',
|
||||
` %storeptr = getelementptr i32 *%startptr, MASK %offset',
|
||||
` %offset1 = zext MASK %offset to i64
|
||||
%storeptr = getelementptr i32 *%startptr, i64 %offset1')
|
||||
;; This store is not conditional on the mask.
store i32 %storeval, i32 *%storeptr
|
||||
|
||||
;; Assuming mask elements are 0 or -1, subtracting the mask element
;; advances the offset by one for an active lane and leaves it
;; unchanged for an inactive lane.
%mull_mask = extractelement <WIDTH x MASK> %full_mask, i32 %i
|
||||
%ch_offset = sub MASK %offset, %mull_mask
|
||||
|
||||
; are we done yet?
|
||||
%ch_i = add i32 %i, 1
|
||||
%test = icmp ne i32 %ch_i, WIDTH
|
||||
br i1 %test, label %loop, label %done
|
||||
|
||||
done:
|
||||
ret MASK %ch_offset
|
||||
}
|
||||
')
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
13
stdlib.ispc
13
stdlib.ispc
@@ -1209,6 +1209,13 @@ packed_store_active(uniform unsigned int a[],
|
||||
return __packed_store_active(a, vals, (UIntMaskType)__mask);
|
||||
}
|
||||
|
||||
// Packs vals from the lanes enabled in the current execution mask into
// consecutive elements of a[] and returns the count reported by the
// __packed_store_active2 builtin. The builtin stores without branching on
// the mask, so the element right after the packed values may also be
// overwritten.
static inline uniform int
packed_store_active2(uniform unsigned int a[], unsigned int vals) {
    uniform int stored = __packed_store_active2(a, vals, (UIntMaskType)__mask);
    return stored;
}
|
||||
|
||||
|
||||
static inline uniform int
|
||||
packed_load_active(uniform int a[], varying int * uniform vals) {
|
||||
return __packed_load_active(a, vals, (IntMaskType)__mask);
|
||||
@@ -1219,6 +1226,12 @@ packed_store_active(uniform int a[], int vals) {
|
||||
return __packed_store_active(a, vals, (IntMaskType)__mask);
|
||||
}
|
||||
|
||||
// Signed overload: packs vals from the lanes enabled in the current
// execution mask into consecutive elements of a[] and returns the count
// reported by the __packed_store_active2 builtin. The builtin stores
// without branching on the mask, so the element right after the packed
// values may also be overwritten.
static inline uniform int
packed_store_active2(uniform int a[], int vals) {
    uniform int stored = __packed_store_active2(a, vals, (IntMaskType)__mask);
    return stored;
}
|
||||
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
// System information
|
||||
|
||||
|
||||
16
tests/packed-store2-1.ispc
Normal file
16
tests/packed-store2-1.ispc
Normal file
@@ -0,0 +1,16 @@
|
||||
|
||||
// Exported helper that returns programCount.
export uniform int width() {
    return programCount;
}
|
||||
|
||||
// Packs every lane's input value into a zeroed scratch array starting at
// element 2, then returns the first programCount scratch elements in RET.
export void f_f(uniform float RET[], uniform float aFOO[]) {
    uniform int scratch[2+programCount];
    uniform int slot = 0;
    while (slot < 2+programCount) {
        scratch[slot] = 0;
        ++slot;
    }

    float lane_value = aFOO[programIndex];
    // Elements 0 and 1 are left at zero; the packed values begin at element 2.
    packed_store_active2(&scratch[2], lane_value);
    RET[programIndex] = scratch[programIndex];
}
|
||||
|
||||
// Expected output: element k holds k-1, except that elements 0 and 1 are zero.
export void result(uniform float RET[]) {
    RET[programIndex] = programIndex - 1;
    RET[1] = 0;
    RET[0] = 0;
}
|
||||
21
tests/packed-store2-2.ispc
Normal file
21
tests/packed-store2-2.ispc
Normal file
@@ -0,0 +1,21 @@
|
||||
|
||||
// Exported helper that returns programCount.
export uniform int width() {
    return programCount;
}
|
||||
|
||||
// Packs the input values of the lanes holding odd values into a zeroed
// scratch array starting at element 2, clears the slot right after the
// packed values, and returns the first programCount scratch elements in RET.
export void f_f(uniform float RET[], uniform float aFOO[]) {
    float a = aFOO[programIndex];
    // One extra element beyond 2+programCount: packed_store_active2 can
    // return as much as programCount, so pack[2+number] below must have
    // room for index 2+programCount.
    uniform int pack[3+programCount];
    // Start from zero so the clean-up store below is well defined even if
    // no lane takes the branch.
    uniform int number = 0;
    for (uniform int i = 0; i < 3+programCount; ++i)
        pack[i] = 0;
    if ((int)a & 1)
        number = packed_store_active2(&pack[2], a);
    // packed_store_active2 may leave a value from an inactive lane in the
    // slot right after the packed values; reset it so the output is
    // deterministic.
    pack[2+number] = 0;
    RET[programIndex] = pack[programIndex];
}
|
||||
|
||||
// Expected output: zeros everywhere except elements 2, 3, ... which hold
// the odd numbers 1, 3, 5, ... for programCount/2 entries.
export void result(uniform float RET[]) {
    RET[programIndex] = 0;
    for (uniform int k = 0; k < programCount/2; ++k)
        RET[2 + k] = 2*k + 1;
}
|
||||
17
tests/packed-store2-3.ispc
Normal file
17
tests/packed-store2-3.ispc
Normal file
@@ -0,0 +1,17 @@
|
||||
|
||||
// Exported helper that returns programCount.
export uniform int width() {
    return programCount;
}
|
||||
|
||||
// Packs the input values of the lanes holding odd values and writes the
// count returned by packed_store_active2 to every element of RET.
export void f_f(uniform float RET[], uniform float aFOO[]) {
    uniform int scratch[2+programCount];
    for (uniform int k = 0; k < 2+programCount; k++)
        scratch[k] = 0;

    float lane_value = aFOO[programIndex];
    uniform int total = 0;
    // Only lanes whose integer value is odd take part in the packed store.
    if (((int)lane_value & 1) != 0)
        total = total + packed_store_active2(&scratch[2], lane_value);
    RET[programIndex] = total;
}
|
||||
|
||||
// Expected output: programCount/2 in every element, or 1 when the gang
// has a single lane.
export void result(uniform float RET[]) {
    uniform int expected = programCount/2;
    if (programCount == 1)
        expected = 1;
    RET[programIndex] = expected;
}
|
||||
15
tests/packed-store2.ispc
Normal file
15
tests/packed-store2.ispc
Normal file
@@ -0,0 +1,15 @@
|
||||
|
||||
// Exported helper that returns programCount.
export uniform int width() {
    return programCount;
}
|
||||
|
||||
// Packs every lane's input value, converted to unsigned int, into a zeroed
// array through the unsigned overload and returns the array in RET.
export void f_f(uniform float RET[], uniform float aFOO[]) {
    uniform unsigned int scratch[programCount];
    uniform int slot = 0;
    while (slot < programCount) {
        scratch[slot] = 0;
        ++slot;
    }

    float lane_value = aFOO[programIndex];
    unsigned int converted = (unsigned int)lane_value;
    packed_store_active2(scratch, converted);
    RET[programIndex] = scratch[programIndex];
}
|
||||
|
||||
// Expected output: element k holds k+1.
export void result(uniform float RET[]) {
    RET[programIndex] = programIndex + 1;
}
|
||||
Reference in New Issue
Block a user