packed_store_active2
This commit is contained in:
@@ -488,6 +488,7 @@ lSetInternalFunctions(llvm::Module *module) {
|
|||||||
"__num_cores",
|
"__num_cores",
|
||||||
"__packed_load_active",
|
"__packed_load_active",
|
||||||
"__packed_store_active",
|
"__packed_store_active",
|
||||||
|
"__packed_store_active2",
|
||||||
"__popcnt_int32",
|
"__popcnt_int32",
|
||||||
"__popcnt_int64",
|
"__popcnt_int64",
|
||||||
"__prefetch_read_uniform_1",
|
"__prefetch_read_uniform_1",
|
||||||
|
|||||||
@@ -3815,6 +3815,51 @@ loopend:
|
|||||||
done:
|
done:
|
||||||
ret i32 %nextoffset
|
ret i32 %nextoffset
|
||||||
}
|
}
|
||||||
|
|
||||||
|
;; Packed store of the i32 lanes of %vals to consecutive slots starting at %startptr.
;; Returns, as a MASK-typed integer, the final write offset; this equals the number
;; of active lanes provided an active lane of %full_mask is all ones (-1) and an
;; inactive lane is 0 -- NOTE(review): confirm the mask encoding for every target.
;; Fast path: when the mask is a compile-time constant and its movmsk value equals
;; ALL_ON_MASK, the whole vector is written with one store and WIDTH is returned.
;; Otherwise a WIDTH-iteration loop stores every lane unconditionally, so slots
;; beyond the packed data (still within WIDTH slots of %startptr under the mask
;; encoding assumed above) may be written.
define MASK @__packed_store_active2(i32 * %startptr, <WIDTH x i32> %vals,
|
||||||
|
<WIDTH x MASK> %full_mask) nounwind alwaysinline {
|
||||||
|
entry:
|
||||||
|
%mask = call i64 @__movmsk(<WIDTH x MASK> %full_mask)
|
||||||
|
%mask_known = call i1 @__is_compile_time_constant_mask(<WIDTH x MASK> %full_mask)
|
||||||
|
br i1 %mask_known, label %known_mask, label %unknown_mask
|
||||||
|
|
||||||
|
known_mask:
|
||||||
|
;; A constant mask that is not all-on falls back to the generic loop.
%allon = icmp eq i64 %mask, ALL_ON_MASK
|
||||||
|
br i1 %allon, label %all_on, label %unknown_mask
|
||||||
|
|
||||||
|
all_on:
|
||||||
|
;; Every lane is active: write the whole vector with a single store.
%vecptr = bitcast i32 *%startptr to <WIDTH x i32> *
|
||||||
|
store <WIDTH x i32> %vals, <WIDTH x i32> * %vecptr, align 4
|
||||||
|
ret MASK WIDTH
|
||||||
|
|
||||||
|
unknown_mask:
|
||||||
|
br label %loop
|
||||||
|
|
||||||
|
loop:
|
||||||
|
;; %offset is the next output slot, %i is the lane being processed.
%offset = phi MASK [ 0, %unknown_mask ], [ %ch_offset, %loop ]
|
||||||
|
%i = phi i32 [ 0, %unknown_mask ], [ %ch_i, %loop ]
|
||||||
|
%storeval = extractelement <WIDTH x i32> %vals, i32 %i
|
||||||
|
|
||||||
|
;; The offset is in the range 0 to WIDTH-1, so zero- and sign-extension give
|
||||||
|
;; the same value; zero-extension is used because it is free. When MASK is
|
||||||
|
;; already i64 no extension is needed, as the i64 value is used directly.
|
||||||
|
ifelse(MASK, `i64',
|
||||||
|
` %storeptr = getelementptr i32 *%startptr, MASK %offset',
|
||||||
|
` %offset1 = zext MASK %offset to i64
|
||||||
|
%storeptr = getelementptr i32 *%startptr, i64 %offset1')
|
||||||
|
;; The store is not predicated: the value of an inactive lane lands in the
;; current slot and is overwritten by the next active lane, if there is one.
store i32 %storeval, i32 *%storeptr
|
||||||
|
|
||||||
|
%mull_mask = extractelement <WIDTH x MASK> %full_mask, i32 %i
|
||||||
|
;; Subtracting the mask element advances the offset by one when that element
;; is -1 and leaves it unchanged when the element is 0.
%ch_offset = sub MASK %offset, %mull_mask
|
||||||
|
|
||||||
|
; are we done yet?
|
||||||
|
%ch_i = add i32 %i, 1
|
||||||
|
%test = icmp ne i32 %ch_i, WIDTH
|
||||||
|
br i1 %test, label %loop, label %done
|
||||||
|
|
||||||
|
done:
|
||||||
|
;; Final offset after all WIDTH lanes have been visited.
ret MASK %ch_offset
|
||||||
|
}
|
||||||
')
|
')
|
||||||
|
|
||||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
|
|||||||
13
stdlib.ispc
13
stdlib.ispc
@@ -1209,6 +1209,13 @@ packed_store_active(uniform unsigned int a[],
|
|||||||
return __packed_store_active(a, vals, (UIntMaskType)__mask);
|
return __packed_store_active(a, vals, (UIntMaskType)__mask);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Unsigned overload of packed_store_active2: forwards the destination array,
// the per-lane values and the current execution mask (cast to UIntMaskType)
// to the __packed_store_active2 builtin and returns the builtin's result.
// NOTE(review): the result is presumably the number of values packed --
// confirm against the builtin's definition.
static inline uniform int
|
||||||
|
packed_store_active2(uniform unsigned int a[],
|
||||||
|
unsigned int vals) {
|
||||||
|
return __packed_store_active2(a, vals, (UIntMaskType)__mask);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
static inline uniform int
|
static inline uniform int
|
||||||
packed_load_active(uniform int a[], varying int * uniform vals) {
|
packed_load_active(uniform int a[], varying int * uniform vals) {
|
||||||
return __packed_load_active(a, vals, (IntMaskType)__mask);
|
return __packed_load_active(a, vals, (IntMaskType)__mask);
|
||||||
@@ -1219,6 +1226,12 @@ packed_store_active(uniform int a[], int vals) {
|
|||||||
return __packed_store_active(a, vals, (IntMaskType)__mask);
|
return __packed_store_active(a, vals, (IntMaskType)__mask);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Signed overload of packed_store_active2: forwards the destination array,
// the per-lane values and the current execution mask (cast to IntMaskType)
// to the __packed_store_active2 builtin and returns the builtin's result.
// NOTE(review): the result is presumably the number of values packed --
// confirm against the builtin's definition.
static inline uniform int
|
||||||
|
packed_store_active2(uniform int a[], int vals) {
|
||||||
|
return __packed_store_active2(a, vals, (IntMaskType)__mask);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
///////////////////////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////////////////////
|
||||||
// System information
|
// System information
|
||||||
|
|
||||||
|
|||||||
16
tests/packed-store2-1.ispc
Normal file
16
tests/packed-store2-1.ispc
Normal file
@@ -0,0 +1,16 @@
|
|||||||
|
|
||||||
|
// Exported helper that returns programCount.
// NOTE(review): presumably queried by the test harness to size RET/aFOO -- confirm.
export uniform int width() { return programCount; }
|
||||||
|
|
||||||
|
// Test body: with no extra masking, packs the per-lane value a into a zeroed
// scratch array starting at element 2, then copies the first programCount
// elements of the scratch array into RET.
// NOTE(review): assumes the harness fills aFOO with 1, 2, 3, ... -- confirm.
export void f_f(uniform float RET[], uniform float aFOO[]) {
|
||||||
|
float a = aFOO[programIndex];
|
||||||
|
// Two leading elements stay zero because the packed data starts at pack[2].
uniform int pack[2+programCount];
|
||||||
|
for (uniform int i = 0; i < 2+programCount; ++i)
|
||||||
|
pack[i] = 0;
|
||||||
|
// The float value a is passed where the int overload expects an int; the
// return value is deliberately ignored here.
packed_store_active2(&pack[2], a);
|
||||||
|
RET[programIndex] = pack[programIndex];
|
||||||
|
}
|
||||||
|
|
||||||
|
// Expected output for f_f: programIndex-1 in every lane, with elements 0 and 1
// then forced to 0 (the two scratch elements that precede the packed data).
export void result(uniform float RET[]) {
|
||||||
|
RET[programIndex] = programIndex-1;
|
||||||
|
// Overrides the -1 and 0 written above for the first two elements.
RET[0] = RET[1] = 0;
|
||||||
|
}
|
||||||
21
tests/packed-store2-2.ispc
Normal file
21
tests/packed-store2-2.ispc
Normal file
@@ -0,0 +1,21 @@
|
|||||||
|
|
||||||
|
// Exported helper that returns programCount.
// NOTE(review): presumably queried by the test harness to size RET/aFOO -- confirm.
export uniform int width() { return programCount; }
|
||||||
|
|
||||||
|
// Test body: only lanes whose value a is odd pack a into a zeroed scratch
// array starting at element 2. The element just past the packed values is
// then reset to 0, since packed_store_active2 may have written a leftover
// value there, and the first programCount scratch elements are copied to RET.
// NOTE(review): assumes the harness fills aFOO with 1, 2, 3, ... -- confirm.
export void f_f(uniform float RET[], uniform float aFOO[]) {
|
||||||
|
float a = aFOO[programIndex];
|
||||||
|
uniform int pack[2+programCount];
|
||||||
|
// Start at 0 so the count is defined even if no lane takes the branch below.
uniform int number = 0;
|
||||||
|
for (uniform int i = 0; i < 2+programCount; ++i)
|
||||||
|
pack[i] = 0;
|
||||||
|
if ((int)a & 1)
|
||||||
|
number = packed_store_active2(&pack[2], a);
|
||||||
|
// The count may equal programCount (every lane stored), in which case
// pack[2+number] would be one past the end of the array; skip the reset then.
if (number < programCount)
|
||||||
|
pack[2+number] = 0;
|
||||||
|
RET[programIndex] = pack[programIndex];
|
||||||
|
}
|
||||||
|
|
||||||
|
// Expected output for f_f: all zeros, except that elements 2 up to (but not
// including) 2+programCount/2 hold the odd numbers 1, 3, 5, ...
export void result(uniform float RET[]) {
|
||||||
|
RET[programIndex] = 0;
|
||||||
|
uniform int val = 1;
|
||||||
|
// val advances by 2 per element, producing consecutive odd numbers.
for (uniform int i = 2; i < 2+programCount/2; ++i, val += 2)
|
||||||
|
RET[i] = val;
|
||||||
|
}
|
||||||
17
tests/packed-store2-3.ispc
Normal file
17
tests/packed-store2-3.ispc
Normal file
@@ -0,0 +1,17 @@
|
|||||||
|
|
||||||
|
// Exported helper that returns programCount.
// NOTE(review): presumably queried by the test harness to size RET/aFOO -- confirm.
export uniform int width() { return programCount; }
|
||||||
|
|
||||||
|
// Test body: checks the return value of packed_store_active2. Only lanes whose
// value a is odd perform the packed store; the returned count is accumulated
// into count and broadcast to every element of RET.
// NOTE(review): assumes the harness fills aFOO with 1, 2, 3, ... -- confirm.
export void f_f(uniform float RET[], uniform float aFOO[]) {
|
||||||
|
float a = aFOO[programIndex];
|
||||||
|
uniform int pack[2+programCount];
|
||||||
|
for (uniform int i = 0; i < 2+programCount; ++i)
|
||||||
|
pack[i] = 0;
|
||||||
|
// Initialized to 0, so count is defined even if the branch below is not taken.
uniform int count = 0;
|
||||||
|
if ((int)a & 1)
|
||||||
|
count += packed_store_active2(&pack[2], a);
|
||||||
|
RET[programIndex] = count;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Expected output for f_f: programCount/2 in every lane, or 1 when
// programCount is 1 (where programCount/2 would be 0).
export void result(uniform float RET[]) {
|
||||||
|
RET[programIndex] = (programCount == 1) ? 1 : programCount/2;
|
||||||
|
}
|
||||||
15
tests/packed-store2.ispc
Normal file
15
tests/packed-store2.ispc
Normal file
@@ -0,0 +1,15 @@
|
|||||||
|
|
||||||
|
// Exported helper that returns programCount.
// NOTE(review): presumably queried by the test harness to size RET/aFOO -- confirm.
export uniform int width() { return programCount; }
|
||||||
|
|
||||||
|
// Test body for the unsigned overload: with no extra masking, packs a
// (converted to unsigned int) into a zeroed array of programCount elements and
// copies that array into RET.
// NOTE(review): assumes the harness fills aFOO with 1, 2, 3, ... -- confirm.
export void f_f(uniform float RET[], uniform float aFOO[]) {
|
||||||
|
float a = aFOO[programIndex];
|
||||||
|
uniform unsigned int pack[programCount];
|
||||||
|
for (uniform int i = 0; i < programCount; ++i)
|
||||||
|
pack[i] = 0;
|
||||||
|
// The explicit cast selects the unsigned int overload; the return value is ignored.
packed_store_active2(pack, (unsigned int)a);
|
||||||
|
RET[programIndex] = pack[programIndex];
|
||||||
|
}
|
||||||
|
|
||||||
|
// Expected output for f_f: element i holds 1 + i.
export void result(uniform float RET[]) {
|
||||||
|
RET[programIndex] = 1 + programIndex;
|
||||||
|
}
|
||||||
Reference in New Issue
Block a user