From 473f1cb4d2f196e20bed159aef7a041053173f80 Mon Sep 17 00:00:00 2001 From: Ilia Filippov Date: Tue, 10 Dec 2013 20:39:24 +0400 Subject: [PATCH] packed_store_active2 --- builtins.cpp | 1 + builtins/util.m4 | 45 ++++++++++++++++++++++++++++++++++++++ stdlib.ispc | 13 +++++++++++ tests/packed-store2-1.ispc | 16 ++++++++++++++ tests/packed-store2-2.ispc | 21 ++++++++++++++++++ tests/packed-store2-3.ispc | 17 ++++++++++++++ tests/packed-store2.ispc | 15 +++++++++++++ 7 files changed, 128 insertions(+) create mode 100644 tests/packed-store2-1.ispc create mode 100644 tests/packed-store2-2.ispc create mode 100644 tests/packed-store2-3.ispc create mode 100644 tests/packed-store2.ispc diff --git a/builtins.cpp b/builtins.cpp index 2afd92d9..6be41f13 100644 --- a/builtins.cpp +++ b/builtins.cpp @@ -488,6 +488,7 @@ lSetInternalFunctions(llvm::Module *module) { "__num_cores", "__packed_load_active", "__packed_store_active", + "__packed_store_active2", "__popcnt_int32", "__popcnt_int64", "__prefetch_read_uniform_1", diff --git a/builtins/util.m4 b/builtins/util.m4 index e1c9bf97..7ce4ab7f 100644 --- a/builtins/util.m4 +++ b/builtins/util.m4 @@ -3815,6 +3815,51 @@ loopend: done: ret i32 %nextoffset } + +define MASK @__packed_store_active2(i32 * %startptr, %vals, + %full_mask) nounwind alwaysinline { +entry: + %mask = call i64 @__movmsk( %full_mask) + %mask_known = call i1 @__is_compile_time_constant_mask( %full_mask) + br i1 %mask_known, label %known_mask, label %unknown_mask + +known_mask: + %allon = icmp eq i64 %mask, ALL_ON_MASK + br i1 %allon, label %all_on, label %unknown_mask + +all_on: + %vecptr = bitcast i32 *%startptr to * + store %vals, * %vecptr, align 4 + ret MASK WIDTH + +unknown_mask: + br label %loop + +loop: + %offset = phi MASK [ 0, %unknown_mask ], [ %ch_offset, %loop ] + %i = phi i32 [ 0, %unknown_mask ], [ %ch_i, %loop ] + %storeval = extractelement %vals, i32 %i + +;; Offset has value in range from 0 to WIDTH-1. 
So it does not matter whether we +;; zero- or sign-extend it, and zero extension is free. Nothing is done for +;; an i64 MASK, as the offset is already an i64 value. +ifelse(MASK, `i64', +` %storeptr = getelementptr i32 *%startptr, MASK %offset', +` %offset1 = zext MASK %offset to i64 + %storeptr = getelementptr i32 *%startptr, i64 %offset1') + store i32 %storeval, i32 *%storeptr + + %mull_mask = extractelement %full_mask, i32 %i + %ch_offset = sub MASK %offset, %mull_mask + + ; are we done yet? + %ch_i = add i32 %i, 1 + %test = icmp ne i32 %ch_i, WIDTH + br i1 %test, label %loop, label %done + +done: + ret MASK %ch_offset +} ') ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; diff --git a/stdlib.ispc b/stdlib.ispc index 6768594b..3b17283d 100644 --- a/stdlib.ispc +++ b/stdlib.ispc @@ -1209,6 +1209,13 @@ packed_store_active(uniform unsigned int a[], return __packed_store_active(a, vals, (UIntMaskType)__mask); } +static inline uniform int +packed_store_active2(uniform unsigned int a[], + unsigned int vals) { + return __packed_store_active2(a, vals, (UIntMaskType)__mask); +} + + static inline uniform int packed_load_active(uniform int a[], varying int * uniform vals) { return __packed_load_active(a, vals, (IntMaskType)__mask); @@ -1219,6 +1226,12 @@ packed_store_active(uniform int a[], int vals) { return __packed_store_active(a, vals, (IntMaskType)__mask); } +static inline uniform int +packed_store_active2(uniform int a[], int vals) { + return __packed_store_active2(a, vals, (IntMaskType)__mask); +} + + /////////////////////////////////////////////////////////////////////////// // System information diff --git a/tests/packed-store2-1.ispc b/tests/packed-store2-1.ispc new file mode 100644 index 00000000..0ca3230a --- /dev/null +++ b/tests/packed-store2-1.ispc @@ -0,0 +1,16 @@ + +export uniform int width() { return programCount; } + +export void f_f(uniform float RET[], uniform float aFOO[]) { + float a = aFOO[programIndex]; + uniform int pack[2+programCount]; + for 
(uniform int i = 0; i < 2+programCount; ++i) + pack[i] = 0; + packed_store_active2(&pack[2], a); + RET[programIndex] = pack[programIndex]; +} + +export void result(uniform float RET[]) { + RET[programIndex] = programIndex-1; + RET[0] = RET[1] = 0; +} diff --git a/tests/packed-store2-2.ispc b/tests/packed-store2-2.ispc new file mode 100644 index 00000000..c29230ca --- /dev/null +++ b/tests/packed-store2-2.ispc @@ -0,0 +1,21 @@ + +export uniform int width() { return programCount; } + +export void f_f(uniform float RET[], uniform float aFOO[]) { + float a = aFOO[programIndex]; + uniform int pack[2+programCount]; + uniform int number; + for (uniform int i = 0; i < 2+programCount; ++i) + pack[i] = 0; + if ((int)a & 1) + number = packed_store_active2(&pack[2], a); + pack[2+number] = 0; + RET[programIndex] = pack[programIndex]; +} + +export void result(uniform float RET[]) { + RET[programIndex] = 0; + uniform int val = 1; + for (uniform int i = 2; i < 2+programCount/2; ++i, val += 2) + RET[i] = val; +} diff --git a/tests/packed-store2-3.ispc b/tests/packed-store2-3.ispc new file mode 100644 index 00000000..9192525e --- /dev/null +++ b/tests/packed-store2-3.ispc @@ -0,0 +1,17 @@ + +export uniform int width() { return programCount; } + +export void f_f(uniform float RET[], uniform float aFOO[]) { + float a = aFOO[programIndex]; + uniform int pack[2+programCount]; + for (uniform int i = 0; i < 2+programCount; ++i) + pack[i] = 0; + uniform int count = 0; + if ((int)a & 1) + count += packed_store_active2(&pack[2], a); + RET[programIndex] = count; +} + +export void result(uniform float RET[]) { + RET[programIndex] = (programCount == 1) ? 
1 : programCount/2; +} diff --git a/tests/packed-store2.ispc b/tests/packed-store2.ispc new file mode 100644 index 00000000..13973bc3 --- /dev/null +++ b/tests/packed-store2.ispc @@ -0,0 +1,15 @@ + +export uniform int width() { return programCount; } + +export void f_f(uniform float RET[], uniform float aFOO[]) { + float a = aFOO[programIndex]; + uniform unsigned int pack[programCount]; + for (uniform int i = 0; i < programCount; ++i) + pack[i] = 0; + packed_store_active2(pack, (unsigned int)a); + RET[programIndex] = pack[programIndex]; +} + +export void result(uniform float RET[]) { + RET[programIndex] = 1 + programIndex; +}