For << and >> with the C++ target, detect when all program instances are shifting by the same amount.

In this case, we now emit calls to potentially-specialized left/right shift
functions that take a single integer value as the shift amount. These in turn
can be matched to the corresponding intrinsics for the SSE target.
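For illustration, a minimal sketch of how such a uniform-shift function can map to an SSE intrinsic, assuming a 4-wide i32 vector. The type name __vec4_i32 is hypothetical; _mm_sll_epi32 and _mm_cvtsi32_si128 are standard SSE2 intrinsics, and this is not the exact code the compiler emits.

#include <emmintrin.h>   // SSE2
#include <stdint.h>

struct __vec4_i32 { __m128i v; };   // hypothetical 4-wide i32 vector

// Uniform-shift variant: every lane shifts by the same runtime amount,
// so the per-lane loop collapses to a single SSE2 instruction.
static inline __vec4_i32 __shl(__vec4_i32 a, int32_t b) {
    __vec4_i32 ret;
    // _mm_sll_epi32 shifts all four lanes left by the count held in the
    // low 64 bits of its second operand.
    ret.v = _mm_sll_epi32(a.v, _mm_cvtsi32_si128(b));
    return ret;
}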

Issue #145.
Matt Pharr
2012-01-19 10:04:32 -07:00
parent 3f89295d10
commit 68f6ea8def
6 changed files with 433 additions and 280 deletions


@@ -251,6 +251,14 @@ static FORCEINLINE TYPE __select(bool cond, TYPE a, TYPE b) { \
     return cond ? a : b; \
 }
+#define SHIFT_UNIFORM(TYPE, CAST, NAME, OP) \
+static FORCEINLINE TYPE NAME(TYPE a, int32_t b) { \
+    TYPE ret; \
+    for (int i = 0; i < 16; ++i) \
+        ret.v[i] = (CAST)(a.v[i]) OP b; \
+    return ret; \
+}
 #define SMEAR(VTYPE, NAME, STYPE) \
 static FORCEINLINE VTYPE __smear_##NAME(STYPE v) { \
     VTYPE ret; \
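After preprocessing, one of the instantiations added below, SHIFT_UNIFORM(__vec16_i32, int32_t, __shl, <<), expands to roughly the following (whitespace normalized):

static FORCEINLINE __vec16_i32 __shl(__vec16_i32 a, int32_t b) {
    __vec16_i32 ret;
    for (int i = 0; i < 16; ++i)
        ret.v[i] = (int32_t)(a.v[i]) << b;
    return ret;
}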
@@ -386,6 +394,10 @@ BINARY_OP_CAST(__vec16_i8, int8_t, __srem, %)
 BINARY_OP_CAST(__vec16_i8, uint8_t, __lshr, >>)
 BINARY_OP_CAST(__vec16_i8, int8_t, __ashr, >>)
+SHIFT_UNIFORM(__vec16_i8, uint8_t, __lshr, >>)
+SHIFT_UNIFORM(__vec16_i8, int8_t, __ashr, >>)
+SHIFT_UNIFORM(__vec16_i8, int8_t, __shl, <<)
 CMP_OP(__vec16_i8, int8_t, __equal, ==)
 CMP_OP(__vec16_i8, int8_t, __not_equal, !=)
 CMP_OP(__vec16_i8, uint8_t, __unsigned_less_equal, <=)
@@ -425,6 +437,10 @@ BINARY_OP_CAST(__vec16_i16, int16_t, __srem, %)
 BINARY_OP_CAST(__vec16_i16, uint16_t, __lshr, >>)
 BINARY_OP_CAST(__vec16_i16, int16_t, __ashr, >>)
+SHIFT_UNIFORM(__vec16_i16, uint16_t, __lshr, >>)
+SHIFT_UNIFORM(__vec16_i16, int16_t, __ashr, >>)
+SHIFT_UNIFORM(__vec16_i16, int16_t, __shl, <<)
 CMP_OP(__vec16_i16, int16_t, __equal, ==)
 CMP_OP(__vec16_i16, int16_t, __not_equal, !=)
 CMP_OP(__vec16_i16, uint16_t, __unsigned_less_equal, <=)
@@ -464,6 +480,10 @@ BINARY_OP_CAST(__vec16_i32, int32_t, __srem, %)
 BINARY_OP_CAST(__vec16_i32, uint32_t, __lshr, >>)
 BINARY_OP_CAST(__vec16_i32, int32_t, __ashr, >>)
+SHIFT_UNIFORM(__vec16_i32, uint32_t, __lshr, >>)
+SHIFT_UNIFORM(__vec16_i32, int32_t, __ashr, >>)
+SHIFT_UNIFORM(__vec16_i32, int32_t, __shl, <<)
 CMP_OP(__vec16_i32, int32_t, __equal, ==)
 CMP_OP(__vec16_i32, int32_t, __not_equal, !=)
 CMP_OP(__vec16_i32, uint32_t, __unsigned_less_equal, <=)
@@ -503,6 +523,10 @@ BINARY_OP_CAST(__vec16_i64, int64_t, __srem, %)
 BINARY_OP_CAST(__vec16_i64, uint64_t, __lshr, >>)
 BINARY_OP_CAST(__vec16_i64, int64_t, __ashr, >>)
+SHIFT_UNIFORM(__vec16_i64, uint64_t, __lshr, >>)
+SHIFT_UNIFORM(__vec16_i64, int64_t, __ashr, >>)
+SHIFT_UNIFORM(__vec16_i64, int64_t, __shl, <<)
 CMP_OP(__vec16_i64, int64_t, __equal, ==)
 CMP_OP(__vec16_i64, int64_t, __not_equal, !=)
 CMP_OP(__vec16_i64, uint64_t, __unsigned_less_equal, <=)