For << and >> with the C++ target, detect when all program instances are shifting by the same amount.

In this case, we now emit calls to potentially-specialized left/right shift
functions that take a single integer value as the shift amount. These in turn
can be matched to the corresponding intrinsics for the SSE target.
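For illustration, a minimal sketch of how such a uniform-shift function can map to an SSE intrinsic, assuming a 4-wide i32 vector. The type name __vec4_i32 is hypothetical; _mm_sll_epi32 and _mm_cvtsi32_si128 are standard SSE2 intrinsics, and this is not the exact code the compiler emits.

#include <emmintrin.h>   // SSE2
#include <stdint.h>

struct __vec4_i32 { __m128i v; };   // hypothetical 4-wide i32 vector

// Uniform-shift variant: every lane shifts by the same runtime amount,
// so the per-lane loop collapses to a single SSE2 instruction.
static inline __vec4_i32 __shl(__vec4_i32 a, int32_t b) {
    __vec4_i32 ret;
    // _mm_sll_epi32 shifts all four lanes left by the count held in the
    // low 64 bits of its second operand.
    ret.v = _mm_sll_epi32(a.v, _mm_cvtsi32_si128(b));
    return ret;
}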

Issue #145.
Matt Pharr
2012-01-19 10:04:32 -07:00
parent 3f89295d10
commit 68f6ea8def
6 changed files with 433 additions and 280 deletions


@@ -251,6 +251,14 @@ static FORCEINLINE TYPE __select(bool cond, TYPE a, TYPE b) { \
     return cond ? a : b; \
 }
+#define SHIFT_UNIFORM(TYPE, CAST, NAME, OP) \
+static FORCEINLINE TYPE NAME(TYPE a, int32_t b) { \
+    TYPE ret; \
+    for (int i = 0; i < 16; ++i) \
+        ret.v[i] = (CAST)(a.v[i]) OP b; \
+    return ret; \
+}
 #define SMEAR(VTYPE, NAME, STYPE) \
 static FORCEINLINE VTYPE __smear_##NAME(STYPE v) { \
     VTYPE ret; \
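After preprocessing, one of the instantiations added below, SHIFT_UNIFORM(__vec16_i32, int32_t, __shl, <<), expands to roughly the following (whitespace normalized):

static FORCEINLINE __vec16_i32 __shl(__vec16_i32 a, int32_t b) {
    __vec16_i32 ret;
    for (int i = 0; i < 16; ++i)
        ret.v[i] = (int32_t)(a.v[i]) << b;
    return ret;
}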
@@ -386,6 +394,10 @@ BINARY_OP_CAST(__vec16_i8, int8_t, __srem, %)
 BINARY_OP_CAST(__vec16_i8, uint8_t, __lshr, >>)
 BINARY_OP_CAST(__vec16_i8, int8_t, __ashr, >>)
+SHIFT_UNIFORM(__vec16_i8, uint8_t, __lshr, >>)
+SHIFT_UNIFORM(__vec16_i8, int8_t, __ashr, >>)
+SHIFT_UNIFORM(__vec16_i8, int8_t, __shl, <<)
 CMP_OP(__vec16_i8, int8_t, __equal, ==)
 CMP_OP(__vec16_i8, int8_t, __not_equal, !=)
 CMP_OP(__vec16_i8, uint8_t, __unsigned_less_equal, <=)
@@ -425,6 +437,10 @@ BINARY_OP_CAST(__vec16_i16, int16_t, __srem, %)
 BINARY_OP_CAST(__vec16_i16, uint16_t, __lshr, >>)
 BINARY_OP_CAST(__vec16_i16, int16_t, __ashr, >>)
+SHIFT_UNIFORM(__vec16_i16, uint16_t, __lshr, >>)
+SHIFT_UNIFORM(__vec16_i16, int16_t, __ashr, >>)
+SHIFT_UNIFORM(__vec16_i16, int16_t, __shl, <<)
 CMP_OP(__vec16_i16, int16_t, __equal, ==)
 CMP_OP(__vec16_i16, int16_t, __not_equal, !=)
 CMP_OP(__vec16_i16, uint16_t, __unsigned_less_equal, <=)
@@ -464,6 +480,10 @@ BINARY_OP_CAST(__vec16_i32, int32_t, __srem, %)
 BINARY_OP_CAST(__vec16_i32, uint32_t, __lshr, >>)
 BINARY_OP_CAST(__vec16_i32, int32_t, __ashr, >>)
+SHIFT_UNIFORM(__vec16_i32, uint32_t, __lshr, >>)
+SHIFT_UNIFORM(__vec16_i32, int32_t, __ashr, >>)
+SHIFT_UNIFORM(__vec16_i32, int32_t, __shl, <<)
 CMP_OP(__vec16_i32, int32_t, __equal, ==)
 CMP_OP(__vec16_i32, int32_t, __not_equal, !=)
 CMP_OP(__vec16_i32, uint32_t, __unsigned_less_equal, <=)
@@ -503,6 +523,10 @@ BINARY_OP_CAST(__vec16_i64, int64_t, __srem, %)
 BINARY_OP_CAST(__vec16_i64, uint64_t, __lshr, >>)
 BINARY_OP_CAST(__vec16_i64, int64_t, __ashr, >>)
+SHIFT_UNIFORM(__vec16_i64, uint64_t, __lshr, >>)
+SHIFT_UNIFORM(__vec16_i64, int64_t, __ashr, >>)
+SHIFT_UNIFORM(__vec16_i64, int64_t, __shl, <<)
 CMP_OP(__vec16_i64, int64_t, __equal, ==)
 CMP_OP(__vec16_i64, int64_t, __not_equal, !=)
 CMP_OP(__vec16_i64, uint64_t, __unsigned_less_equal, <=)