Extract constant offsets from gather/scatter base+offsets offset vectors.

When we're able to turn a general gather/scatter into the "base + offsets"
form, we now try to extract out any constant components of the offsets and
then pass them as a separate parameter to the gather/scatter function
implementation.

We then in turn carefully emit code for the addressing calculation so that
these constant offsets match LLVM's patterns to detect this case, such that
we get the constant offsets directly encoded in the instruction's addressing
calculation in many cases, saving arithmetic instructions to do these
calculations.

Improves performance of stencil by ~15%.  Other workloads unchanged.
This commit is contained in:
Matt Pharr
2012-01-24 14:41:15 -08:00
parent 7be2c399b1
commit a5b7fca7e0
5 changed files with 614 additions and 355 deletions

View File

@@ -1060,13 +1060,15 @@ static FORCEINLINE void __masked_store_blend_64(void *p, __vec16_i64 val,
// offsets * offsetScale is in bytes (for all of these)
#define GATHER_BASE_OFFSETS(VTYPE, STYPE, OTYPE, FUNC) \
static FORCEINLINE VTYPE FUNC(unsigned char *b, OTYPE offsets, uint32_t scale,\
__vec16_i1 mask) { \
static FORCEINLINE VTYPE FUNC(unsigned char *b, OTYPE varyingOffset, \
uint32_t scale, OTYPE constOffset, \
__vec16_i1 mask) { \
VTYPE ret; \
int8_t *base = (int8_t *)b; \
for (int i = 0; i < 16; ++i) \
if ((mask.v & (1 << i)) != 0) { \
STYPE *ptr = (STYPE *)(base + scale * offsets.v[i]); \
STYPE *ptr = (STYPE *)(base + scale * varyingOffset.v[i] + \
constOffset.v[i]); \
ret.v[i] = *ptr; \
} \
return ret; \
@@ -1104,13 +1106,15 @@ GATHER_GENERAL(__vec16_i64, int64_t, __vec16_i64, __gather64_i64)
// scatter
#define SCATTER_BASE_OFFSETS(VTYPE, STYPE, OTYPE, FUNC) \
static FORCEINLINE void FUNC(unsigned char *b, OTYPE offsets, uint32_t scale,\
#define SCATTER_BASE_VARYINGOFFSET(VTYPE, STYPE, OTYPE, FUNC) \
static FORCEINLINE void FUNC(unsigned char *b, OTYPE varyingOffset, \
uint32_t scale, OTYPE constOffset, \
VTYPE val, __vec16_i1 mask) { \
int8_t *base = (int8_t *)b; \
for (int i = 0; i < 16; ++i) \
if ((mask.v & (1 << i)) != 0) { \
STYPE *ptr = (STYPE *)(base + scale * offsets.v[i]); \
STYPE *ptr = (STYPE *)(base + scale * varyingOffset.v[i] + \
constOffset.v[i]); \
*ptr = val.v[i]; \
} \
}