Add peephole optimization to eliminate some mask AND operations.
On KNC, the various vector comparison instructions can optionally be masked; when a mask is provided, the value returned is effectively the AND of the mask with the result of the comparison. This change adds an optimization pass to the C++ backend that looks for vector ANDs where one operand is a comparison and rewrites them to use the masked form of the comparison. For example, "__and(__equal_float(a, b), c)" is changed to "__equal_float_and_mask(a, b, c)", which saves one instruction. Issue #319.
This commit is contained in:
@@ -390,6 +390,15 @@ static FORCEINLINE __vec64_i1 NAME##_##SUFFIX(TYPE a, TYPE b) { \
|
||||
for (int i = 0; i < 64; ++i) \
|
||||
ret.v |= uint64_t((CAST)(a.v[i]) OP (CAST)(b.v[i])) << i; \
|
||||
return ret; \
|
||||
} \
|
||||
static FORCEINLINE __vec64_i1 NAME##_##SUFFIX##_and_mask(TYPE a, TYPE b, \
|
||||
__vec64_i1 mask) { \
|
||||
__vec64_i1 ret; \
|
||||
ret.v = 0; \
|
||||
for (int i = 0; i < 64; ++i) \
|
||||
ret.v |= uint64_t((CAST)(a.v[i]) OP (CAST)(b.v[i])) << i; \
|
||||
ret.v &= mask.v; \
|
||||
return ret; \
|
||||
}
|
||||
|
||||
#define INSERT_EXTRACT(VTYPE, STYPE) \
|
||||
|
||||
Reference in New Issue
Block a user