Add generic-32 target.

This commit is contained in:
Matt Pharr
2012-05-03 11:11:06 -07:00
parent ee1fe3aa9f
commit d99bd279e8
8 changed files with 283 additions and 203 deletions

412
opt.cpp
View File

@@ -265,6 +265,124 @@ lGEPInst(llvm::Value *ptr, llvm::Value *offset, const char *name,
}
/** Given a vector of constant values (int, float, or bool) representing an
execution mask, convert it to a bitvector where the 0th bit corresponds
to the first vector value and so forth.
*/
static uint32_t
lConstElementsToMask(const llvm::SmallVector<llvm::Constant *,
ISPC_MAX_NVEC> &elements) {
Assert(elements.size() <= 32);
uint32_t mask = 0;
for (unsigned int i = 0; i < elements.size(); ++i) {
llvm::APInt intMaskValue;
// SSE has the "interesting" approach of encoding blending
// masks as <n x float>.
llvm::ConstantFP *cf = llvm::dyn_cast<llvm::ConstantFP>(elements[i]);
if (cf != NULL) {
llvm::APFloat apf = cf->getValueAPF();
intMaskValue = apf.bitcastToAPInt();
}
else {
// Otherwise get it as an int
llvm::ConstantInt *ci = llvm::dyn_cast<llvm::ConstantInt>(elements[i]);
Assert(ci != NULL); // vs return -1 if NULL?
intMaskValue = ci->getValue();
}
// Is the high-bit set? If so, OR in the appropriate bit in
// the result mask
if (intMaskValue.countLeadingOnes() > 0)
mask |= (1 << i);
}
return mask;
}
/** Given an llvm::Value represinting a vector mask, see if the value is a
constant. If so, return true and set *bits to be the integer mask
found by taking the high bits of the mask values in turn and
concatenating them into a single integer. In other words, given the
4-wide mask: < 0xffffffff, 0, 0, 0xffffffff >, we have 0b1001 = 9.
*/
static bool
lGetMask(llvm::Value *factor, uint32_t *mask) {
#ifndef LLVM_3_0
llvm::ConstantDataVector *cdv = llvm::dyn_cast<llvm::ConstantDataVector>(factor);
if (cdv != NULL) {
llvm::SmallVector<llvm::Constant *, ISPC_MAX_NVEC> elements;
for (int i = 0; i < (int)cdv->getNumElements(); ++i)
elements.push_back(cdv->getElementAsConstant(i));
*mask = lConstElementsToMask(elements);
return true;
}
#endif
llvm::ConstantVector *cv = llvm::dyn_cast<llvm::ConstantVector>(factor);
if (cv != NULL) {
llvm::SmallVector<llvm::Constant *, ISPC_MAX_NVEC> elements;
#ifndef LLVM_3_0
for (int i = 0; i < (int)cv->getNumOperands(); ++i) {
llvm::Constant *c =
llvm::dyn_cast<llvm::Constant>(cv->getOperand(i));
if (c == NULL)
return NULL;
elements.push_back(c);
}
#else
cv->getVectorElements(elements);
#endif
*mask = lConstElementsToMask(elements);
return true;
}
else if (llvm::isa<llvm::ConstantAggregateZero>(factor)) {
*mask = 0;
return true;
}
else {
#if 0
llvm::ConstantExpr *ce = llvm::dyn_cast<llvm::ConstantExpr>(factor);
if (ce != NULL) {
llvm::TargetMachine *targetMachine = g->target.GetTargetMachine();
const llvm::TargetData *td = targetMachine->getTargetData();
llvm::Constant *c = llvm::ConstantFoldConstantExpression(ce, td);
c->dump();
factor = c;
}
// else we should be able to handle it above...
Assert(!llvm::isa<llvm::Constant>(factor));
#endif
return false;
}
}
enum MaskStatus { ALL_ON, ALL_OFF, MIXED, UNKNOWN };
/** Determines if the given mask value is all on, all off, mixed, or
unknown at compile time.
*/
static MaskStatus
lGetMaskStatus(llvm::Value *mask, int vecWidth = -1) {
uint32_t bits;
if (lGetMask(mask, &bits) == false)
return UNKNOWN;
if (bits == 0)
return ALL_OFF;
if (vecWidth == -1)
vecWidth = g->target.vectorWidth;
Assert(vecWidth <= 32);
for (int i = 0; i < vecWidth; ++i) {
if ((bits & (1ull << i)) == 0)
return MIXED;
}
return ALL_ON;
}
///////////////////////////////////////////////////////////////////////////
void
@@ -559,12 +677,12 @@ private:
instruction for this optimization pass.
*/
struct BlendInstruction {
BlendInstruction(llvm::Function *f, int ao, int o0, int o1, int of)
BlendInstruction(llvm::Function *f, uint32_t ao, int o0, int o1, int of)
: function(f), allOnMask(ao), op0(o0), op1(o1), opFactor(of) { }
/** Function pointer for the blend instruction */
llvm::Function *function;
/** Mask value for an "all on" mask for this instruction */
int allOnMask;
uint32_t allOnMask;
/** The operand number in the llvm CallInst corresponds to the
first operand to blend with. */
int op0;
@@ -609,99 +727,6 @@ IntrinsicsOpt::IntrinsicsOpt()
}
/** Given a vector of constant values (int, float, or bool) representing an
execution mask, convert it to a bitvector where the 0th bit corresponds
to the first vector value and so forth.
*/
static int
lConstElementsToMask(const llvm::SmallVector<llvm::Constant *,
ISPC_MAX_NVEC> &elements) {
Assert(elements.size() <= 32);
int mask = 0;
for (unsigned int i = 0; i < elements.size(); ++i) {
llvm::APInt intMaskValue;
// SSE has the "interesting" approach of encoding blending
// masks as <n x float>.
llvm::ConstantFP *cf = llvm::dyn_cast<llvm::ConstantFP>(elements[i]);
if (cf != NULL) {
llvm::APFloat apf = cf->getValueAPF();
intMaskValue = apf.bitcastToAPInt();
}
else {
// Otherwise get it as an int
llvm::ConstantInt *ci = llvm::dyn_cast<llvm::ConstantInt>(elements[i]);
Assert(ci != NULL); // vs return -1 if NULL?
intMaskValue = ci->getValue();
}
// Is the high-bit set? If so, OR in the appropriate bit in
// the result mask
if (intMaskValue.countLeadingOnes() > 0)
mask |= (1 << i);
}
return mask;
}
/** Given an llvm::Value represinting a vector mask, see if the value is a
constant. If so, return the integer mask found by taking the high bits
of the mask values in turn and concatenating them into a single integer.
In other words, given the 4-wide mask: < 0xffffffff, 0, 0, 0xffffffff >,
we have 0b1001 = 9.
*/
static int
lGetMask(llvm::Value *factor) {
/* FIXME: This will break if we ever do 32-wide compilation, in which case
it don't be possible to distinguish between -1 for "don't know" and
"known and all bits on". */
Assert(g->target.vectorWidth < 32);
#ifndef LLVM_3_0
llvm::ConstantDataVector *cdv = llvm::dyn_cast<llvm::ConstantDataVector>(factor);
if (cdv != NULL) {
llvm::SmallVector<llvm::Constant *, ISPC_MAX_NVEC> elements;
for (int i = 0; i < (int)cdv->getNumElements(); ++i)
elements.push_back(cdv->getElementAsConstant(i));
return lConstElementsToMask(elements);
}
#endif
llvm::ConstantVector *cv = llvm::dyn_cast<llvm::ConstantVector>(factor);
if (cv != NULL) {
llvm::SmallVector<llvm::Constant *, ISPC_MAX_NVEC> elements;
#ifndef LLVM_3_0
for (int i = 0; i < (int)cv->getNumOperands(); ++i) {
llvm::Constant *c =
llvm::dyn_cast<llvm::Constant>(cv->getOperand(i));
if (c == NULL)
return NULL;
elements.push_back(c);
}
#else
cv->getVectorElements(elements);
#endif
return lConstElementsToMask(elements);
}
else if (llvm::isa<llvm::ConstantAggregateZero>(factor))
return 0;
else {
#if 0
llvm::ConstantExpr *ce = llvm::dyn_cast<llvm::ConstantExpr>(factor);
if (ce != NULL) {
llvm::TargetMachine *targetMachine = g->target.GetTargetMachine();
const llvm::TargetData *td = targetMachine->getTargetData();
llvm::Constant *c = llvm::ConstantFoldConstantExpression(ce, td);
c->dump();
factor = c;
}
// else we should be able to handle it above...
Assert(!llvm::isa<llvm::Constant>(factor));
#endif
return -1;
}
}
/** Given an llvm::Value, return true if we can determine that it's an
undefined value. This only makes a weak attempt at chasing this down,
only detecting flat-out undef values, and bitcasts of undef values.
@@ -779,26 +804,28 @@ IntrinsicsOpt::runOnBasicBlock(llvm::BasicBlock &bb) {
goto restart;
}
int mask = lGetMask(factor);
llvm::Value *value = NULL;
if (mask == 0)
// Mask all off -> replace with the first blend value
value = v[0];
else if (mask == blend->allOnMask)
// Mask all on -> replace with the second blend value
value = v[1];
uint32_t mask;
if (lGetMask(factor, &mask) == true) {
llvm::Value *value = NULL;
if (mask == 0)
// Mask all off -> replace with the first blend value
value = v[0];
else if (mask == blend->allOnMask)
// Mask all on -> replace with the second blend value
value = v[1];
if (value != NULL) {
llvm::ReplaceInstWithValue(iter->getParent()->getInstList(),
iter, value);
modifiedAny = true;
goto restart;
if (value != NULL) {
llvm::ReplaceInstWithValue(iter->getParent()->getInstList(),
iter, value);
modifiedAny = true;
goto restart;
}
}
}
else if (matchesMaskInstruction(callInst->getCalledFunction())) {
llvm::Value *factor = callInst->getArgOperand(0);
int mask = lGetMask(factor);
if (mask != -1) {
uint32_t mask;
if (lGetMask(factor, &mask) == true) {
// If the vector-valued mask has a known value, replace it
// with the corresponding integer mask from its elements
// high bits.
@@ -812,71 +839,75 @@ IntrinsicsOpt::runOnBasicBlock(llvm::BasicBlock &bb) {
else if (callInst->getCalledFunction() == avxMaskedLoad32 ||
callInst->getCalledFunction() == avxMaskedLoad64) {
llvm::Value *factor = callInst->getArgOperand(1);
int mask = lGetMask(factor);
if (mask == 0) {
// nothing being loaded, replace with undef value
llvm::Type *returnType = callInst->getType();
Assert(llvm::isa<llvm::VectorType>(returnType));
llvm::Value *undefValue = llvm::UndefValue::get(returnType);
llvm::ReplaceInstWithValue(iter->getParent()->getInstList(),
iter, undefValue);
modifiedAny = true;
goto restart;
}
else if (mask == 0xff) {
// all lanes active; replace with a regular load
llvm::Type *returnType = callInst->getType();
Assert(llvm::isa<llvm::VectorType>(returnType));
// cast the i8 * to the appropriate type
const char *name = LLVMGetName(callInst->getArgOperand(0), "_cast");
llvm::Value *castPtr =
new llvm::BitCastInst(callInst->getArgOperand(0),
llvm::PointerType::get(returnType, 0),
name, callInst);
lCopyMetadata(castPtr, callInst);
int align = callInst->getCalledFunction() == avxMaskedLoad32 ? 4 : 8;
name = LLVMGetName(callInst->getArgOperand(0), "_load");
llvm::Instruction *loadInst =
new llvm::LoadInst(castPtr, name, false /* not volatile */,
align, (llvm::Instruction *)NULL);
lCopyMetadata(loadInst, callInst);
llvm::ReplaceInstWithInst(callInst, loadInst);
modifiedAny = true;
goto restart;
uint32_t mask;
if (lGetMask(factor, &mask) == true) {
if (mask == 0) {
// nothing being loaded, replace with undef value
llvm::Type *returnType = callInst->getType();
Assert(llvm::isa<llvm::VectorType>(returnType));
llvm::Value *undefValue = llvm::UndefValue::get(returnType);
llvm::ReplaceInstWithValue(iter->getParent()->getInstList(),
iter, undefValue);
modifiedAny = true;
goto restart;
}
else if (mask == 0xff) {
// all lanes active; replace with a regular load
llvm::Type *returnType = callInst->getType();
Assert(llvm::isa<llvm::VectorType>(returnType));
// cast the i8 * to the appropriate type
const char *name = LLVMGetName(callInst->getArgOperand(0), "_cast");
llvm::Value *castPtr =
new llvm::BitCastInst(callInst->getArgOperand(0),
llvm::PointerType::get(returnType, 0),
name, callInst);
lCopyMetadata(castPtr, callInst);
int align = callInst->getCalledFunction() == avxMaskedLoad32 ? 4 : 8;
name = LLVMGetName(callInst->getArgOperand(0), "_load");
llvm::Instruction *loadInst =
new llvm::LoadInst(castPtr, name, false /* not volatile */,
align, (llvm::Instruction *)NULL);
lCopyMetadata(loadInst, callInst);
llvm::ReplaceInstWithInst(callInst, loadInst);
modifiedAny = true;
goto restart;
}
}
}
else if (callInst->getCalledFunction() == avxMaskedStore32 ||
callInst->getCalledFunction() == avxMaskedStore64) {
// NOTE: mask is the 2nd parameter, not the 3rd one!!
llvm::Value *factor = callInst->getArgOperand(1);
int mask = lGetMask(factor);
if (mask == 0) {
// nothing actually being stored, just remove the inst
callInst->eraseFromParent();
modifiedAny = true;
goto restart;
}
else if (mask == 0xff) {
// all lanes storing, so replace with a regular store
llvm::Value *rvalue = callInst->getArgOperand(2);
llvm::Type *storeType = rvalue->getType();
const char *name = LLVMGetName(callInst->getArgOperand(0),
"_ptrcast");
llvm::Value *castPtr =
new llvm::BitCastInst(callInst->getArgOperand(0),
llvm::PointerType::get(storeType, 0),
name, callInst);
lCopyMetadata(castPtr, callInst);
uint32_t mask;
if (lGetMask(factor, &mask) == true) {
if (mask == 0) {
// nothing actually being stored, just remove the inst
callInst->eraseFromParent();
modifiedAny = true;
goto restart;
}
else if (mask == 0xff) {
// all lanes storing, so replace with a regular store
llvm::Value *rvalue = callInst->getArgOperand(2);
llvm::Type *storeType = rvalue->getType();
const char *name = LLVMGetName(callInst->getArgOperand(0),
"_ptrcast");
llvm::Value *castPtr =
new llvm::BitCastInst(callInst->getArgOperand(0),
llvm::PointerType::get(storeType, 0),
name, callInst);
lCopyMetadata(castPtr, callInst);
llvm::StoreInst *storeInst =
new llvm::StoreInst(rvalue, castPtr, (llvm::Instruction *)NULL);
int align = callInst->getCalledFunction() == avxMaskedStore32 ? 4 : 8;
storeInst->setAlignment(align);
lCopyMetadata(storeInst, callInst);
llvm::ReplaceInstWithInst(callInst, storeInst);
llvm::StoreInst *storeInst =
new llvm::StoreInst(rvalue, castPtr, (llvm::Instruction *)NULL);
int align = callInst->getCalledFunction() == avxMaskedStore32 ? 4 : 8;
storeInst->setAlignment(align);
lCopyMetadata(storeInst, callInst);
llvm::ReplaceInstWithInst(callInst, storeInst);
modifiedAny = true;
goto restart;
modifiedAny = true;
goto restart;
}
}
}
}
@@ -949,13 +980,13 @@ VSelMovmskOpt::runOnBasicBlock(llvm::BasicBlock &bb) {
llvm::SelectInst *selectInst = llvm::dyn_cast<llvm::SelectInst>(&*iter);
if (selectInst != NULL && selectInst->getType()->isVectorTy()) {
llvm::Value *factor = selectInst->getOperand(0);
int mask = lGetMask(factor);
int allOnMask = (1 << g->target.vectorWidth) - 1;
MaskStatus maskStatus = lGetMaskStatus(factor);
llvm::Value *value = NULL;
if (mask == allOnMask)
if (maskStatus == ALL_ON)
// Mask all on -> replace with the first select value
value = selectInst->getOperand(1);
else if (mask == 0)
else if (maskStatus == ALL_OFF)
// Mask all off -> replace with the second select value
value = selectInst->getOperand(2);
@@ -976,8 +1007,8 @@ VSelMovmskOpt::runOnBasicBlock(llvm::BasicBlock &bb) {
if (calledFunc == NULL || calledFunc != m->module->getFunction("__movmsk"))
continue;
int mask = lGetMask(callInst->getArgOperand(0));
if (mask != -1) {
uint32_t mask;
if (lGetMask(callInst->getArgOperand(0), &mask) == true) {
#if 0
fprintf(stderr, "mask %d\n", mask);
callInst->getArgOperand(0)->dump();
@@ -1964,10 +1995,8 @@ MaskedStoreOptPass::runOnBasicBlock(llvm::BasicBlock &bb) {
llvm::Value *rvalue = callInst->getArgOperand(1);
llvm::Value *mask = callInst->getArgOperand(2);
int allOnMask = (1 << g->target.vectorWidth) - 1;
int maskAsInt = lGetMask(mask);
if (maskAsInt == 0) {
MaskStatus maskStatus = lGetMaskStatus(mask);
if (maskStatus == ALL_OFF) {
// Zero mask - no-op, so remove the store completely. (This
// may in turn lead to being able to optimize out instructions
// that compute the rvalue...)
@@ -1975,11 +2004,10 @@ MaskedStoreOptPass::runOnBasicBlock(llvm::BasicBlock &bb) {
modifiedAny = true;
goto restart;
}
else if (maskAsInt == allOnMask) {
else if (maskStatus == ALL_ON) {
// The mask is all on, so turn this into a regular store
llvm::Type *rvalueType = rvalue->getType();
llvm::Type *ptrType =
llvm::PointerType::get(rvalueType, 0);
llvm::Type *ptrType = llvm::PointerType::get(rvalueType, 0);
lvalue = new llvm::BitCastInst(lvalue, ptrType, "lvalue_to_ptr_type", callInst);
lCopyMetadata(lvalue, callInst);
@@ -2072,20 +2100,18 @@ MaskedLoadOptPass::runOnBasicBlock(llvm::BasicBlock &bb) {
// Got one; grab the operands
llvm::Value *ptr = callInst->getArgOperand(0);
llvm::Value *mask = callInst->getArgOperand(1);
int allOnMask = (1 << g->target.vectorWidth) - 1;
int maskAsInt = lGetMask(mask);
if (maskAsInt == 0) {
MaskStatus maskStatus = lGetMaskStatus(mask);
if (maskStatus == ALL_OFF) {
// Zero mask - no-op, so replace the load with an undef value
llvm::ReplaceInstWithValue(iter->getParent()->getInstList(),
iter, llvm::UndefValue::get(callInst->getType()));
modifiedAny = true;
goto restart;
}
else if (maskAsInt == allOnMask) {
else if (maskStatus == ALL_ON) {
// The mask is all on, so turn this into a regular load
llvm::Type *ptrType =
llvm::PointerType::get(callInst->getType(), 0);
llvm::Type *ptrType = llvm::PointerType::get(callInst->getType(), 0);
ptr = new llvm::BitCastInst(ptr, ptrType, "ptr_cast_for_load",
callInst);
llvm::Instruction *load =
@@ -2558,18 +2584,6 @@ public:
char GatherCoalescePass::ID = 0;
/* Returns true if the mask is known at compile time to be "all on". */
static bool
lIsMaskAllOn(llvm::Value *mask) {
int m = lGetMask(mask);
if (m == -1)
return false;
int allOnMask = (1 << g->target.vectorWidth) - 1;
return (m == allOnMask);
}
/** Representation of a memory load that the gather coalescing code has
decided to generate.
*/
@@ -3497,7 +3511,7 @@ GatherCoalescePass::runOnBasicBlock(llvm::BasicBlock &bb) {
// Then and only then do we have a common base pointer with all
// offsets from that constants (in which case we can potentially
// coalesce).
if (lIsMaskAllOn(mask) == false)
if (lGetMaskStatus(mask) != ALL_ON)
continue;
if (!LLVMVectorValuesAllEqual(variableOffsets))