Do all memory op improvements in a single optimization pass.

Rather than using separate passes, each of which performed one of these
conversions when possible:

- General gathers/scatters over a vector of pointers into gathers/scatters
  of a single base pointer plus integer offsets
- Gathers/scatters into masked loads/stores or a load plus a broadcast
- Masked loads/stores into regular loads/stores

all of them are now done in a single ImproveMemoryOps pass.  This change
in particular addresses phase-ordering issues that showed up with
multidimensional array accesses: after determining that an outer
dimension had the same index value in all program instances, we
previously weren't able to take advantage of the uniformity of the
resulting pointer.
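
For illustration, a hypothetical ispc-style fragment showing the pattern
that motivated this (the names are made up; this is only a sketch, not a
test case from the change):

    uniform float array[64][16];
    // 'row' is varying, but later optimization proves it has the same
    // value in every program instance.  Once that is known, the per-lane
    // addresses of array[row][programIndex] differ only by programIndex,
    // so the general gather can become a simple vector load -- but only
    // if the base+offset detection and the gather-to-load conversion can
    // see each other's results, which the combined pass now allows.
    float x = array[row][programIndex];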
commit 96450e17a3 (parent 40a295e951)
Author: Matt Pharr
Date:   2012-06-12 13:56:17 -07:00

opt.cpp

@@ -89,12 +89,9 @@
static llvm::Pass *CreateIntrinsicsOptPass();
static llvm::Pass *CreateVSelMovmskOptPass();
static llvm::Pass *CreateDetectGSBaseOffsetsPass();
static llvm::Pass *CreateGSToLoadStorePass();
static llvm::Pass *CreateGatherCoalescePass();
static llvm::Pass *CreateMaskedStoreOptPass();
static llvm::Pass *CreateMaskedLoadOptPass();
static llvm::Pass *CreateImproveMemoryOpsPass();
static llvm::Pass *CreateGatherCoalescePass();
static llvm::Pass *CreateReplacePseudoMemoryOpsPass();
static llvm::Pass *CreateIsCompileTimeConstantPass(bool isLastTry);
@@ -414,7 +411,7 @@ Optimize(llvm::Module *module, int optLevel) {
// run absolutely no optimizations, since the front-end needs us to
// take the various __pseudo_* functions it has emitted and turn
// them into something that can actually execute.
optPM.add(CreateDetectGSBaseOffsetsPass());
optPM.add(CreateImproveMemoryOpsPass());
if (g->opt.disableHandlePseudoMemoryOps == false)
optPM.add(CreateReplacePseudoMemoryOpsPass());
@@ -446,12 +443,13 @@ Optimize(llvm::Module *module, int optLevel) {
optPM.add(llvm::createDeadInstEliminationPass());
optPM.add(llvm::createCFGSimplificationPass());
optPM.add(CreateDetectGSBaseOffsetsPass());
if (g->opt.disableGatherScatterOptimizations == false &&
g->target.vectorWidth > 1) {
optPM.add(CreateImproveMemoryOpsPass());
}
if (!g->opt.disableMaskAllOnOptimizations) {
optPM.add(CreateIntrinsicsOptPass());
optPM.add(CreateVSelMovmskOptPass());
optPM.add(CreateMaskedStoreOptPass());
optPM.add(CreateMaskedLoadOptPass());
}
optPM.add(llvm::createDeadInstEliminationPass());
@@ -485,13 +483,11 @@ Optimize(llvm::Module *module, int optLevel) {
if (!g->opt.disableMaskAllOnOptimizations) {
optPM.add(CreateIntrinsicsOptPass());
optPM.add(CreateVSelMovmskOptPass());
optPM.add(CreateMaskedStoreOptPass());
optPM.add(CreateMaskedLoadOptPass());
}
if (g->opt.disableGatherScatterOptimizations == false &&
g->target.vectorWidth > 1) {
optPM.add(CreateGSToLoadStorePass());
optPM.add(CreateImproveMemoryOpsPass());
if (g->opt.disableCoalescing == false &&
g->target.isa != Target::GENERIC) {
@@ -502,23 +498,26 @@ Optimize(llvm::Module *module, int optLevel) {
}
}
if (g->opt.disableHandlePseudoMemoryOps == false)
optPM.add(CreateReplacePseudoMemoryOpsPass());
if (!g->opt.disableMaskAllOnOptimizations) {
optPM.add(CreateMaskedStoreOptPass());
optPM.add(CreateMaskedLoadOptPass());
}
optPM.add(llvm::createFunctionInliningPass());
optPM.add(llvm::createConstantPropagationPass());
optPM.add(CreateIntrinsicsOptPass());
optPM.add(CreateVSelMovmskOptPass());
if (g->opt.disableGatherScatterOptimizations == false &&
g->target.vectorWidth > 1) {
optPM.add(CreateImproveMemoryOpsPass());
}
optPM.add(llvm::createIPSCCPPass());
optPM.add(llvm::createDeadArgEliminationPass());
optPM.add(llvm::createInstructionCombiningPass());
optPM.add(llvm::createCFGSimplificationPass());
if (g->opt.disableHandlePseudoMemoryOps == false)
optPM.add(CreateReplacePseudoMemoryOpsPass());
optPM.add(CreateIntrinsicsOptPass());
optPM.add(CreateVSelMovmskOptPass());
optPM.add(llvm::createFunctionInliningPass());
optPM.add(llvm::createArgumentPromotionPass());
optPM.add(llvm::createScalarReplAggregatesPass(-1, false));
@@ -959,7 +958,7 @@ CreateVSelMovmskOptPass() {
///////////////////////////////////////////////////////////////////////////
// DetectGSBaseOffsetsPass
// ImproveMemoryOpsPass
/** When the front-end emits gathers and scatters, it generates an array of
vector-width pointers to represent the set of addresses to read from or
@@ -971,16 +970,16 @@ CreateVSelMovmskOptPass() {
See for example the comments discussing the __pseudo_gather functions
in builtins.cpp for more information about this.
*/
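// As a concrete illustration (argument lists simplified; see builtins.cpp
// for the actual signatures), a call like
//     __pseudo_gather32_i8(vector_of_pointers, mask)
// is rewritten, when a common base pointer can be found, into a
// __pseudo_gather_base_offsets32_i8() call that takes a single base
// pointer plus a vector of integer offsets, which later transformations
// can reason about much more easily.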
class DetectGSBaseOffsetsPass : public llvm::BasicBlockPass {
class ImproveMemoryOpsPass : public llvm::BasicBlockPass {
public:
static char ID;
DetectGSBaseOffsetsPass() : BasicBlockPass(ID) { }
ImproveMemoryOpsPass() : BasicBlockPass(ID) { }
const char *getPassName() const { return "Gather/Scatter Flattening"; }
const char *getPassName() const { return "Improve Memory Ops"; }
bool runOnBasicBlock(llvm::BasicBlock &BB);
};
char DetectGSBaseOffsetsPass::ID = 0;
char ImproveMemoryOpsPass::ID = 0;
@@ -1670,6 +1669,8 @@ lOffsets32BitSafe(llvm::Value **variableOffsetPtr,
}
static bool
lGSToGSBaseOffsets(llvm::CallInst *callInst) {
struct GSInfo {
GSInfo(const char *pgFuncName, const char *pgboFuncName,
const char *pgbo32FuncName, bool ig)
@@ -1683,11 +1684,6 @@ struct GSInfo {
const bool isGather;
};
bool
DetectGSBaseOffsetsPass::runOnBasicBlock(llvm::BasicBlock &bb) {
DEBUG_START_PASS("DetectGSBaseOffsets");
GSInfo gsFuncs[] = {
GSInfo("__pseudo_gather32_i8", "__pseudo_gather_base_offsets32_i8",
"__pseudo_gather_base_offsets32_i8", true),
@@ -1741,21 +1737,12 @@ DetectGSBaseOffsetsPass::runOnBasicBlock(llvm::BasicBlock &bb) {
GSInfo("__pseudo_scatter64_double", "__pseudo_scatter_base_offsets64_double",
"__pseudo_scatter_base_offsets32_double", false),
};
int numGSFuncs = sizeof(gsFuncs) / sizeof(gsFuncs[0]);
for (int i = 0; i < numGSFuncs; ++i)
Assert(gsFuncs[i].func != NULL && gsFuncs[i].baseOffsetsFunc != NULL &&
gsFuncs[i].baseOffsets32Func != NULL);
bool modifiedAny = false;
restart:
// Iterate through all of the instructions in the basic block.
for (llvm::BasicBlock::iterator iter = bb.begin(), e = bb.end(); iter != e; ++iter) {
llvm::CallInst *callInst = llvm::dyn_cast<llvm::CallInst>(&*iter);
// If we don't have a call to one of the
// __pseudo_{gather,scatter}_* functions, then just go on to the
// next instruction.
if (callInst == NULL)
continue;
GSInfo *info = NULL;
for (int i = 0; i < numGSFuncs; ++i)
if (gsFuncs[i].func != NULL &&
@@ -1764,7 +1751,7 @@ DetectGSBaseOffsetsPass::runOnBasicBlock(llvm::BasicBlock &bb) {
break;
}
if (info == NULL)
continue;
return false;
// Try to transform the array of pointers to a single base pointer
// and an array of int32 offsets. (All the hard work is done by
@@ -1778,7 +1765,7 @@ DetectGSBaseOffsetsPass::runOnBasicBlock(llvm::BasicBlock &bb) {
// It's actually a fully general gather/scatter with a varying
// set of base pointers, so leave it as is and continue onward
// to the next instruction...
continue;
return false;
// Try to decompose the offset vector into a compile time constant
// component and a varying component. The constant component is
@@ -1843,254 +1830,19 @@ DetectGSBaseOffsetsPass::runOnBasicBlock(llvm::BasicBlock &bb) {
lCopyMetadata(newCall, callInst);
llvm::ReplaceInstWithInst(callInst, newCall);
}
modifiedAny = true;
goto restart;
}
DEBUG_END_PASS("DetectGSBaseOffsets");
return modifiedAny;
return true;
}
static llvm::Pass *
CreateDetectGSBaseOffsetsPass() {
return new DetectGSBaseOffsetsPass;
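/** Given a base pointer and a vector of per-lane offsets, return the
    scalar pointer formed by indexing the base with the first lane's
    offset.  This gives the common starting address when the lanes all
    access the same location or a consecutive run of locations. */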
static llvm::Value *
lComputeCommonPointer(llvm::Value *base, llvm::Value *offsets,
llvm::Instruction *insertBefore) {
llvm::Value *firstOffset = LLVMExtractFirstVectorElement(offsets);
return lGEPInst(base, firstOffset, "ptr", insertBefore);
}
///////////////////////////////////////////////////////////////////////////
// MaskedStoreOptPass
/** Masked stores are generally more complex than regular stores; for
example, they require multiple instructions to simulate under SSE.
This optimization detects cases where masked stores can be replaced
with regular stores or removed entirely, for the cases of an 'all on'
mask and an 'all off' mask, respectively.
*/
class MaskedStoreOptPass : public llvm::BasicBlockPass {
public:
static char ID;
MaskedStoreOptPass() : BasicBlockPass(ID) { }
const char *getPassName() const { return "Masked Store Scalarize"; }
bool runOnBasicBlock(llvm::BasicBlock &BB);
};
char MaskedStoreOptPass::ID = 0;
struct MSInfo {
MSInfo(const char *name, const int a)
: align(a) {
func = m->module->getFunction(name);
Assert(func != NULL);
}
llvm::Function *func;
const int align;
};
bool
MaskedStoreOptPass::runOnBasicBlock(llvm::BasicBlock &bb) {
DEBUG_START_PASS("MaskedStoreOpt");
MSInfo msInfo[] = {
MSInfo("__pseudo_masked_store_i8", 1),
MSInfo("__pseudo_masked_store_i16", 2),
MSInfo("__pseudo_masked_store_i32", 4),
MSInfo("__pseudo_masked_store_float", 4),
MSInfo("__pseudo_masked_store_i64", 8),
MSInfo("__pseudo_masked_store_double", 8),
MSInfo("__masked_store_blend_i8", 1),
MSInfo("__masked_store_blend_i16", 2),
MSInfo("__masked_store_blend_i32", 4),
MSInfo("__masked_store_blend_float", 4),
MSInfo("__masked_store_blend_i64", 8),
MSInfo("__masked_store_blend_double", 8),
MSInfo("__masked_store_i8", 1),
MSInfo("__masked_store_i16", 2),
MSInfo("__masked_store_i32", 4),
MSInfo("__masked_store_float", 4),
MSInfo("__masked_store_i64", 8),
MSInfo("__masked_store_double", 8)
};
bool modifiedAny = false;
restart:
// Iterate over all of the instructions to look for one of the various
// masked store functions
for (llvm::BasicBlock::iterator iter = bb.begin(), e = bb.end(); iter != e; ++iter) {
llvm::CallInst *callInst = llvm::dyn_cast<llvm::CallInst>(&*iter);
if (callInst == NULL)
continue;
llvm::Function *called = callInst->getCalledFunction();
if (called == NULL)
continue;
int nMSFuncs = sizeof(msInfo) / sizeof(msInfo[0]);
MSInfo *info = NULL;
for (int i = 0; i < nMSFuncs; ++i) {
if (msInfo[i].func != NULL && called == msInfo[i].func) {
info = &msInfo[i];
break;
}
}
if (info == NULL)
continue;
// Got one; grab the operands
llvm::Value *lvalue = callInst->getArgOperand(0);
llvm::Value *rvalue = callInst->getArgOperand(1);
llvm::Value *mask = callInst->getArgOperand(2);
MaskStatus maskStatus = lGetMaskStatus(mask);
if (maskStatus == ALL_OFF) {
// Zero mask - no-op, so remove the store completely. (This
// may in turn lead to being able to optimize out instructions
// that compute the rvalue...)
callInst->eraseFromParent();
modifiedAny = true;
goto restart;
}
else if (maskStatus == ALL_ON) {
// The mask is all on, so turn this into a regular store
llvm::Type *rvalueType = rvalue->getType();
llvm::Type *ptrType = llvm::PointerType::get(rvalueType, 0);
lvalue = new llvm::BitCastInst(lvalue, ptrType, "lvalue_to_ptr_type", callInst);
lCopyMetadata(lvalue, callInst);
llvm::Instruction *store =
new llvm::StoreInst(rvalue, lvalue, false /* not volatile */,
info->align);
lCopyMetadata(store, callInst);
llvm::ReplaceInstWithInst(callInst, store);
modifiedAny = true;
goto restart;
}
}
DEBUG_END_PASS("MaskedStoreOpt");
return modifiedAny;
}
static llvm::Pass *
CreateMaskedStoreOptPass() {
return new MaskedStoreOptPass;
}
///////////////////////////////////////////////////////////////////////////
// MaskedLoadOptPass
/** Masked load improvements for the all on/all off mask cases.
*/
class MaskedLoadOptPass : public llvm::BasicBlockPass {
public:
static char ID;
MaskedLoadOptPass() : BasicBlockPass(ID) { }
const char *getPassName() const { return "Masked Load Improvements"; }
bool runOnBasicBlock(llvm::BasicBlock &BB);
};
char MaskedLoadOptPass::ID = 0;
struct MLInfo {
MLInfo(const char *name, const int a)
: align(a) {
func = m->module->getFunction(name);
Assert(func != NULL);
}
llvm::Function *func;
const int align;
};
bool
MaskedLoadOptPass::runOnBasicBlock(llvm::BasicBlock &bb) {
DEBUG_START_PASS("MaskedLoadOpt");
MLInfo mlInfo[] = {
MLInfo("__masked_load_i8", 1),
MLInfo("__masked_load_i16", 2),
MLInfo("__masked_load_i32", 4),
MLInfo("__masked_load_float", 4),
MLInfo("__masked_load_i64", 8),
MLInfo("__masked_load_double", 8)
};
bool modifiedAny = false;
restart:
// Iterate over all of the instructions to look for one of the various
// masked load functions
for (llvm::BasicBlock::iterator iter = bb.begin(), e = bb.end(); iter != e; ++iter) {
llvm::CallInst *callInst = llvm::dyn_cast<llvm::CallInst>(&*iter);
if (!callInst)
continue;
llvm::Function *called = callInst->getCalledFunction();
if (called == NULL)
continue;
int nFuncs = sizeof(mlInfo) / sizeof(mlInfo[0]);
MLInfo *info = NULL;
for (int i = 0; i < nFuncs; ++i) {
if (mlInfo[i].func != NULL && called == mlInfo[i].func) {
info = &mlInfo[i];
break;
}
}
if (info == NULL)
continue;
// Got one; grab the operands
llvm::Value *ptr = callInst->getArgOperand(0);
llvm::Value *mask = callInst->getArgOperand(1);
MaskStatus maskStatus = lGetMaskStatus(mask);
if (maskStatus == ALL_OFF) {
// Zero mask - no-op, so replace the load with an undef value
llvm::ReplaceInstWithValue(iter->getParent()->getInstList(),
iter, llvm::UndefValue::get(callInst->getType()));
modifiedAny = true;
goto restart;
}
else if (maskStatus == ALL_ON) {
// The mask is all on, so turn this into a regular load
llvm::Type *ptrType = llvm::PointerType::get(callInst->getType(), 0);
ptr = new llvm::BitCastInst(ptr, ptrType, "ptr_cast_for_load",
callInst);
llvm::Instruction *load =
new llvm::LoadInst(ptr, callInst->getName(), false /* not volatile */,
info->align, (llvm::Instruction *)NULL);
lCopyMetadata(load, callInst);
llvm::ReplaceInstWithInst(callInst, load);
modifiedAny = true;
goto restart;
}
}
DEBUG_END_PASS("MaskedLoadOpt");
return modifiedAny;
}
static llvm::Pass *
CreateMaskedLoadOptPass() {
return new MaskedLoadOptPass;
}
///////////////////////////////////////////////////////////////////////////
// GSToLoadStorePass
/** After earlier optimization passes have run, we are sometimes able to
determine that gathers/scatters are actually accessing memory in a more
regular fashion and then change the operation to something simpler and
@@ -2106,19 +1858,8 @@ CreateMaskedLoadOptPass() {
shuffle or things that could be handled with hybrids of e.g. 2 4-wide
vector loads with AVX, etc.
*/
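// Roughly, two cases are handled here: if all of the lanes end up with the
// same offset, a gather becomes a scalar load plus a broadcast; if the
// offsets are consecutive, it becomes a single masked or unaligned vector
// load.  Scatters get the analogous treatment on the store side.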
class GSToLoadStorePass : public llvm::BasicBlockPass {
public:
static char ID;
GSToLoadStorePass() : BasicBlockPass(ID) { }
const char *getPassName() const { return "Gather/Scatter Improvements"; }
bool runOnBasicBlock(llvm::BasicBlock &BB);
};
char GSToLoadStorePass::ID = 0;
static bool
lGSToLoadStore(llvm::CallInst *callInst) {
struct GatherImpInfo {
GatherImpInfo(const char *pName, const char *lmName, llvm::Type *st,
int a)
@@ -2135,35 +1876,6 @@ struct GatherImpInfo {
const int align;
};
static llvm::Value *
lComputeCommonPointer(llvm::Value *base, llvm::Value *offsets,
llvm::Instruction *insertBefore) {
llvm::Value *firstOffset = LLVMExtractFirstVectorElement(offsets);
return lGEPInst(base, firstOffset, "ptr", insertBefore);
}
struct ScatterImpInfo {
ScatterImpInfo(const char *pName, const char *msName,
llvm::Type *vpt, int a)
: align(a) {
pseudoFunc = m->module->getFunction(pName);
maskedStoreFunc = m->module->getFunction(msName);
vecPtrType = vpt;
Assert(pseudoFunc != NULL && maskedStoreFunc != NULL);
}
llvm::Function *pseudoFunc;
llvm::Function *maskedStoreFunc;
llvm::Type *vecPtrType;
const int align;
};
bool
GSToLoadStorePass::runOnBasicBlock(llvm::BasicBlock &bb) {
DEBUG_START_PASS("GSToLoadStorePass");
GatherImpInfo gInfo[] = {
GatherImpInfo("__pseudo_gather_base_offsets32_i8", "__masked_load_i8",
LLVMTypes::Int8Type, 1),
@@ -2190,6 +1902,22 @@ GSToLoadStorePass::runOnBasicBlock(llvm::BasicBlock &bb) {
GatherImpInfo("__pseudo_gather_base_offsets64_double", "__masked_load_double",
LLVMTypes::DoubleType, 8)
};
struct ScatterImpInfo {
ScatterImpInfo(const char *pName, const char *msName,
llvm::Type *vpt, int a)
: align(a) {
pseudoFunc = m->module->getFunction(pName);
maskedStoreFunc = m->module->getFunction(msName);
vecPtrType = vpt;
Assert(pseudoFunc != NULL && maskedStoreFunc != NULL);
}
llvm::Function *pseudoFunc;
llvm::Function *maskedStoreFunc;
llvm::Type *vecPtrType;
const int align;
};
ScatterImpInfo sInfo[] = {
ScatterImpInfo("__pseudo_scatter_base_offsets32_i8", "__pseudo_masked_store_i8",
LLVMTypes::Int8VectorPointerType, 1),
@@ -2217,19 +1945,7 @@ GSToLoadStorePass::runOnBasicBlock(llvm::BasicBlock &bb) {
LLVMTypes::DoubleVectorPointerType, 8)
};
bool modifiedAny = false;
restart:
for (llvm::BasicBlock::iterator iter = bb.begin(), e = bb.end(); iter != e; ++iter) {
// Iterate over all of the instructions and look for calls to
// __pseudo_*_base_offsets_* calls.
llvm::CallInst *callInst = llvm::dyn_cast<llvm::CallInst>(&*iter);
if (callInst == NULL)
continue;
llvm::Function *calledFunc = callInst->getCalledFunction();
if (calledFunc == NULL)
continue;
GatherImpInfo *gatherInfo = NULL;
ScatterImpInfo *scatterInfo = NULL;
@@ -2248,7 +1964,7 @@ GSToLoadStorePass::runOnBasicBlock(llvm::BasicBlock &bb) {
}
}
if (gatherInfo == NULL && scatterInfo == NULL)
continue;
return false;
SourcePos pos;
lGetSourcePosFromMetadata(callInst, &pos);
@@ -2313,8 +2029,7 @@ GSToLoadStorePass::runOnBasicBlock(llvm::BasicBlock &bb) {
lCopyMetadata(vecValue, callInst);
llvm::ReplaceInstWithInst(callInst,
llvm::dyn_cast<llvm::Instruction>(vecValue));
modifiedAny = true;
goto restart;
return true;
}
else {
// A scatter with everyone going to the same location is
@@ -2331,6 +2046,7 @@ GSToLoadStorePass::runOnBasicBlock(llvm::BasicBlock &bb) {
// case. We'll just let a bunch of the program instances
// do redundant writes, since this isn't important to make
// fast anyway...
return false;
}
}
else {
@@ -2351,6 +2067,7 @@ GSToLoadStorePass::runOnBasicBlock(llvm::BasicBlock &bb) {
LLVMGetName(ptr, "_masked_load"));
lCopyMetadata(newCall, callInst);
llvm::ReplaceInstWithInst(callInst, newCall);
return true;
}
else {
Debug(pos, "Transformed scatter to unaligned vector store!");
@@ -2361,23 +2078,207 @@ GSToLoadStorePass::runOnBasicBlock(llvm::BasicBlock &bb) {
mask, "");
lCopyMetadata(newCall, callInst);
llvm::ReplaceInstWithInst(callInst, newCall);
return true;
}
}
return false;
}
}
///////////////////////////////////////////////////////////////////////////
// MaskedStoreOptPass
/** Masked stores are generally more complex than regular stores; for
example, they require multiple instructions to simulate under SSE.
This optimization detects cases where masked stores can be replaced
with regular stores or removed entirely, for the cases of an 'all on'
mask and an 'all off' mask, respectively.
*/
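// For example, given a compile-time all-on mask, a call like
//     __pseudo_masked_store_i32(ptr, value, mask)
// is turned into a regular StoreInst; with an all-off mask, the call is
// removed entirely.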
static bool
lImproveMaskedStore(llvm::CallInst *callInst) {
struct MSInfo {
MSInfo(const char *name, const int a)
: align(a) {
func = m->module->getFunction(name);
Assert(func != NULL);
}
llvm::Function *func;
const int align;
};
MSInfo msInfo[] = {
MSInfo("__pseudo_masked_store_i8", 1),
MSInfo("__pseudo_masked_store_i16", 2),
MSInfo("__pseudo_masked_store_i32", 4),
MSInfo("__pseudo_masked_store_float", 4),
MSInfo("__pseudo_masked_store_i64", 8),
MSInfo("__pseudo_masked_store_double", 8),
MSInfo("__masked_store_blend_i8", 1),
MSInfo("__masked_store_blend_i16", 2),
MSInfo("__masked_store_blend_i32", 4),
MSInfo("__masked_store_blend_float", 4),
MSInfo("__masked_store_blend_i64", 8),
MSInfo("__masked_store_blend_double", 8),
MSInfo("__masked_store_i8", 1),
MSInfo("__masked_store_i16", 2),
MSInfo("__masked_store_i32", 4),
MSInfo("__masked_store_float", 4),
MSInfo("__masked_store_i64", 8),
MSInfo("__masked_store_double", 8)
};
llvm::Function *called = callInst->getCalledFunction();
int nMSFuncs = sizeof(msInfo) / sizeof(msInfo[0]);
MSInfo *info = NULL;
for (int i = 0; i < nMSFuncs; ++i) {
if (msInfo[i].func != NULL && called == msInfo[i].func) {
info = &msInfo[i];
break;
}
}
if (info == NULL)
return false;
// Got one; grab the operands
llvm::Value *lvalue = callInst->getArgOperand(0);
llvm::Value *rvalue = callInst->getArgOperand(1);
llvm::Value *mask = callInst->getArgOperand(2);
MaskStatus maskStatus = lGetMaskStatus(mask);
if (maskStatus == ALL_OFF) {
// Zero mask - no-op, so remove the store completely. (This
// may in turn lead to being able to optimize out instructions
// that compute the rvalue...)
callInst->eraseFromParent();
return true;
}
else if (maskStatus == ALL_ON) {
// The mask is all on, so turn this into a regular store
llvm::Type *rvalueType = rvalue->getType();
llvm::Type *ptrType = llvm::PointerType::get(rvalueType, 0);
lvalue = new llvm::BitCastInst(lvalue, ptrType, "lvalue_to_ptr_type", callInst);
lCopyMetadata(lvalue, callInst);
llvm::Instruction *store =
new llvm::StoreInst(rvalue, lvalue, false /* not volatile */,
info->align);
lCopyMetadata(store, callInst);
llvm::ReplaceInstWithInst(callInst, store);
return true;
}
return false;
}
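/** Masked load improvements for the all on/all off mask cases: with an
    all-off mask the load is replaced with an undef value, and with an
    all-on mask it is turned into a regular load. */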
static bool
lImproveMaskedLoad(llvm::CallInst *callInst,
llvm::BasicBlock::iterator iter) {
struct MLInfo {
MLInfo(const char *name, const int a)
: align(a) {
func = m->module->getFunction(name);
Assert(func != NULL);
}
llvm::Function *func;
const int align;
};
MLInfo mlInfo[] = {
MLInfo("__masked_load_i8", 1),
MLInfo("__masked_load_i16", 2),
MLInfo("__masked_load_i32", 4),
MLInfo("__masked_load_float", 4),
MLInfo("__masked_load_i64", 8),
MLInfo("__masked_load_double", 8)
};
llvm::Function *called = callInst->getCalledFunction();
int nFuncs = sizeof(mlInfo) / sizeof(mlInfo[0]);
MLInfo *info = NULL;
for (int i = 0; i < nFuncs; ++i) {
if (mlInfo[i].func != NULL && called == mlInfo[i].func) {
info = &mlInfo[i];
break;
}
}
if (info == NULL)
return false;
// Got one; grab the operands
llvm::Value *ptr = callInst->getArgOperand(0);
llvm::Value *mask = callInst->getArgOperand(1);
MaskStatus maskStatus = lGetMaskStatus(mask);
if (maskStatus == ALL_OFF) {
// Zero mask - no-op, so replace the load with an undef value
llvm::ReplaceInstWithValue(iter->getParent()->getInstList(),
iter, llvm::UndefValue::get(callInst->getType()));
return true;
}
else if (maskStatus == ALL_ON) {
// The mask is all on, so turn this into a regular load
llvm::Type *ptrType = llvm::PointerType::get(callInst->getType(), 0);
ptr = new llvm::BitCastInst(ptr, ptrType, "ptr_cast_for_load",
callInst);
llvm::Instruction *load =
new llvm::LoadInst(ptr, callInst->getName(), false /* not volatile */,
info->align, (llvm::Instruction *)NULL);
lCopyMetadata(load, callInst);
llvm::ReplaceInstWithInst(callInst, load);
return true;
}
else
return false;
}
bool
ImproveMemoryOpsPass::runOnBasicBlock(llvm::BasicBlock &bb) {
DEBUG_START_PASS("ImproveMemoryOps");
bool modifiedAny = false;
restart:
// Iterate through all of the instructions in the basic block.
for (llvm::BasicBlock::iterator iter = bb.begin(), e = bb.end(); iter != e; ++iter) {
llvm::CallInst *callInst = llvm::dyn_cast<llvm::CallInst>(&*iter);
// If we don't have a call to one of the
// __pseudo_{gather,scatter}_* functions, then just go on to the
// next instruction.
if (callInst == NULL ||
callInst->getCalledFunction() == NULL)
continue;
if (lGSToGSBaseOffsets(callInst)) {
modifiedAny = true;
goto restart;
}
if (lGSToLoadStore(callInst)) {
modifiedAny = true;
goto restart;
}
if (lImproveMaskedStore(callInst)) {
modifiedAny = true;
goto restart;
}
if (lImproveMaskedLoad(callInst, iter)) {
modifiedAny = true;
goto restart;
}
}
}
DEBUG_END_PASS("GSToLoadStorePass");
DEBUG_END_PASS("ImproveMemoryOps");
return modifiedAny;
}
static llvm::Pass *
CreateGSToLoadStorePass() {
return new GSToLoadStorePass;
CreateImproveMemoryOpsPass() {
return new ImproveMemoryOpsPass;
}