Do all memory op improvements in a single optimization pass.
Rather than having separate passes that each did one of these conversions when possible:

- general gather/scatter on a vector of pointers to a gather/scatter from a single base pointer plus a vector of integer offsets,
- gather/scatter to a masked load/store or a load+broadcast, and
- masked load/store to a regular load/store,

all of them are now done in a single ImproveMemoryOps pass. In particular, this change addresses phase-ordering issues that showed up with multidimensional array accesses: after determining that an outer dimension had the same index value across the gang, we previously weren't able to take advantage of the uniformity of the resulting pointer. (A condensed sketch of the combined-pass structure follows.)
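The shape of the change, as a minimal self-contained C++ sketch (illustrative only; the type and helper names here are hypothetical, not the ones in opt.cpp): instead of running each rewrite as its own pass exactly once, one pass retries every rewrite whenever any of them fires, so a simplification made by one rewrite (e.g., finding a common base pointer) can immediately feed the next (e.g., turning the gather into a vector load).

    #include <functional>
    #include <vector>

    // Hypothetical stand-in for a unit of IR being rewritten.
    struct Op { int kind; };

    // Each rewrite returns true if it changed the ops list.
    using Rewrite = std::function<bool(std::vector<Op> &)>;

    // Apply every rewrite, restarting from the top after any change --
    // the same fixed-point structure as the 'goto restart' loop in the
    // new ImproveMemoryOpsPass::runOnBasicBlock() in the diff below.
    static bool runToFixedPoint(std::vector<Op> &ops,
                                const std::vector<Rewrite> &rewrites) {
        bool modifiedAny = false;
    restart:
        for (const Rewrite &r : rewrites) {
            if (r(ops)) {       // one rewrite fired; others may now apply
                modifiedAny = true;
                goto restart;   // start over rather than continuing in order
            }
        }
        return modifiedAny;
    }

With separate passes, each rewrite ran once in a fixed order, so an opportunity exposed by a later pass could never be picked up by an earlier one; the fixed-point loop removes that ordering sensitivity.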
Showing 1 changed file: opt.cpp, with 597 additions and 597 deletions.
@@ -89,12 +89,9 @@

 static llvm::Pass *CreateIntrinsicsOptPass();
 static llvm::Pass *CreateVSelMovmskOptPass();
-static llvm::Pass *CreateDetectGSBaseOffsetsPass();
-static llvm::Pass *CreateGSToLoadStorePass();
-static llvm::Pass *CreateGatherCoalescePass();
-static llvm::Pass *CreateMaskedStoreOptPass();
-static llvm::Pass *CreateMaskedLoadOptPass();

+static llvm::Pass *CreateImproveMemoryOpsPass();
+static llvm::Pass *CreateGatherCoalescePass();
 static llvm::Pass *CreateReplacePseudoMemoryOpsPass();

 static llvm::Pass *CreateIsCompileTimeConstantPass(bool isLastTry);
@@ -414,7 +411,7 @@ Optimize(llvm::Module *module, int optLevel) {
         // run absolutely no optimizations, since the front-end needs us to
         // take the various __pseudo_* functions it has emitted and turn
         // them into something that can actually execute.
-        optPM.add(CreateDetectGSBaseOffsetsPass());
+        optPM.add(CreateImproveMemoryOpsPass());
         if (g->opt.disableHandlePseudoMemoryOps == false)
             optPM.add(CreateReplacePseudoMemoryOpsPass());

@@ -446,12 +443,13 @@ Optimize(llvm::Module *module, int optLevel) {
         optPM.add(llvm::createDeadInstEliminationPass());
         optPM.add(llvm::createCFGSimplificationPass());

-        optPM.add(CreateDetectGSBaseOffsetsPass());
+        if (g->opt.disableGatherScatterOptimizations == false &&
+            g->target.vectorWidth > 1) {
+            optPM.add(CreateImproveMemoryOpsPass());
+        }
         if (!g->opt.disableMaskAllOnOptimizations) {
             optPM.add(CreateIntrinsicsOptPass());
             optPM.add(CreateVSelMovmskOptPass());
-            optPM.add(CreateMaskedStoreOptPass());
-            optPM.add(CreateMaskedLoadOptPass());
         }
         optPM.add(llvm::createDeadInstEliminationPass());

@@ -485,13 +483,11 @@ Optimize(llvm::Module *module, int optLevel) {
         if (!g->opt.disableMaskAllOnOptimizations) {
             optPM.add(CreateIntrinsicsOptPass());
             optPM.add(CreateVSelMovmskOptPass());
-            optPM.add(CreateMaskedStoreOptPass());
-            optPM.add(CreateMaskedLoadOptPass());
         }

         if (g->opt.disableGatherScatterOptimizations == false &&
             g->target.vectorWidth > 1) {
-            optPM.add(CreateGSToLoadStorePass());
+            optPM.add(CreateImproveMemoryOpsPass());

             if (g->opt.disableCoalescing == false &&
                 g->target.isa != Target::GENERIC) {
@@ -502,23 +498,26 @@ Optimize(llvm::Module *module, int optLevel) {
             }
         }

-        if (g->opt.disableHandlePseudoMemoryOps == false)
-            optPM.add(CreateReplacePseudoMemoryOpsPass());
-
-        if (!g->opt.disableMaskAllOnOptimizations) {
-            optPM.add(CreateMaskedStoreOptPass());
-            optPM.add(CreateMaskedLoadOptPass());
-        }
-
         optPM.add(llvm::createFunctionInliningPass());
         optPM.add(llvm::createConstantPropagationPass());
         optPM.add(CreateIntrinsicsOptPass());
         optPM.add(CreateVSelMovmskOptPass());
+
+        if (g->opt.disableGatherScatterOptimizations == false &&
+            g->target.vectorWidth > 1) {
+            optPM.add(CreateImproveMemoryOpsPass());
+        }
+
         optPM.add(llvm::createIPSCCPPass());
         optPM.add(llvm::createDeadArgEliminationPass());
         optPM.add(llvm::createInstructionCombiningPass());
         optPM.add(llvm::createCFGSimplificationPass());

+        if (g->opt.disableHandlePseudoMemoryOps == false)
+            optPM.add(CreateReplacePseudoMemoryOpsPass());
+        optPM.add(CreateIntrinsicsOptPass());
+        optPM.add(CreateVSelMovmskOptPass());
+
         optPM.add(llvm::createFunctionInliningPass());
         optPM.add(llvm::createArgumentPromotionPass());
         optPM.add(llvm::createScalarReplAggregatesPass(-1, false));
@@ -959,7 +958,7 @@ CreateVSelMovmskOptPass() {


 ///////////////////////////////////////////////////////////////////////////
-// DetectGSBaseOffsetsPass
+// ImproveMemoryOpsPass

 /** When the front-end emits gathers and scatters, it generates an array of
     vector-width pointers to represent the set of addresses to read from or
@@ -971,16 +970,16 @@ CreateVSelMovmskOptPass() {
    See for example the comments discussing the __pseudo_gather functions
    in builtins.cpp for more information about this.
 */
-class DetectGSBaseOffsetsPass : public llvm::BasicBlockPass {
+class ImproveMemoryOpsPass : public llvm::BasicBlockPass {
 public:
     static char ID;
-    DetectGSBaseOffsetsPass() : BasicBlockPass(ID) { }
+    ImproveMemoryOpsPass() : BasicBlockPass(ID) { }

-    const char *getPassName() const { return "Gather/Scatter Flattening"; }
+    const char *getPassName() const { return "Improve Memory Ops"; }
     bool runOnBasicBlock(llvm::BasicBlock &BB);
 };

-char DetectGSBaseOffsetsPass::ID = 0;
+char ImproveMemoryOpsPass::ID = 0;


@@ -1670,7 +1669,9 @@ lOffsets32BitSafe(llvm::Value **variableOffsetPtr,
 }


-struct GSInfo {
+static bool
+lGSToGSBaseOffsets(llvm::CallInst *callInst) {
+    struct GSInfo {
     GSInfo(const char *pgFuncName, const char *pgboFuncName,
            const char *pgbo32FuncName, bool ig)
         : isGather(ig) {
@@ -1681,12 +1682,7 @@ struct GSInfo {
     llvm::Function *func;
     llvm::Function *baseOffsetsFunc, *baseOffsets32Func;
     const bool isGather;
 };

-
-bool
-DetectGSBaseOffsetsPass::runOnBasicBlock(llvm::BasicBlock &bb) {
-    DEBUG_START_PASS("DetectGSBaseOffsets");
-
     GSInfo gsFuncs[] = {
         GSInfo("__pseudo_gather32_i8", "__pseudo_gather_base_offsets32_i8",
@@ -1741,21 +1737,12 @@ DetectGSBaseOffsetsPass::runOnBasicBlock(llvm::BasicBlock &bb) {
         GSInfo("__pseudo_scatter64_double", "__pseudo_scatter_base_offsets64_double",
                "__pseudo_scatter_base_offsets32_double", false),
     };

     int numGSFuncs = sizeof(gsFuncs) / sizeof(gsFuncs[0]);
     for (int i = 0; i < numGSFuncs; ++i)
         Assert(gsFuncs[i].func != NULL && gsFuncs[i].baseOffsetsFunc != NULL &&
                gsFuncs[i].baseOffsets32Func != NULL);

-    bool modifiedAny = false;
- restart:
-    // Iterate through all of the instructions in the basic block.
-    for (llvm::BasicBlock::iterator iter = bb.begin(), e = bb.end(); iter != e; ++iter) {
-        llvm::CallInst *callInst = llvm::dyn_cast<llvm::CallInst>(&*iter);
-        // If we don't have a call to one of the
-        // __pseudo_{gather,scatter}_* functions, then just go on to the
-        // next instruction.
-        if (callInst == NULL)
-            continue;
     GSInfo *info = NULL;
     for (int i = 0; i < numGSFuncs; ++i)
         if (gsFuncs[i].func != NULL &&
@@ -1764,7 +1751,7 @@ DetectGSBaseOffsetsPass::runOnBasicBlock(llvm::BasicBlock &bb) {
             break;
         }
     if (info == NULL)
-        continue;
+        return false;

     // Try to transform the array of pointers to a single base pointer
     // and an array of int32 offsets.  (All the hard work is done by
@@ -1778,7 +1765,7 @@ DetectGSBaseOffsetsPass::runOnBasicBlock(llvm::BasicBlock &bb) {
        // It's actually a fully general gather/scatter with a varying
        // set of base pointers, so leave it as is and continune onward
        // to the next instruction...
-        continue;
+        return false;

    // Try to decompose the offset vector into a compile time constant
    // component and a varying component.  The constant component is
@@ -1843,254 +1830,19 @@ DetectGSBaseOffsetsPass::runOnBasicBlock(llvm::BasicBlock &bb) {
         lCopyMetadata(newCall, callInst);
         llvm::ReplaceInstWithInst(callInst, newCall);
     }
-        modifiedAny = true;
-        goto restart;
-    }

-    DEBUG_END_PASS("DetectGSBaseOffsets");
+    return true;

-    return modifiedAny;
 }


-static llvm::Pass *
-CreateDetectGSBaseOffsetsPass() {
-    return new DetectGSBaseOffsetsPass;
+static llvm::Value *
+lComputeCommonPointer(llvm::Value *base, llvm::Value *offsets,
+                      llvm::Instruction *insertBefore) {
+    llvm::Value *firstOffset = LLVMExtractFirstVectorElement(offsets);
+    return lGEPInst(base, firstOffset, "ptr", insertBefore);
 }


-///////////////////////////////////////////////////////////////////////////
-// MaskedStoreOptPass
-
-/** Masked stores are generally more complex than regular stores; for
-    example, they require multiple instructions to simulate under SSE.
-    This optimization detects cases where masked stores can be replaced
-    with regular stores or removed entirely, for the cases of an 'all on'
-    mask and an 'all off' mask, respectively.
-*/
-class MaskedStoreOptPass : public llvm::BasicBlockPass {
-public:
-    static char ID;
-    MaskedStoreOptPass() : BasicBlockPass(ID) { }
-
-    const char *getPassName() const { return "Masked Store Scalarize"; }
-    bool runOnBasicBlock(llvm::BasicBlock &BB);
-};
-
-
-char MaskedStoreOptPass::ID = 0;
-
-struct MSInfo {
-    MSInfo(const char *name, const int a)
-        : align(a) {
-        func = m->module->getFunction(name);
-        Assert(func != NULL);
-    }
-    llvm::Function *func;
-    const int align;
-};
-
-
-bool
-MaskedStoreOptPass::runOnBasicBlock(llvm::BasicBlock &bb) {
-    DEBUG_START_PASS("MaskedStoreOpt");
-
-    MSInfo msInfo[] = {
-        MSInfo("__pseudo_masked_store_i8", 1),
-        MSInfo("__pseudo_masked_store_i16", 2),
-        MSInfo("__pseudo_masked_store_i32", 4),
-        MSInfo("__pseudo_masked_store_float", 4),
-        MSInfo("__pseudo_masked_store_i64", 8),
-        MSInfo("__pseudo_masked_store_double", 8),
-        MSInfo("__masked_store_blend_i8", 1),
-        MSInfo("__masked_store_blend_i16", 2),
-        MSInfo("__masked_store_blend_i32", 4),
-        MSInfo("__masked_store_blend_float", 4),
-        MSInfo("__masked_store_blend_i64", 8),
-        MSInfo("__masked_store_blend_double", 8),
-        MSInfo("__masked_store_i8", 1),
-        MSInfo("__masked_store_i16", 2),
-        MSInfo("__masked_store_i32", 4),
-        MSInfo("__masked_store_float", 4),
-        MSInfo("__masked_store_i64", 8),
-        MSInfo("__masked_store_double", 8)
-    };
-
-    bool modifiedAny = false;
- restart:
-    // Iterate over all of the instructions to look for one of the various
-    // masked store functions
-    for (llvm::BasicBlock::iterator iter = bb.begin(), e = bb.end(); iter != e; ++iter) {
-        llvm::CallInst *callInst = llvm::dyn_cast<llvm::CallInst>(&*iter);
-        if (callInst == NULL)
-            continue;
-
-        llvm::Function *called = callInst->getCalledFunction();
-        if (called == NULL)
-            continue;
-
-        int nMSFuncs = sizeof(msInfo) / sizeof(msInfo[0]);
-        MSInfo *info = NULL;
-        for (int i = 0; i < nMSFuncs; ++i) {
-            if (msInfo[i].func != NULL && called == msInfo[i].func) {
-                info = &msInfo[i];
-                break;
-            }
-        }
-        if (info == NULL)
-            continue;
-
-        // Got one; grab the operands
-        llvm::Value *lvalue = callInst->getArgOperand(0);
-        llvm::Value *rvalue = callInst->getArgOperand(1);
-        llvm::Value *mask = callInst->getArgOperand(2);
-
-        MaskStatus maskStatus = lGetMaskStatus(mask);
-        if (maskStatus == ALL_OFF) {
-            // Zero mask - no-op, so remove the store completely.  (This
-            // may in turn lead to being able to optimize out instructions
-            // that compute the rvalue...)
-            callInst->eraseFromParent();
-            modifiedAny = true;
-            goto restart;
-        }
-        else if (maskStatus == ALL_ON) {
-            // The mask is all on, so turn this into a regular store
-            llvm::Type *rvalueType = rvalue->getType();
-            llvm::Type *ptrType = llvm::PointerType::get(rvalueType, 0);
-
-            lvalue = new llvm::BitCastInst(lvalue, ptrType, "lvalue_to_ptr_type", callInst);
-            lCopyMetadata(lvalue, callInst);
-            llvm::Instruction *store =
-                new llvm::StoreInst(rvalue, lvalue, false /* not volatile */,
-                                    info->align);
-            lCopyMetadata(store, callInst);
-            llvm::ReplaceInstWithInst(callInst, store);
-
-            modifiedAny = true;
-            goto restart;
-        }
-    }
-
-    DEBUG_END_PASS("MaskedStoreOpt");
-
-    return modifiedAny;
-}
-
-
-static llvm::Pass *
-CreateMaskedStoreOptPass() {
-    return new MaskedStoreOptPass;
-}
-
-
-///////////////////////////////////////////////////////////////////////////
-// MaskedLoadOptPass
-
-/** Masked load improvements for the all on/all off mask cases.
-*/
-class MaskedLoadOptPass : public llvm::BasicBlockPass {
-public:
-    static char ID;
-    MaskedLoadOptPass() : BasicBlockPass(ID) { }
-
-    const char *getPassName() const { return "Masked Load Improvements"; }
-    bool runOnBasicBlock(llvm::BasicBlock &BB);
-};
-
-
-char MaskedLoadOptPass::ID = 0;
-
-struct MLInfo {
-    MLInfo(const char *name, const int a)
-        : align(a) {
-        func = m->module->getFunction(name);
-        Assert(func != NULL);
-    }
-    llvm::Function *func;
-    const int align;
-};
-
-
-bool
-MaskedLoadOptPass::runOnBasicBlock(llvm::BasicBlock &bb) {
-    DEBUG_START_PASS("MaskedLoadOpt");
-
-    MLInfo mlInfo[] = {
-        MLInfo("__masked_load_i8", 1),
-        MLInfo("__masked_load_i16", 2),
-        MLInfo("__masked_load_i32", 4),
-        MLInfo("__masked_load_float", 4),
-        MLInfo("__masked_load_i64", 8),
-        MLInfo("__masked_load_double", 8)
-    };
-
-    bool modifiedAny = false;
- restart:
-    // Iterate over all of the instructions to look for one of the various
-    // masked load functions
-    for (llvm::BasicBlock::iterator iter = bb.begin(), e = bb.end(); iter != e; ++iter) {
-        llvm::CallInst *callInst = llvm::dyn_cast<llvm::CallInst>(&*iter);
-        if (!callInst)
-            continue;
-
-        llvm::Function *called = callInst->getCalledFunction();
-        if (called == NULL)
-            continue;
-
-        int nFuncs = sizeof(mlInfo) / sizeof(mlInfo[0]);
-        MLInfo *info = NULL;
-        for (int i = 0; i < nFuncs; ++i) {
-            if (mlInfo[i].func != NULL && called == mlInfo[i].func) {
-                info = &mlInfo[i];
-                break;
-            }
-        }
-        if (info == NULL)
-            continue;
-
-        // Got one; grab the operands
-        llvm::Value *ptr = callInst->getArgOperand(0);
-        llvm::Value *mask = callInst->getArgOperand(1);
-
-        MaskStatus maskStatus = lGetMaskStatus(mask);
-        if (maskStatus == ALL_OFF) {
-            // Zero mask - no-op, so replace the load with an undef value
-            llvm::ReplaceInstWithValue(iter->getParent()->getInstList(),
-                                       iter, llvm::UndefValue::get(callInst->getType()));
-            modifiedAny = true;
-            goto restart;
-        }
-        else if (maskStatus == ALL_ON) {
-            // The mask is all on, so turn this into a regular load
-            llvm::Type *ptrType = llvm::PointerType::get(callInst->getType(), 0);
-            ptr = new llvm::BitCastInst(ptr, ptrType, "ptr_cast_for_load",
-                                        callInst);
-            llvm::Instruction *load =
-                new llvm::LoadInst(ptr, callInst->getName(), false /* not volatile */,
-                                   info->align, (llvm::Instruction *)NULL);
-            lCopyMetadata(load, callInst);
-            llvm::ReplaceInstWithInst(callInst, load);
-            modifiedAny = true;
-            goto restart;
-        }
-    }
-
-    DEBUG_END_PASS("MaskedLoadOpt");
-
-    return modifiedAny;
-}
-
-
-static llvm::Pass *
-CreateMaskedLoadOptPass() {
-    return new MaskedLoadOptPass;
-}
-
-
-///////////////////////////////////////////////////////////////////////////
-// GSToLoadStorePass
-
 /** After earlier optimization passes have run, we are sometimes able to
     determine that gathers/scatters are actually accessing memory in a more
     regular fashion and then change the operation to something simpler and
@@ -2106,20 +1858,9 @@ CreateMaskedLoadOptPass() {
    shuffle or things that could be handled with hybrids of e.g. 2 4-wide
    vector loads with AVX, etc.
 */
-class GSToLoadStorePass : public llvm::BasicBlockPass {
-public:
-    static char ID;
-    GSToLoadStorePass() : BasicBlockPass(ID) { }
-
-    const char *getPassName() const { return "Gather/Scatter Improvements"; }
-    bool runOnBasicBlock(llvm::BasicBlock &BB);
-};
-
-
-char GSToLoadStorePass::ID = 0;
-
-
-struct GatherImpInfo {
+static bool
+lGSToLoadStore(llvm::CallInst *callInst) {
+    struct GatherImpInfo {
     GatherImpInfo(const char *pName, const char *lmName, llvm::Type *st,
                   int a)
         : align(a) {
@@ -2133,36 +1874,7 @@ struct GatherImpInfo {
     llvm::Function *loadMaskedFunc;
     llvm::Type *scalarType;
     const int align;
 };

-
-static llvm::Value *
-lComputeCommonPointer(llvm::Value *base, llvm::Value *offsets,
-                      llvm::Instruction *insertBefore) {
-    llvm::Value *firstOffset = LLVMExtractFirstVectorElement(offsets);
-    return lGEPInst(base, firstOffset, "ptr", insertBefore);
-}
-
-
-struct ScatterImpInfo {
-    ScatterImpInfo(const char *pName, const char *msName,
-                   llvm::Type *vpt, int a)
-        : align(a) {
-        pseudoFunc = m->module->getFunction(pName);
-        maskedStoreFunc = m->module->getFunction(msName);
-        vecPtrType = vpt;
-        Assert(pseudoFunc != NULL && maskedStoreFunc != NULL);
-    }
-    llvm::Function *pseudoFunc;
-    llvm::Function *maskedStoreFunc;
-    llvm::Type *vecPtrType;
-    const int align;
-};
-
-
-bool
-GSToLoadStorePass::runOnBasicBlock(llvm::BasicBlock &bb) {
-    DEBUG_START_PASS("GSToLoadStorePass");
-
     GatherImpInfo gInfo[] = {
         GatherImpInfo("__pseudo_gather_base_offsets32_i8", "__masked_load_i8",
@@ -2190,6 +1902,22 @@ GSToLoadStorePass::runOnBasicBlock(llvm::BasicBlock &bb) {
         GatherImpInfo("__pseudo_gather_base_offsets64_double", "__masked_load_double",
                       LLVMTypes::DoubleType, 8)
     };

+    struct ScatterImpInfo {
+        ScatterImpInfo(const char *pName, const char *msName,
+                       llvm::Type *vpt, int a)
+            : align(a) {
+            pseudoFunc = m->module->getFunction(pName);
+            maskedStoreFunc = m->module->getFunction(msName);
+            vecPtrType = vpt;
+            Assert(pseudoFunc != NULL && maskedStoreFunc != NULL);
+        }
+        llvm::Function *pseudoFunc;
+        llvm::Function *maskedStoreFunc;
+        llvm::Type *vecPtrType;
+        const int align;
+    };
+
     ScatterImpInfo sInfo[] = {
         ScatterImpInfo("__pseudo_scatter_base_offsets32_i8", "__pseudo_masked_store_i8",
                        LLVMTypes::Int8VectorPointerType, 1),
@@ -2217,19 +1945,7 @@ GSToLoadStorePass::runOnBasicBlock(llvm::BasicBlock &bb) {
                        LLVMTypes::DoubleVectorPointerType, 8)
     };

-    bool modifiedAny = false;
-
- restart:
-    for (llvm::BasicBlock::iterator iter = bb.begin(), e = bb.end(); iter != e; ++iter) {
-        // Iterate over all of the instructions and look for calls to
-        // __pseudo_*_base_offsets_* calls.
-        llvm::CallInst *callInst = llvm::dyn_cast<llvm::CallInst>(&*iter);
-        if (callInst == NULL)
-            continue;
-
     llvm::Function *calledFunc = callInst->getCalledFunction();
-    if (calledFunc == NULL)
-        continue;
-
     GatherImpInfo *gatherInfo = NULL;
     ScatterImpInfo *scatterInfo = NULL;
@@ -2248,7 +1964,7 @@ GSToLoadStorePass::runOnBasicBlock(llvm::BasicBlock &bb) {
         }
     }
     if (gatherInfo == NULL && scatterInfo == NULL)
-        continue;
+        return false;

     SourcePos pos;
     lGetSourcePosFromMetadata(callInst, &pos);
@@ -2313,8 +2029,7 @@ GSToLoadStorePass::runOnBasicBlock(llvm::BasicBlock &bb) {
             lCopyMetadata(vecValue, callInst);
             llvm::ReplaceInstWithInst(callInst,
                                       llvm::dyn_cast<llvm::Instruction>(vecValue));
-            modifiedAny = true;
-            goto restart;
+            return true;
         }
         else {
             // A scatter with everyone going to the same location is
@@ -2331,6 +2046,7 @@ GSToLoadStorePass::runOnBasicBlock(llvm::BasicBlock &bb) {
             // case.  We'll just let a bunch of the program instances
             // do redundant writes, since this isn't important to make
             // fast anyway...
+            return false;
         }
     }
     else {
@@ -2351,6 +2067,7 @@ GSToLoadStorePass::runOnBasicBlock(llvm::BasicBlock &bb) {
                                        LLVMGetName(ptr, "_masked_load"));
             lCopyMetadata(newCall, callInst);
             llvm::ReplaceInstWithInst(callInst, newCall);
+            return true;
         }
         else {
             Debug(pos, "Transformed scatter to unaligned vector store!");
@@ -2361,23 +2078,207 @@ GSToLoadStorePass::runOnBasicBlock(llvm::BasicBlock &bb) {
                                        mask, "");
             lCopyMetadata(newCall, callInst);
             llvm::ReplaceInstWithInst(callInst, newCall);
+                return true;
+            }
+        }
+        return false;
+    }
+}


+///////////////////////////////////////////////////////////////////////////
+// MaskedStoreOptPass
+
+/** Masked stores are generally more complex than regular stores; for
+    example, they require multiple instructions to simulate under SSE.
+    This optimization detects cases where masked stores can be replaced
+    with regular stores or removed entirely, for the cases of an 'all on'
+    mask and an 'all off' mask, respectively.
+*/
+static bool
+lImproveMaskedStore(llvm::CallInst *callInst) {
+    struct MSInfo {
+        MSInfo(const char *name, const int a)
+            : align(a) {
+            func = m->module->getFunction(name);
+            Assert(func != NULL);
+        }
+        llvm::Function *func;
+        const int align;
+    };
+
+    MSInfo msInfo[] = {
+        MSInfo("__pseudo_masked_store_i8", 1),
+        MSInfo("__pseudo_masked_store_i16", 2),
+        MSInfo("__pseudo_masked_store_i32", 4),
+        MSInfo("__pseudo_masked_store_float", 4),
+        MSInfo("__pseudo_masked_store_i64", 8),
+        MSInfo("__pseudo_masked_store_double", 8),
+        MSInfo("__masked_store_blend_i8", 1),
+        MSInfo("__masked_store_blend_i16", 2),
+        MSInfo("__masked_store_blend_i32", 4),
+        MSInfo("__masked_store_blend_float", 4),
+        MSInfo("__masked_store_blend_i64", 8),
+        MSInfo("__masked_store_blend_double", 8),
+        MSInfo("__masked_store_i8", 1),
+        MSInfo("__masked_store_i16", 2),
+        MSInfo("__masked_store_i32", 4),
+        MSInfo("__masked_store_float", 4),
+        MSInfo("__masked_store_i64", 8),
+        MSInfo("__masked_store_double", 8)
+    };
+
+    llvm::Function *called = callInst->getCalledFunction();
+
+    int nMSFuncs = sizeof(msInfo) / sizeof(msInfo[0]);
+    MSInfo *info = NULL;
+    for (int i = 0; i < nMSFuncs; ++i) {
+        if (msInfo[i].func != NULL && called == msInfo[i].func) {
+            info = &msInfo[i];
+            break;
+        }
+    }
+    if (info == NULL)
+        return false;
+
+    // Got one; grab the operands
+    llvm::Value *lvalue = callInst->getArgOperand(0);
+    llvm::Value *rvalue = callInst->getArgOperand(1);
+    llvm::Value *mask = callInst->getArgOperand(2);
+
+    MaskStatus maskStatus = lGetMaskStatus(mask);
+    if (maskStatus == ALL_OFF) {
+        // Zero mask - no-op, so remove the store completely.  (This
+        // may in turn lead to being able to optimize out instructions
+        // that compute the rvalue...)
+        callInst->eraseFromParent();
+        return true;
+    }
+    else if (maskStatus == ALL_ON) {
+        // The mask is all on, so turn this into a regular store
+        llvm::Type *rvalueType = rvalue->getType();
+        llvm::Type *ptrType = llvm::PointerType::get(rvalueType, 0);
+
+        lvalue = new llvm::BitCastInst(lvalue, ptrType, "lvalue_to_ptr_type", callInst);
+        lCopyMetadata(lvalue, callInst);
+        llvm::Instruction *store =
+            new llvm::StoreInst(rvalue, lvalue, false /* not volatile */,
+                                info->align);
+        lCopyMetadata(store, callInst);
+        llvm::ReplaceInstWithInst(callInst, store);
+        return true;
     }

+    return false;
+}
+
+
+static bool
+lImproveMaskedLoad(llvm::CallInst *callInst,
+                   llvm::BasicBlock::iterator iter) {
+    struct MLInfo {
+        MLInfo(const char *name, const int a)
+            : align(a) {
+            func = m->module->getFunction(name);
+            Assert(func != NULL);
+        }
+        llvm::Function *func;
+        const int align;
+    };
+
+    MLInfo mlInfo[] = {
+        MLInfo("__masked_load_i8", 1),
+        MLInfo("__masked_load_i16", 2),
+        MLInfo("__masked_load_i32", 4),
+        MLInfo("__masked_load_float", 4),
+        MLInfo("__masked_load_i64", 8),
+        MLInfo("__masked_load_double", 8)
+    };
+
+    llvm::Function *called = callInst->getCalledFunction();
+
+    int nFuncs = sizeof(mlInfo) / sizeof(mlInfo[0]);
+    MLInfo *info = NULL;
+    for (int i = 0; i < nFuncs; ++i) {
+        if (mlInfo[i].func != NULL && called == mlInfo[i].func) {
+            info = &mlInfo[i];
+            break;
+        }
+    }
+    if (info == NULL)
+        return false;
+
+    // Got one; grab the operands
+    llvm::Value *ptr = callInst->getArgOperand(0);
+    llvm::Value *mask = callInst->getArgOperand(1);
+
+    MaskStatus maskStatus = lGetMaskStatus(mask);
+    if (maskStatus == ALL_OFF) {
+        // Zero mask - no-op, so replace the load with an undef value
+        llvm::ReplaceInstWithValue(iter->getParent()->getInstList(),
+                                   iter, llvm::UndefValue::get(callInst->getType()));
+        return true;
+    }
+    else if (maskStatus == ALL_ON) {
+        // The mask is all on, so turn this into a regular load
+        llvm::Type *ptrType = llvm::PointerType::get(callInst->getType(), 0);
+        ptr = new llvm::BitCastInst(ptr, ptrType, "ptr_cast_for_load",
+                                    callInst);
+        llvm::Instruction *load =
+            new llvm::LoadInst(ptr, callInst->getName(), false /* not volatile */,
+                               info->align, (llvm::Instruction *)NULL);
+        lCopyMetadata(load, callInst);
+        llvm::ReplaceInstWithInst(callInst, load);
+        return true;
+    }
+    else
+        return false;
+}
+
+
+bool
+ImproveMemoryOpsPass::runOnBasicBlock(llvm::BasicBlock &bb) {
+    DEBUG_START_PASS("ImproveMemoryOps");
+
+    bool modifiedAny = false;
+ restart:
+    // Iterate through all of the instructions in the basic block.
+    for (llvm::BasicBlock::iterator iter = bb.begin(), e = bb.end(); iter != e; ++iter) {
+        llvm::CallInst *callInst = llvm::dyn_cast<llvm::CallInst>(&*iter);
+        // If we don't have a call to one of the
+        // __pseudo_{gather,scatter}_* functions, then just go on to the
+        // next instruction.
+        if (callInst == NULL ||
+            callInst->getCalledFunction() == NULL)
+            continue;
+
+        if (lGSToGSBaseOffsets(callInst)) {
+            modifiedAny = true;
+            goto restart;
+        }
+        if (lGSToLoadStore(callInst)) {
+            modifiedAny = true;
+            goto restart;
+        }
+        if (lImproveMaskedStore(callInst)) {
+            modifiedAny = true;
+            goto restart;
+        }
+        if (lImproveMaskedLoad(callInst, iter)) {
             modifiedAny = true;
             goto restart;
         }
     }
-    }

-    DEBUG_END_PASS("GSToLoadStorePass");
+    DEBUG_END_PASS("ImproveMemoryOps");

     return modifiedAny;
 }


 static llvm::Pass *
-CreateGSToLoadStorePass() {
-    return new GSToLoadStorePass;
+CreateImproveMemoryOpsPass() {
+    return new ImproveMemoryOpsPass;
 }

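For readers unfamiliar with why lImproveMaskedStore special-cases the mask status, here is a minimal, self-contained C++ sketch of the idea in scalar form. It is illustrative only: the function and names below are hypothetical, not code from this commit; ALL_ON and ALL_OFF correspond to what ispc's lGetMaskStatus reports for the mask operand.

    #include <cstdint>
    #include <cstring>

    // Scalar emulation of a 4-wide masked store. The first two branches
    // are the cheap cases the pass rewrites to: a dropped store (ALL_OFF)
    // and a single plain store (ALL_ON). Only a mixed mask needs the
    // per-lane blend, which is the expensive path on SSE-class targets.
    static void maskedStore4(float *dst, const float *src, uint32_t mask) {
        if (mask == 0)             // ALL_OFF: store is a no-op; remove it
            return;
        if (mask == 0xFu) {        // ALL_ON: identical to a regular store
            std::memcpy(dst, src, 4 * sizeof(float));
            return;
        }
        for (int i = 0; i < 4; ++i)    // MIXED: per-lane conditional write
            if (mask & (1u << i))
                dst[i] = src[i];
    }

Doing this rewrite (and the gather/scatter ones) in the single restart-driven loop above, rather than in separately scheduled passes, is what resolves the phase-ordering problem described in the commit message.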