Add support for int8/int16 types. Addresses issues #9 and #42.

This commit is contained in:
Matt Pharr
2011-07-21 06:57:40 +01:00
parent 2d573acd17
commit bba7211654
64 changed files with 2317 additions and 885 deletions

487
opt.cpp
View File

@@ -409,7 +409,6 @@ IntrinsicsOpt::IntrinsicsOpt()
llvm::Intrinsic::getDeclaration(m->module, llvm::Intrinsic::x86_sse_movmsk_ps);
maskInstructions.push_back(sseMovmsk);
maskInstructions.push_back(m->module->getFunction("llvm.x86.avx.movmsk.ps"));
maskInstructions.push_back(m->module->getFunction("llvm.x86.mic.mask16.to.int"));
maskInstructions.push_back(m->module->getFunction("__movmsk"));
// And all of the blend instructions
@@ -418,8 +417,6 @@ IntrinsicsOpt::IntrinsicsOpt()
0xf, 0, 1, 2));
blendInstructions.push_back(BlendInstruction(
m->module->getFunction("llvm.x86.avx.blendvps"), 0xff, 0, 1, 2));
blendInstructions.push_back(BlendInstruction(
m->module->getFunction("llvm.x86.mic.blend.ps"), 0xffff, 1, 2, 0));
}
@@ -499,8 +496,8 @@ bool
IntrinsicsOpt::runOnBasicBlock(llvm::BasicBlock &bb) {
bool modifiedAny = false;
restart:
for (llvm::BasicBlock::iterator i = bb.begin(), e = bb.end(); i != e; ++i) {
llvm::CallInst *callInst = llvm::dyn_cast<llvm::CallInst>(&*i);
for (llvm::BasicBlock::iterator iter = bb.begin(), e = bb.end(); iter != e; ++iter) {
llvm::CallInst *callInst = llvm::dyn_cast<llvm::CallInst>(&*iter);
if (!callInst)
continue;
@@ -512,7 +509,8 @@ IntrinsicsOpt::runOnBasicBlock(llvm::BasicBlock &bb) {
// If the values are the same, then no need to blend..
if (v[0] == v[1]) {
llvm::ReplaceInstWithValue(i->getParent()->getInstList(), i, v[0]);
llvm::ReplaceInstWithValue(iter->getParent()->getInstList(),
iter, v[0]);
modifiedAny = true;
goto restart;
}
@@ -524,12 +522,14 @@ IntrinsicsOpt::runOnBasicBlock(llvm::BasicBlock &bb) {
// otherwise the result is undefined and any value is fine,
// ergo the defined one is an acceptable result.)
if (lIsUndef(v[0])) {
llvm::ReplaceInstWithValue(i->getParent()->getInstList(), i, v[1]);
llvm::ReplaceInstWithValue(iter->getParent()->getInstList(),
iter, v[1]);
modifiedAny = true;
goto restart;
}
if (lIsUndef(v[1])) {
llvm::ReplaceInstWithValue(i->getParent()->getInstList(), i, v[0]);
llvm::ReplaceInstWithValue(iter->getParent()->getInstList(),
iter, v[0]);
modifiedAny = true;
goto restart;
}
@@ -544,7 +544,8 @@ IntrinsicsOpt::runOnBasicBlock(llvm::BasicBlock &bb) {
value = v[1];
if (value != NULL) {
llvm::ReplaceInstWithValue(i->getParent()->getInstList(), i, value);
llvm::ReplaceInstWithValue(iter->getParent()->getInstList(),
iter, value);
modifiedAny = true;
goto restart;
}
@@ -557,7 +558,8 @@ IntrinsicsOpt::runOnBasicBlock(llvm::BasicBlock &bb) {
// with the corresponding integer mask from its elements
// high bits.
llvm::Value *value = LLVMInt32(mask);
llvm::ReplaceInstWithValue(i->getParent()->getInstList(), i, value);
llvm::ReplaceInstWithValue(iter->getParent()->getInstList(),
iter, value);
modifiedAny = true;
goto restart;
}
@@ -653,10 +655,18 @@ lSizeOfIfKnown(const llvm::Type *type, uint64_t *size) {
*size = 1;
return true;
}
if (type == LLVMTypes::Int8VectorType) {
*size = g->target.vectorWidth * 1;
return true;
}
else if (type == LLVMTypes::Int16Type) {
*size = 2;
return true;
}
if (type == LLVMTypes::Int16VectorType) {
*size = g->target.vectorWidth * 2;
return true;
}
else if (type == LLVMTypes::FloatType || type == LLVMTypes::Int32Type) {
*size = 4;
return true;
@@ -978,33 +988,53 @@ lGetPtrAndOffsets(llvm::Value *ptrs, llvm::Value **basePtr,
}
struct GSInfo {
GSInfo(const char *pgFuncName, const char *pgboFuncName, bool ig, int es)
: isGather(ig), elementSize(es) {
func = m->module->getFunction(pgFuncName);
baseOffsetsFunc = m->module->getFunction(pgboFuncName);
}
llvm::Function *func;
llvm::Function *baseOffsetsFunc;
const bool isGather;
const int elementSize;
};
bool
GatherScatterFlattenOpt::runOnBasicBlock(llvm::BasicBlock &bb) {
llvm::Function *gather32Func = m->module->getFunction("__pseudo_gather_32");
llvm::Function *gather64Func = m->module->getFunction("__pseudo_gather_64");
llvm::Function *scatter32Func = m->module->getFunction("__pseudo_scatter_32");
llvm::Function *scatter64Func = m->module->getFunction("__pseudo_scatter_64");
assert(gather32Func && gather64Func && scatter32Func && scatter64Func);
GSInfo gsFuncs[] = {
GSInfo("__pseudo_gather_8", "__pseudo_gather_base_offsets_8", true, 1),
GSInfo("__pseudo_gather_16", "__pseudo_gather_base_offsets_16", true, 2),
GSInfo("__pseudo_gather_32", "__pseudo_gather_base_offsets_32", true, 4),
GSInfo("__pseudo_gather_64", "__pseudo_gather_base_offsets_64", true, 8),
GSInfo("__pseudo_scatter_8", "__pseudo_scatter_base_offsets_8", false, 1),
GSInfo("__pseudo_scatter_16", "__pseudo_scatter_base_offsets_16", false, 2),
GSInfo("__pseudo_scatter_32", "__pseudo_scatter_base_offsets_32", false, 4),
GSInfo("__pseudo_scatter_64", "__pseudo_scatter_base_offsets_64", false, 8),
};
int numGSFuncs = sizeof(gsFuncs) / sizeof(gsFuncs[0]);
for (int i = 0; i < numGSFuncs; ++i)
assert(gsFuncs[i].func != NULL && gsFuncs[i].baseOffsetsFunc != NULL);
bool modifiedAny = false;
restart:
// Iterate through all of the instructions in the basic block.
for (llvm::BasicBlock::iterator i = bb.begin(), e = bb.end(); i != e; ++i) {
llvm::CallInst *callInst = llvm::dyn_cast<llvm::CallInst>(&*i);
for (llvm::BasicBlock::iterator iter = bb.begin(), e = bb.end(); iter != e; ++iter) {
llvm::CallInst *callInst = llvm::dyn_cast<llvm::CallInst>(&*iter);
// If we don't have a call to one of the
// __pseudo_{gather,scatter}_* functions, then just go on to the
// next instruction.
if (!callInst ||
(callInst->getCalledFunction() != gather32Func &&
callInst->getCalledFunction() != gather64Func &&
callInst->getCalledFunction() != scatter32Func &&
callInst->getCalledFunction() != scatter64Func))
if (callInst == NULL)
continue;
GSInfo *info = NULL;
for (int i = 0; i < numGSFuncs; ++i)
if (callInst->getCalledFunction() == gsFuncs[i].func) {
info = &gsFuncs[i];
break;
}
if (info == NULL)
continue;
bool isGather = (callInst->getCalledFunction() == gather32Func ||
callInst->getCalledFunction() == gather64Func);
bool is32 = (callInst->getCalledFunction() == gather32Func ||
callInst->getCalledFunction() == scatter32Func);
// Transform the array of pointers to a single base pointer and an
// array of int32 offsets. (All the hard work is done by
@@ -1012,19 +1042,15 @@ GatherScatterFlattenOpt::runOnBasicBlock(llvm::BasicBlock &bb) {
llvm::Value *ptrs = callInst->getArgOperand(0);
llvm::Value *basePtr = NULL;
llvm::Value *offsetVector = lGetPtrAndOffsets(ptrs, &basePtr, callInst,
is32 ? 4 : 8);
info->elementSize);
// Cast the base pointer to a void *, since that's what the
// __pseudo_*_base_offsets_* functions want.
basePtr = new llvm::BitCastInst(basePtr, LLVMTypes::VoidPointerType, "base2void",
callInst);
basePtr = new llvm::BitCastInst(basePtr, LLVMTypes::VoidPointerType,
"base2void", callInst);
lCopyMetadata(basePtr, callInst);
if (isGather) {
if (info->isGather) {
llvm::Value *mask = callInst->getArgOperand(1);
llvm::Function *gFunc =
m->module->getFunction(is32 ? "__pseudo_gather_base_offsets_32" :
"__pseudo_gather_base_offsets_64");
assert(gFunc != NULL);
// Generate a new function call to the next pseudo gather
// base+offsets instruction. Note that we're passing a NULL
@@ -1035,11 +1061,12 @@ GatherScatterFlattenOpt::runOnBasicBlock(llvm::BasicBlock &bb) {
#if defined(LLVM_3_0) || defined(LLVM_3_0svn)
llvm::ArrayRef<llvm::Value *> newArgArray(&newArgs[0], &newArgs[3]);
llvm::Instruction *newCall =
llvm::CallInst::Create(gFunc, newArgArray, "newgather",
(llvm::Instruction *)NULL);
llvm::CallInst::Create(info->baseOffsetsFunc, newArgArray,
"newgather", (llvm::Instruction *)NULL);
#else
llvm::Instruction *newCall =
llvm::CallInst::Create(gFunc, &newArgs[0], &newArgs[3], "newgather");
llvm::CallInst::Create(info->baseOffsetsFunc, &newArgs[0], &newArgs[3],
"newgather");
#endif
lCopyMetadata(newCall, callInst);
llvm::ReplaceInstWithInst(callInst, newCall);
@@ -1047,10 +1074,6 @@ GatherScatterFlattenOpt::runOnBasicBlock(llvm::BasicBlock &bb) {
else {
llvm::Value *mask = callInst->getArgOperand(2);
llvm::Value *rvalue = callInst->getArgOperand(1);
llvm::Function *gFunc =
m->module->getFunction(is32 ? "__pseudo_scatter_base_offsets_32" :
"__pseudo_scatter_base_offsets_64");
assert(gFunc);
// Generate a new function call to the next pseudo scatter
// base+offsets instruction. See above for why passing NULL
@@ -1059,11 +1082,12 @@ GatherScatterFlattenOpt::runOnBasicBlock(llvm::BasicBlock &bb) {
#if defined(LLVM_3_0) || defined(LLVM_3_0svn)
llvm::ArrayRef<llvm::Value *> newArgArray(&newArgs[0], &newArgs[4]);
llvm::Instruction *newCall =
llvm::CallInst::Create(gFunc, newArgArray, "",
llvm::CallInst::Create(info->baseOffsetsFunc, newArgArray, "",
(llvm::Instruction *)NULL);
#else
llvm::Instruction *newCall =
llvm::CallInst::Create(gFunc, &newArgs[0], &newArgs[4]);
llvm::CallInst::Create(info->baseOffsetsFunc, &newArgs[0],
&newArgs[4]);
#endif
lCopyMetadata(newCall, callInst);
llvm::ReplaceInstWithInst(callInst, newCall);
@@ -1105,28 +1129,53 @@ char MaskedStoreOptPass::ID = 0;
llvm::RegisterPass<MaskedStoreOptPass> mss("masked-store-scalarize",
"Masked Store Scalarize Pass");
/** Associates one of the masked-store functions with the alignment (in
    bytes) to use for the regular store instructions that may replace
    calls to it. */
struct MSInfo {
    MSInfo(const char *funcName, const int alignment)
        : align(alignment) {
        func = m->module->getFunction(funcName);
        assert(func != NULL);
    }
    llvm::Function *func;
    const int align;
};
bool
MaskedStoreOptPass::runOnBasicBlock(llvm::BasicBlock &bb) {
llvm::Function *pms32Func = m->module->getFunction("__pseudo_masked_store_32");
llvm::Function *pms64Func = m->module->getFunction("__pseudo_masked_store_64");
llvm::Function *msb32Func = m->module->getFunction("__masked_store_blend_32");
llvm::Function *msb64Func = m->module->getFunction("__masked_store_blend_64");
llvm::Function *ms32Func = m->module->getFunction("__masked_store_32");
llvm::Function *ms64Func = m->module->getFunction("__masked_store_64");
MSInfo msInfo[] = {
MSInfo("__pseudo_masked_store_8", 1),
MSInfo("__pseudo_masked_store_16", 2),
MSInfo("__pseudo_masked_store_32", 4),
MSInfo("__pseudo_masked_store_64", 8),
MSInfo("__masked_store_blend_8", 1),
MSInfo("__masked_store_blend_16", 2),
MSInfo("__masked_store_blend_32", 4),
MSInfo("__masked_store_blend_64", 8),
MSInfo("__masked_store_8", 1),
MSInfo("__masked_store_16", 2),
MSInfo("__masked_store_32", 4),
MSInfo("__masked_store_64", 8)
};
bool modifiedAny = false;
restart:
// Iterate over all of the instructions to look for one of the various
// masked store functions
for (llvm::BasicBlock::iterator i = bb.begin(), e = bb.end(); i != e; ++i) {
llvm::CallInst *callInst = llvm::dyn_cast<llvm::CallInst>(&*i);
for (llvm::BasicBlock::iterator iter = bb.begin(), e = bb.end(); iter != e; ++iter) {
llvm::CallInst *callInst = llvm::dyn_cast<llvm::CallInst>(&*iter);
if (!callInst)
continue;
llvm::Function *called = callInst->getCalledFunction();
if (called != pms32Func && called != pms64Func &&
called != msb32Func && called != msb64Func &&
called != ms32Func && called != ms64Func)
int nMSFuncs = sizeof(msInfo) / sizeof(msInfo[0]);
MSInfo *info = NULL;
for (int i = 0; i < nMSFuncs; ++i) {
if (called == msInfo[i].func) {
info = &msInfo[i];
break;
}
}
if (info == NULL)
continue;
// Got one; grab the operands
@@ -1150,15 +1199,12 @@ MaskedStoreOptPass::runOnBasicBlock(llvm::BasicBlock &bb) {
LLVM_TYPE_CONST llvm::Type *rvalueType = rvalue->getType();
LLVM_TYPE_CONST llvm::Type *ptrType =
llvm::PointerType::get(rvalueType, 0);
// Need to update this when int8/int16 are added
int align = (called == pms32Func || called == pms64Func ||
called == msb32Func) ? 4 : 8;
lvalue = new llvm::BitCastInst(lvalue, ptrType, "lvalue_to_ptr_type", callInst);
lCopyMetadata(lvalue, callInst);
llvm::Instruction *store =
new llvm::StoreInst(rvalue, lvalue, false /* not volatile */,
align);
info->align);
lCopyMetadata(store, callInst);
llvm::ReplaceInstWithInst(callInst, store);
@@ -1180,9 +1226,9 @@ CreateMaskedStoreOptPass() {
// LowerMaskedStorePass
/** When the front-end needs to do a masked store, it emits a
__pseudo_masked_store_{32,64} call as a placeholder. This pass lowers
these calls to either __masked_store_{32,64} or
__masked_store_blend_{32,64} calls.
__pseudo_masked_store_{8,16,32,64} call as a placeholder. This pass
lowers these calls to either __masked_store_{8,16,32,64} or
__masked_store_blend_{8,16,32,64} calls.
*/
class LowerMaskedStorePass : public llvm::BasicBlockPass {
public:
@@ -1227,45 +1273,51 @@ lIsStackVariablePointer(llvm::Value *lvalue) {
}
/** Utilty routine to figure out which masked store function to use. The
blend parameter indicates if we want the blending version, is32
indicates if the element size is 32 bits.
*/
static const char *
lMaskedStoreName(bool blend, bool is32) {
if (blend) {
if (is32)
return "__masked_store_blend_32";
else
return "__masked_store_blend_64";
struct LMSInfo {
LMSInfo(const char *pname, const char *bname, const char *msname) {
pseudoFunc = m->module->getFunction(pname);
blendFunc = m->module->getFunction(bname);
maskedStoreFunc = m->module->getFunction(msname);
assert(pseudoFunc != NULL && blendFunc != NULL &&
maskedStoreFunc != NULL);
}
else {
if (is32)
return "__masked_store_32";
else
return "__masked_store_64";
}
}
llvm::Function *pseudoFunc;
llvm::Function *blendFunc;
llvm::Function *maskedStoreFunc;
};
bool
LowerMaskedStorePass::runOnBasicBlock(llvm::BasicBlock &bb) {
llvm::Function *maskedStore32Func = m->module->getFunction("__pseudo_masked_store_32");
llvm::Function *maskedStore64Func = m->module->getFunction("__pseudo_masked_store_64");
assert(maskedStore32Func && maskedStore64Func);
LMSInfo msInfo[] = {
LMSInfo("__pseudo_masked_store_8", "__masked_store_blend_8",
"__masked_store_8"),
LMSInfo("__pseudo_masked_store_16", "__masked_store_blend_16",
"__masked_store_16"),
LMSInfo("__pseudo_masked_store_32", "__masked_store_blend_32",
"__masked_store_32"),
LMSInfo("__pseudo_masked_store_64", "__masked_store_blend_64",
"__masked_store_64")
};
bool modifiedAny = false;
restart:
for (llvm::BasicBlock::iterator i = bb.begin(), e = bb.end(); i != e; ++i) {
for (llvm::BasicBlock::iterator iter = bb.begin(), e = bb.end(); iter != e; ++iter) {
// Iterate through all of the instructions and look for
// __pseudo_masked_store_* calls.
llvm::CallInst *callInst = llvm::dyn_cast<llvm::CallInst>(&*i);
if (!callInst ||
(callInst->getCalledFunction() != maskedStore32Func &&
callInst->getCalledFunction() != maskedStore64Func))
llvm::CallInst *callInst = llvm::dyn_cast<llvm::CallInst>(&*iter);
if (callInst == NULL)
continue;
LMSInfo *info = NULL;
for (unsigned int i = 0; i < sizeof(msInfo) / sizeof(msInfo[0]); ++i) {
if (callInst->getCalledFunction() == msInfo[i].pseudoFunc) {
info = &msInfo[i];
break;
}
}
if (info == NULL)
continue;
bool is32 = (callInst->getCalledFunction() == maskedStore32Func);
llvm::Value *lvalue = callInst->getArgOperand(0);
llvm::Value *rvalue = callInst->getArgOperand(1);
llvm::Value *mask = callInst->getArgOperand(2);
@@ -1282,8 +1334,7 @@ LowerMaskedStorePass::runOnBasicBlock(llvm::BasicBlock &bb) {
// Generate the call to the appropriate masked store function and
// replace the __pseudo_* one with it.
llvm::Function *fms = m->module->getFunction(lMaskedStoreName(doBlend, is32));
assert(fms);
llvm::Function *fms = doBlend ? info->blendFunc : info->maskedStoreFunc;
llvm::Value *args[3] = { lvalue, rvalue, mask };
#if defined(LLVM_3_0) || defined(LLVM_3_0svn)
llvm::ArrayRef<llvm::Value *> newArgArray(&args[0], &args[3]);
@@ -1872,37 +1923,94 @@ lVectorIsLinear(llvm::Value *v[ISPC_MAX_NVEC], int stride) {
}
/** Bundles the functions involved in improving a pseudo gather: the
    pseudo function itself plus the load-and-broadcast and masked-load
    functions it can be turned into, along with the element alignment
    in bytes. */
struct GatherImpInfo {
    GatherImpInfo(const char *pseudoName, const char *broadcastName,
                  const char *maskedName, int alignment)
        : align(alignment) {
        pseudoFunc = m->module->getFunction(pseudoName);
        loadBroadcastFunc = m->module->getFunction(broadcastName);
        loadMaskedFunc = m->module->getFunction(maskedName);
        assert(pseudoFunc != NULL && loadBroadcastFunc != NULL &&
               loadMaskedFunc != NULL);
    }
    llvm::Function *pseudoFunc;
    llvm::Function *loadBroadcastFunc;
    llvm::Function *loadMaskedFunc;
    const int align;
};
struct ScatterImpInfo {
ScatterImpInfo(const char *pName, const char *msName,
LLVM_TYPE_CONST llvm::Type *vpt, int a)
: align(a) {
pseudoFunc = m->module->getFunction(pName);
maskedStoreFunc = m->module->getFunction(msName);
vecPtrType = vpt;
assert(pseudoFunc != NULL && maskedStoreFunc != NULL);
}
llvm::Function *pseudoFunc;
llvm::Function *maskedStoreFunc;
LLVM_TYPE_CONST llvm::Type *vecPtrType;
const int align;
};
bool
GSImprovementsPass::runOnBasicBlock(llvm::BasicBlock &bb) {
llvm::Function *gather32Func = m->module->getFunction("__pseudo_gather_base_offsets_32");
llvm::Function *gather64Func = m->module->getFunction("__pseudo_gather_base_offsets_64");
llvm::Function *scatter32Func = m->module->getFunction("__pseudo_scatter_base_offsets_32");
llvm::Function *scatter64Func = m->module->getFunction("__pseudo_scatter_base_offsets_64");
assert(gather32Func && gather64Func && scatter32Func && scatter64Func);
GatherImpInfo gInfo[] = {
GatherImpInfo("__pseudo_gather_base_offsets_8", "__load_and_broadcast_8",
"__load_masked_8", 1),
GatherImpInfo("__pseudo_gather_base_offsets_16", "__load_and_broadcast_16",
"__load_masked_16", 2),
GatherImpInfo("__pseudo_gather_base_offsets_32", "__load_and_broadcast_32",
"__load_masked_32", 4),
GatherImpInfo("__pseudo_gather_base_offsets_64", "__load_and_broadcast_64",
"__load_masked_64", 8)
};
ScatterImpInfo sInfo[] = {
ScatterImpInfo("__pseudo_scatter_base_offsets_8", "__pseudo_masked_store_8",
LLVMTypes::Int8VectorPointerType, 1),
ScatterImpInfo("__pseudo_scatter_base_offsets_16", "__pseudo_masked_store_16",
LLVMTypes::Int16VectorPointerType, 2),
ScatterImpInfo("__pseudo_scatter_base_offsets_32", "__pseudo_masked_store_32",
LLVMTypes::Int32VectorPointerType, 4),
ScatterImpInfo("__pseudo_scatter_base_offsets_64", "__pseudo_masked_store_64",
LLVMTypes::Int64VectorPointerType, 8)
};
bool modifiedAny = false;
restart:
for (llvm::BasicBlock::iterator i = bb.begin(), e = bb.end(); i != e; ++i) {
for (llvm::BasicBlock::iterator iter = bb.begin(), e = bb.end(); iter != e; ++iter) {
// Iterate over all of the instructions and look for calls to
// __pseudo_*_base_offsets_* calls.
llvm::CallInst *callInst = llvm::dyn_cast<llvm::CallInst>(&*i);
if (!callInst ||
(callInst->getCalledFunction() != gather32Func &&
callInst->getCalledFunction() != gather64Func &&
callInst->getCalledFunction() != scatter32Func &&
callInst->getCalledFunction() != scatter64Func))
llvm::CallInst *callInst = llvm::dyn_cast<llvm::CallInst>(&*iter);
if (callInst == NULL)
continue;
llvm::Function *calledFunc = callInst->getCalledFunction();
GatherImpInfo *gatherInfo = NULL;
ScatterImpInfo *scatterInfo = NULL;
for (unsigned int i = 0; i < sizeof(gInfo) / sizeof(gInfo[0]); ++i) {
if (calledFunc == gInfo[i].pseudoFunc) {
gatherInfo = &gInfo[i];
break;
}
}
for (unsigned int i = 0; i < sizeof(sInfo) / sizeof(sInfo[0]); ++i) {
if (calledFunc == sInfo[i].pseudoFunc) {
scatterInfo = &sInfo[i];
break;
}
}
if (gatherInfo == NULL && scatterInfo == NULL)
continue;
SourcePos pos;
bool ok = lGetSourcePosFromMetadata(callInst, &pos);
assert(ok);
bool isGather = (callInst->getCalledFunction() == gather32Func ||
callInst->getCalledFunction() == gather64Func);
bool is32 = (callInst->getCalledFunction() == gather32Func ||
callInst->getCalledFunction() == scatter32Func);
// Get the actual base pointer; note that it comes into the gather
// or scatter function bitcast to an i8 *, so we need to work back
// to get the pointer as the original type.
@@ -1921,7 +2029,7 @@ GSImprovementsPass::runOnBasicBlock(llvm::BasicBlock &bb) {
if (!lScalarizeVector(callInst->getArgOperand(1), offsetElements))
continue;
llvm::Value *mask = callInst->getArgOperand(isGather ? 2 : 3);
llvm::Value *mask = callInst->getArgOperand((gatherInfo != NULL) ? 2 : 3);
if (lVectorValuesAllEqual(offsetElements)) {
// If all the offsets are equal, then compute the single
@@ -1929,14 +2037,15 @@ GSImprovementsPass::runOnBasicBlock(llvm::BasicBlock &bb) {
// (arbitrarily).
llvm::Value *indices[1] = { offsetElements[0] };
llvm::Value *basei8 =
new llvm::BitCastInst(base, LLVMTypes::VoidPointerType, "base2i8", callInst);
new llvm::BitCastInst(base, LLVMTypes::VoidPointerType,
"base2i8", callInst);
lCopyMetadata(basei8, callInst);
llvm::Value *ptr =
llvm::GetElementPtrInst::Create(basei8, &indices[0], &indices[1],
"ptr", callInst);
lCopyMetadata(ptr, callInst);
if (isGather) {
if (gatherInfo != NULL) {
// A gather with everyone going to the same location is
// handled as a scalar load and broadcast across the lanes.
// Note that we do still have to pass the mask to the
@@ -1944,20 +2053,16 @@ GSImprovementsPass::runOnBasicBlock(llvm::BasicBlock &bb) {
// access memory if the mask is all off (the location may
// be invalid in that case).
Debug(pos, "Transformed gather to scalar load and broadcast!");
llvm::Function *loadBroadcast =
m->module->getFunction(is32 ? "__load_and_broadcast_32" :
"__load_and_broadcast_64");
assert(loadBroadcast);
llvm::Value *args[2] = { ptr, mask };
#if defined(LLVM_3_0) || defined(LLVM_3_0svn)
llvm::ArrayRef<llvm::Value *> newArgArray(&args[0], &args[2]);
llvm::Instruction *newCall =
llvm::CallInst::Create(loadBroadcast, newArgArray,
llvm::CallInst::Create(gatherInfo->loadBroadcastFunc, newArgArray,
"load_broadcast", (llvm::Instruction *)NULL);
#else
llvm::Instruction *newCall =
llvm::CallInst::Create(loadBroadcast, &args[0], &args[2],
"load_broadcast");
llvm::CallInst::Create(gatherInfo->loadBroadcastFunc, &args[0],
&args[2], "load_broadcast");
#endif
lCopyMetadata(newCall, callInst);
llvm::ReplaceInstWithInst(callInst, newCall);
@@ -1977,8 +2082,8 @@ GSImprovementsPass::runOnBasicBlock(llvm::BasicBlock &bb) {
ptr = new llvm::BitCastInst(ptr, llvm::PointerType::get(first->getType(), 0),
"ptr2rvalue_type", callInst);
lCopyMetadata(ptr, callInst);
llvm::Instruction *sinst =
new llvm::StoreInst(first, ptr, false, is32 ? 4 : 8 /* align */);
llvm::Instruction *sinst = new llvm::StoreInst(first, ptr, false,
scatterInfo->align);
lCopyMetadata(sinst, callInst);
llvm::ReplaceInstWithInst(callInst, sinst);
}
@@ -1987,7 +2092,8 @@ GSImprovementsPass::runOnBasicBlock(llvm::BasicBlock &bb) {
goto restart;
}
if (lVectorIsLinear(offsetElements, is32 ? 4 : 8)) {
int step = gatherInfo ? gatherInfo->align : scatterInfo->align;
if (lVectorIsLinear(offsetElements, step)) {
// We have a linear sequence of memory locations being accessed
// starting with the location given by the offset from
// offsetElements[0], with stride of 4 or 8 bytes (for 32 bit
@@ -2003,53 +2109,38 @@ GSImprovementsPass::runOnBasicBlock(llvm::BasicBlock &bb) {
"ptr", callInst);
lCopyMetadata(ptr, callInst);
if (isGather) {
if (gatherInfo != NULL) {
Debug(pos, "Transformed gather to unaligned vector load!");
// FIXME: make this an aligned load when possible..
// FIXME: are there lurking potential bugs when e.g. the
// last few entries of the mask are off and the load ends
// up straddling a page boundary?
llvm::Function *loadMasked =
m->module->getFunction(is32 ? "__load_masked_32" : "__load_masked_64");
assert(loadMasked);
llvm::Value *args[2] = { ptr, mask };
#if defined(LLVM_3_0) || defined(LLVM_3_0svn)
llvm::ArrayRef<llvm::Value *> argArray(&args[0], &args[2]);
llvm::Instruction *newCall =
llvm::CallInst::Create(loadMasked, argArray, "load_masked",
(llvm::Instruction *)NULL);
llvm::CallInst::Create(gatherInfo->loadMaskedFunc, argArray,
"load_masked", (llvm::Instruction *)NULL);
#else
llvm::Instruction *newCall =
llvm::CallInst::Create(loadMasked, &args[0], &args[2], "load_masked");
llvm::CallInst::Create(gatherInfo->loadMaskedFunc, &args[0],
&args[2], "load_masked");
#endif
lCopyMetadata(newCall, callInst);
llvm::ReplaceInstWithInst(callInst, newCall);
}
else {
Debug(pos, "Transformed scatter to unaligned vector store!");
// FIXME: make this an aligned store when possible. Need
// to work through the messiness of issuing a pseudo store
// here.
llvm::Value *rvalue = callInst->getArgOperand(2);
llvm::Function *storeMasked =
m->module->getFunction(is32 ? "__pseudo_masked_store_32" :
"__pseudo_masked_store_64");
assert(storeMasked);
LLVM_TYPE_CONST llvm::Type *vecPtrType = is32 ?
LLVMTypes::Int32VectorPointerType : LLVMTypes::Int64VectorPointerType;
ptr = new llvm::BitCastInst(ptr, vecPtrType, "ptrcast", callInst);
ptr = new llvm::BitCastInst(ptr, scatterInfo->vecPtrType, "ptrcast",
callInst);
llvm::Value *args[3] = { ptr, rvalue, mask };
#if defined(LLVM_3_0) || defined(LLVM_3_0svn)
llvm::ArrayRef<llvm::Value *> argArray(&args[0], &args[3]);
llvm::Instruction *newCall =
llvm::CallInst::Create(storeMasked, argArray, "",
(llvm::Instruction *)NULL);
llvm::CallInst::Create(scatterInfo->maskedStoreFunc, argArray,
"", (llvm::Instruction *)NULL);
#else
llvm::Instruction *newCall =
llvm::CallInst::Create(storeMasked, &args[0], &args[3], "");
llvm::CallInst::Create(scatterInfo->maskedStoreFunc,
&args[0], &args[3], "");
#endif
lCopyMetadata(newCall, callInst);
llvm::ReplaceInstWithInst(callInst, newCall);
@@ -2097,31 +2188,50 @@ char LowerGSPass::ID = 0;
llvm::RegisterPass<LowerGSPass> lgs("lower-gs",
"Lower Gather/Scatter Pass");
/** Maps a __pseudo_{gather,scatter}_base_offsets_* function to the
    actual implementation that should be called in its place; isGather
    selects which performance warning to issue when lowering. */
struct LowerGSInfo {
    LowerGSInfo(const char *pseudoName, const char *actualName, bool gather)
        : isGather(gather) {
        pseudoFunc = m->module->getFunction(pseudoName);
        actualFunc = m->module->getFunction(actualName);
        assert(pseudoFunc != NULL && actualFunc != NULL);
    }
    llvm::Function *pseudoFunc;
    llvm::Function *actualFunc;
    const bool isGather;
};
bool
LowerGSPass::runOnBasicBlock(llvm::BasicBlock &bb) {
llvm::Function *gather32Func = m->module->getFunction("__pseudo_gather_base_offsets_32");
llvm::Function *gather64Func = m->module->getFunction("__pseudo_gather_base_offsets_64");
llvm::Function *scatter32Func = m->module->getFunction("__pseudo_scatter_base_offsets_32");
llvm::Function *scatter64Func = m->module->getFunction("__pseudo_scatter_base_offsets_64");
assert(gather32Func && gather64Func && scatter32Func && scatter64Func);
LowerGSInfo lgsInfo[] = {
LowerGSInfo("__pseudo_gather_base_offsets_8", "__gather_base_offsets_i8", true),
LowerGSInfo("__pseudo_gather_base_offsets_16", "__gather_base_offsets_i16", true),
LowerGSInfo("__pseudo_gather_base_offsets_32", "__gather_base_offsets_i32", true),
LowerGSInfo("__pseudo_gather_base_offsets_64", "__gather_base_offsets_i64", true),
LowerGSInfo("__pseudo_scatter_base_offsets_8", "__scatter_base_offsets_i8", false),
LowerGSInfo("__pseudo_scatter_base_offsets_16", "__scatter_base_offsets_i16", false),
LowerGSInfo("__pseudo_scatter_base_offsets_32", "__scatter_base_offsets_i32", false),
LowerGSInfo("__pseudo_scatter_base_offsets_64", "__scatter_base_offsets_i64", false)
};
bool modifiedAny = false;
restart:
for (llvm::BasicBlock::iterator i = bb.begin(), e = bb.end(); i != e; ++i) {
for (llvm::BasicBlock::iterator iter = bb.begin(), e = bb.end(); iter != e; ++iter) {
// Loop over the instructions and find calls to the
// __pseudo_*_base_offsets_* functions.
llvm::CallInst *callInst = llvm::dyn_cast<llvm::CallInst>(&*i);
if (!callInst ||
(callInst->getCalledFunction() != gather32Func &&
callInst->getCalledFunction() != gather64Func &&
callInst->getCalledFunction() != scatter32Func &&
callInst->getCalledFunction() != scatter64Func))
llvm::CallInst *callInst = llvm::dyn_cast<llvm::CallInst>(&*iter);
if (callInst == NULL)
continue;
llvm::Function *calledFunc = callInst->getCalledFunction();
LowerGSInfo *info = NULL;
for (unsigned int i = 0; i < sizeof(lgsInfo) / sizeof(lgsInfo[0]); ++i) {
if (calledFunc == lgsInfo[i].pseudoFunc) {
info = &lgsInfo[i];
break;
}
}
if (info == NULL)
continue;
bool isGather = (callInst->getCalledFunction() == gather32Func ||
callInst->getCalledFunction() == gather64Func);
bool is32 = (callInst->getCalledFunction() == gather32Func ||
callInst->getCalledFunction() == scatter32Func);
// Get the source position from the metadata attached to the call
// instruction so that we can issue PerformanceWarning()s below.
@@ -2129,20 +2239,11 @@ LowerGSPass::runOnBasicBlock(llvm::BasicBlock &bb) {
bool ok = lGetSourcePosFromMetadata(callInst, &pos);
assert(ok);
if (isGather) {
llvm::Function *gFunc = m->module->getFunction(is32 ? "__gather_base_offsets_i32" :
"__gather_base_offsets_i64");
assert(gFunc);
callInst->setCalledFunction(gFunc);
callInst->setCalledFunction(info->actualFunc);
if (info->isGather)
PerformanceWarning(pos, "Gather required to compute value in expression.");
}
else {
llvm::Function *sFunc = m->module->getFunction(is32 ? "__scatter_base_offsets_i32" :
"__scatter_base_offsets_i64");
assert(sFunc);
callInst->setCalledFunction(sFunc);
else
PerformanceWarning(pos, "Scatter required for storing value.");
}
modifiedAny = true;
goto restart;
}
@@ -2286,25 +2387,41 @@ char MakeInternalFuncsStaticPass::ID = 0;
llvm::RegisterPass<MakeInternalFuncsStaticPass>
mifsp("make-internal-funcs-static", "Make Internal Funcs Static Pass");
bool
MakeInternalFuncsStaticPass::runOnModule(llvm::Module &module) {
const char *names[] = {
"__do_print", "__gather_base_offsets_i32", "__gather_base_offsets_i64",
"__gather_elt_32", "__gather_elt_64", "__load_and_broadcast_32",
"__load_and_broadcast_64", "__load_masked_32", "__load_masked_64",
"__masked_store_32", "__masked_store_64", "__masked_store_blend_32",
"__masked_store_blend_64", "__packed_load_active", "__packed_store_active",
"__scatter_base_offsets_i32", "__scatter_base_offsets_i64", "__scatter_elt_32",
"__scatter_elt_64", };
"__do_print",
"__gather_base_offsets_i8", "__gather_base_offsets_i16",
"__gather_base_offsets_i32", "__gather_base_offsets_i64",
"__gather_elt_8", "__gather_elt_16",
"__gather_elt_32", "__gather_elt_64",
"__load_and_broadcast_8", "__load_and_broadcast_16",
"__load_and_broadcast_32", "__load_and_broadcast_64",
"__load_masked_8", "__load_masked_16",
"__load_masked_32", "__load_masked_64",
"__masked_store_8", "__masked_store_16",
"__masked_store_32", "__masked_store_64",
"__masked_store_blend_8", "__masked_store_blend_16",
"__masked_store_blend_32", "__masked_store_blend_64",
"__packed_load_active", "__packed_store_active",
"__scatter_base_offsets_i8", "__scatter_base_offsets_i16",
"__scatter_base_offsets_i32", "__scatter_base_offsets_i64",
"__scatter_elt_8", "__scatter_elt_16",
"__scatter_elt_32", "__scatter_elt_64",
};
bool modifiedAny = false;
int count = sizeof(names) / sizeof(names[0]);
for (int i = 0; i < count; ++i) {
llvm::Function *f = m->module->getFunction(names[i]);
if (f != NULL)
if (f != NULL) {
f->setLinkage(llvm::GlobalValue::PrivateLinkage);
modifiedAny = true;
}
}
return true;
return modifiedAny;
}