Merge pull request #477 from dbabokin/broadcast

One more opportunity to do better broadcast
This commit is contained in:
jbrodman
2013-04-18 07:57:47 -07:00
2 changed files with 19 additions and 13 deletions

View File

@@ -3187,9 +3187,9 @@ FunctionEmitContext::BroadcastValue(llvm::Value *v, llvm::Type* vecType,
name = LLVMGetName(v, buf); name = LLVMGetName(v, buf);
} }
// Generate the follwoing sequence: // Generate the following sequence:
// %name_init.i = insertelement <4 x i32> undef, i32 %val, i32 0 // %name_init.i = insertelement <4 x i32> undef, i32 %val, i32 0
// %name.i = shufflevector <4 x i32> %smear.0, <4 x i32> undef, // %name.i = shufflevector <4 x i32> %name_init.i, <4 x i32> undef,
// <4 x i32> zeroinitializer // <4 x i32> zeroinitializer
llvm::Value *undef1 = llvm::UndefValue::get(vecType); llvm::Value *undef1 = llvm::UndefValue::get(vecType);

28
opt.cpp
View File

@@ -2457,18 +2457,24 @@ lGSToLoadStore(llvm::CallInst *callInst) {
ptr = new llvm::BitCastInst(ptr, llvm::PointerType::get(gatherInfo->scalarType, 0), ptr = new llvm::BitCastInst(ptr, llvm::PointerType::get(gatherInfo->scalarType, 0),
ptr->getName(), callInst); ptr->getName(), callInst);
llvm::Value *scalarValue = new llvm::LoadInst(ptr, callInst->getName(), callInst); llvm::Value *scalarValue = new llvm::LoadInst(ptr, callInst->getName(), callInst);
llvm::Value *vecValue = llvm::UndefValue::get(callInst->getType());
for (int i = 0; i < g->target->getVectorWidth(); ++i) { // Generate the following sequence:
if (i < g->target->getVectorWidth() - 1) // %name123 = insertelement <4 x i32> undef, i32 %val, i32 0
vecValue = llvm::InsertElementInst::Create(vecValue, scalarValue, LLVMInt32(i), // %name124 = shufflevector <4 x i32> %name123, <4 x i32> undef,
callInst->getName(), callInst); // <4 x i32> zeroinitializer
else llvm::Value *undef1Value = llvm::UndefValue::get(callInst->getType());
vecValue = llvm::InsertElementInst::Create(vecValue, scalarValue, LLVMInt32(i), llvm::Value *undef2Value = llvm::UndefValue::get(callInst->getType());
callInst->getName()); llvm::Value *insertVec = llvm::InsertElementInst::Create(
} undef1Value, scalarValue, LLVMInt32(0), callInst->getName(), callInst);
lCopyMetadata(vecValue, callInst); llvm::Value *zeroMask = llvm::ConstantVector::getSplat(
callInst->getType()->getVectorNumElements(),
llvm::Constant::getNullValue(llvm::Type::getInt32Ty(*g->ctx)));
llvm::Value *shufValue = new llvm::ShuffleVectorInst(
insertVec, undef2Value, zeroMask, callInst->getName());
lCopyMetadata(shufValue, callInst);
llvm::ReplaceInstWithInst(callInst, llvm::ReplaceInstWithInst(callInst,
llvm::dyn_cast<llvm::Instruction>(vecValue)); llvm::dyn_cast<llvm::Instruction>(shufValue));
return true; return true;
} }
else { else {