From cb650d61005b5e363f8088350b0e5b33742401f1 Mon Sep 17 00:00:00 2001 From: Dmitry Babokin Date: Wed, 17 Apr 2013 20:56:32 +0400 Subject: [PATCH] One more opportunity to do better broadcast --- opt.cpp | 28 +++++++++++++++++----------- 1 file changed, 17 insertions(+), 11 deletions(-) diff --git a/opt.cpp b/opt.cpp index 687aa507..f4ea620a 100644 --- a/opt.cpp +++ b/opt.cpp @@ -2457,18 +2457,24 @@ lGSToLoadStore(llvm::CallInst *callInst) { ptr = new llvm::BitCastInst(ptr, llvm::PointerType::get(gatherInfo->scalarType, 0), ptr->getName(), callInst); llvm::Value *scalarValue = new llvm::LoadInst(ptr, callInst->getName(), callInst); - llvm::Value *vecValue = llvm::UndefValue::get(callInst->getType()); - for (int i = 0; i < g->target->getVectorWidth(); ++i) { - if (i < g->target->getVectorWidth() - 1) - vecValue = llvm::InsertElementInst::Create(vecValue, scalarValue, LLVMInt32(i), - callInst->getName(), callInst); - else - vecValue = llvm::InsertElementInst::Create(vecValue, scalarValue, LLVMInt32(i), - callInst->getName()); - } - lCopyMetadata(vecValue, callInst); + + // Generate the following sequence: + // %name_init.i = insertelement <4 x i32> undef, i32 %val, i32 0 + // %name.i = shufflevector <4 x i32> %name_init.i, <4 x i32> undef, + // <4 x i32> zeroinitializer + llvm::Value *undef1Value = llvm::UndefValue::get(callInst->getType()); + llvm::Value *undef2Value = llvm::UndefValue::get(callInst->getType()); + llvm::Value *insertVec = llvm::InsertElementInst::Create( + undef1Value, scalarValue, LLVMInt32(0), callInst->getName(), callInst); + llvm::Value *zeroMask = llvm::ConstantVector::getSplat( + callInst->getType()->getVectorNumElements(), + llvm::Constant::getNullValue(llvm::Type::getInt32Ty(*g->ctx))); + llvm::Value *shufValue = new llvm::ShuffleVectorInst( + insertVec, undef2Value, zeroMask, callInst->getName()); + + lCopyMetadata(shufValue, callInst); llvm::ReplaceInstWithInst(callInst, - llvm::dyn_cast(vecValue)); + llvm::dyn_cast(shufValue)); return true; } 
else {