diff --git a/llvmutil.cpp b/llvmutil.cpp
index 75d832c5..cfcdf113 100644
--- a/llvmutil.cpp
+++ b/llvmutil.cpp
@@ -39,7 +39,9 @@
 #include "ispc.h"
 #include "type.h"
 #include <llvm/Instructions.h>
+#include <llvm/BasicBlock.h>
 #include <set>
+#include <map>
 
 LLVM_TYPE_CONST llvm::Type *LLVMTypes::VoidType = NULL;
 LLVM_TYPE_CONST llvm::PointerType *LLVMTypes::VoidPointerType = NULL;
@@ -784,6 +786,119 @@ LLVMDumpValue(llvm::Value *v) {
 }
 
 
+/** Recursive worker for LLVMExtractFirstVectorElement(): returns a scalar
+    value equal to element 0 of the vector-typed value v.
+
+    New non-PHI instructions are inserted before insertBefore; each new
+    scalar PHI node is inserted at the start of the basic block that
+    holds the vector PHI it mirrors.  phiMap records the scalar PHI
+    already created for each vector PHI, so that cycles through PHI
+    nodes terminate and reuse a single scalar PHI.
+ */
+static llvm::Value *
+lExtractFirstVectorElement(llvm::Value *v, llvm::Instruction *insertBefore,
+                           std::map<llvm::PHINode *, llvm::PHINode *> &phiMap) {
+    // If it's not an instruction (i.e. is a constant), then we can just
+    // emit an extractelement instruction and let the regular optimizer do
+    // the rest.
+    if (llvm::isa<llvm::Instruction>(v) == false)
+        return llvm::ExtractElementInst::Create(v, LLVMInt32(0), "first_elt",
+                                                insertBefore);
+
+    LLVM_TYPE_CONST llvm::VectorType *vt =
+        llvm::dyn_cast<LLVM_TYPE_CONST llvm::VectorType>(v->getType());
+    Assert(vt != NULL);
+
+    // Materialize the name as a std::string.  A Twine only points at the
+    // operands it was built from, so it must not be kept in a local past
+    // the statement that created it.
+    std::string newName = (v->getName() + llvm::Twine(".elt0")).str();
+
+    // Rewrite regular binary operators and casts to the scalarized
+    // equivalent.
+    llvm::BinaryOperator *bop = llvm::dyn_cast<llvm::BinaryOperator>(v);
+    if (bop != NULL) {
+        llvm::Value *v0 = lExtractFirstVectorElement(bop->getOperand(0),
+                                                     insertBefore, phiMap);
+        llvm::Value *v1 = lExtractFirstVectorElement(bop->getOperand(1),
+                                                     insertBefore, phiMap);
+        return llvm::BinaryOperator::Create(bop->getOpcode(), v0, v1,
+                                            newName, insertBefore);
+    }
+
+    llvm::CastInst *cast = llvm::dyn_cast<llvm::CastInst>(v);
+    if (cast != NULL) {
+        llvm::Value *src = lExtractFirstVectorElement(cast->getOperand(0),
+                                                      insertBefore, phiMap);
+        return llvm::CastInst::Create(cast->getOpcode(), src,
+                                      vt->getElementType(), newName,
+                                      insertBefore);
+    }
+
+    llvm::PHINode *phi = llvm::dyn_cast<llvm::PHINode>(v);
+    if (phi != NULL) {
+        // For PHI nodes, recursively scalarize them.
+        std::map<llvm::PHINode *, llvm::PHINode *>::iterator known =
+            phiMap.find(phi);
+        if (known != phiMap.end())
+            return known->second;
+
+        // We need to create the new scalar PHI node immediately, though,
+        // and put it in the map<>, so that if we come back to this node
+        // via a recursive lExtractFirstVectorElement() call, then we can
+        // return the pointer and not get stuck in an infinite loop.
+        //
+        // The insertion point for the new phi node also has to be the
+        // start of the bblock of the original phi node, which isn't
+        // necessarily the same bblock as insertBefore is in!
+        llvm::Instruction *phiInsertPos = phi->getParent()->begin();
+        llvm::PHINode *scalarPhi =
+            llvm::PHINode::Create(vt->getElementType(),
+                                  phi->getNumIncomingValues(), newName,
+                                  phiInsertPos);
+        phiMap[phi] = scalarPhi;
+
+        for (unsigned i = 0; i < phi->getNumIncomingValues(); ++i) {
+            // Anything built for an incoming value has to be available
+            // at the end of its incoming block, so it goes right before
+            // that block's terminator; insertBefore may sit in a block
+            // that does not dominate this edge.
+            llvm::BasicBlock *pred = phi->getIncomingBlock(i);
+            llvm::Value *incoming =
+                lExtractFirstVectorElement(phi->getIncomingValue(i),
+                                           pred->getTerminator(), phiMap);
+            scalarPhi->addIncoming(incoming, pred);
+        }
+
+        return scalarPhi;
+    }
+
+    // If we have a chain of insertelement instructions, then we can just
+    // flatten them out and grab the value for the first one.
+    llvm::InsertElementInst *ie = llvm::dyn_cast<llvm::InsertElementInst>(v);
+    if (ie != NULL) {
+        llvm::Value *elements[ISPC_MAX_NVEC] = { NULL };
+        LLVMFlattenInsertChain(ie, vt->getNumElements(), elements);
+        // NOTE(review): presumably a lane the chain never writes comes back
+        // as NULL -- confirm against LLVMFlattenInsertChain().  If so, fall
+        // through to the generic extract below instead of returning NULL.
+        if (elements[0] != NULL)
+            return elements[0];
+    }
+
+    // Worst case, for everything else, just do a regular extract element
+    return llvm::ExtractElementInst::Create(v, LLVMInt32(0), "first_elt",
+                                            insertBefore);
+}
+
+
+llvm::Value *
+LLVMExtractFirstVectorElement(llvm::Value *v, llvm::Instruction *insertBefore) {
+    std::map<llvm::PHINode *, llvm::PHINode *> phiMap;
+    return lExtractFirstVectorElement(v, insertBefore, phiMap);
+}
+
+
 /** Given two vectors of the same type, concatenate them into a vector that
     has twice as many elements, where the first half has the elements from
     the first vector and the second half has the elements from the second
diff --git a/llvmutil.h b/llvmutil.h
index 1f696904..41f98d96 100644
--- a/llvmutil.h
+++ b/llvmutil.h
@@ -239,6 +239,20 @@ void LLVMFlattenInsertChain(llvm::InsertElementInst *ie, int vectorWidth,
     on. */
 extern void LLVMDumpValue(llvm::Value *v);
 
+/** Given a vector-typed value, this function returns the value of its
+    first element. Rather than just doing the straightforward thing of
+    using a single extractelement instruction to do this, this function
+    tries to rewrite the computation for the first element in scalar form;
+    this is generally more efficient than computing the entire vector's
+    worth of values just to extract the first element, in cases where only
+    the first element's value is needed.
+    Newly created instructions are inserted before insertBefore, except
+    for scalarized PHI nodes and the values feeding them, which are placed
+    in the block of the original PHI and in its incoming blocks.
+ */
+extern llvm::Value *LLVMExtractFirstVectorElement(llvm::Value *v,
+                                                  llvm::Instruction *insertBefore);
+
 /** This function takes two vectors, expected to be the same length, and
     returns a new vector of twice the length that represents concatenating
     the two of them. */
diff --git a/opt.cpp b/opt.cpp
index f4adc6bd..e2efe9a5 100644
--- a/opt.cpp
+++ b/opt.cpp
@@ -2531,9 +2531,8 @@ struct GatherImpInfo {
 static llvm::Value *
 lComputeCommonPointer(llvm::Value *base, llvm::Value *offsets,
                       llvm::Instruction *insertBefore) {
-    llvm::Value *firstOffset =
-        llvm::ExtractElementInst::Create(offsets, LLVMInt32(0), "first_offset",
-                                         insertBefore);
+    llvm::Value *firstOffset = LLVMExtractFirstVectorElement(offsets,
+                                                             insertBefore);
     return lGEPInst(base, firstOffset, "ptr", insertBefore);
 }
 
@@ -3524,9 +3523,8 @@ lComputeBasePtr(llvm::CallInst *gatherInst, llvm::Instruction *insertBefore) {
     // All of the variable offsets values should be the same, due to
     // checking for this in GatherCoalescePass::runOnBasicBlock(). Thus,
     // extract the first value and use that as a scalar.
-    llvm::Value *variable =
-        llvm::ExtractElementInst::Create(variableOffsets, LLVMInt32(0),
-                                         "variable0", insertBefore);
+    llvm::Value *variable = LLVMExtractFirstVectorElement(variableOffsets,
+                                                          insertBefore);
     if (variable->getType() == LLVMTypes::Int64Type)
         offsetScale = new llvm::ZExtInst(offsetScale, LLVMTypes::Int64Type,
                                          "scale_to64", insertBefore);