diff --git a/cbackend.cpp b/cbackend.cpp index df8b4f04..40d0ab8f 100644 --- a/cbackend.cpp +++ b/cbackend.cpp @@ -4395,16 +4395,21 @@ public: static char ID; llvm::Module *module; - int vectorWidth; + unsigned int vectorWidth; + +private: + unsigned int ChainLength(llvm::InsertElementInst *inst) const; + llvm::Value *getInsertChainSmearValue(llvm::Instruction* inst) const; + llvm::Value *getShuffleSmearValue(llvm::Instruction* inst) const; }; char SmearCleanupPass::ID = 0; -static int -lChainLength(llvm::InsertElementInst *inst) { - int length = 0; +unsigned int +SmearCleanupPass::ChainLength(llvm::InsertElementInst *inst) const { + unsigned int length = 0; while (inst != NULL) { ++length; inst = llvm::dyn_cast(inst->getOperand(0)); @@ -4413,45 +4418,105 @@ lChainLength(llvm::InsertElementInst *inst) { } +llvm::Value * +SmearCleanupPass::getInsertChainSmearValue(llvm::Instruction* inst) const { + // TODO: we don't check indexes where we do insertion, so we may trigger + // transformation for a wrong chain. + // This way of doing broadcast is obsolete and should be probably removed + // some day. + + llvm::InsertElementInst *insertInst = + llvm::dyn_cast(inst); + if (!insertInst) { + return NULL; + } + + // We consider only chains of vectorWidth length. + if (ChainLength(insertInst) != vectorWidth) { + return NULL; + } + + // FIXME: we only want to do this to vectors with width equal to + // the target vector width. But we can't easily get that here, so + // for now we at least avoid one case where we definitely don't + // want to do this.
+ llvm::VectorType *vt = llvm::dyn_cast(insertInst->getType()); + if (vt->getNumElements() == 1) { + return NULL; + } + + llvm::Value *smearValue = NULL; + while (insertInst != NULL) { + // operand 1 is inserted value + llvm::Value *insertValue = insertInst->getOperand(1); + if (smearValue == NULL) { + smearValue = insertValue; + } + else if (smearValue != insertValue) { + return NULL; + } + + // operand 0 is a vector to insert into. + insertInst = + llvm::dyn_cast(insertInst->getOperand(0)); + } + assert(smearValue != NULL); + + return smearValue; +} + + +llvm::Value * +SmearCleanupPass::getShuffleSmearValue(llvm::Instruction* inst) const { + llvm::ShuffleVectorInst *shuffleInst = + llvm::dyn_cast(inst); + if (!shuffleInst) { + return NULL; + } + + llvm::Constant* mask = + llvm::dyn_cast(shuffleInst->getOperand(2)); + + // Check that the shuffle is a broadcast of the first element of the first vector, + // i.e. mask vector is all-zeros vector of expected size. + if (!(mask && + mask->isNullValue() && + llvm::dyn_cast(mask->getType())->getNumElements() == vectorWidth)) { + return NULL; + } + + + llvm::InsertElementInst *insertInst = + llvm::dyn_cast(shuffleInst->getOperand(0)); + + // Check that it's an InsertElementInst that inserts a value to first element. + if (!(insertInst && + llvm::isa(insertInst->getOperand(2)) && + llvm::dyn_cast(insertInst->getOperand(2))->isNullValue())) { + return NULL; + } + + llvm::Value *result = insertInst->getOperand(1); + + return result; +} + + bool SmearCleanupPass::runOnBasicBlock(llvm::BasicBlock &bb) { bool modifiedAny = false; restart: for (llvm::BasicBlock::iterator iter = bb.begin(), e = bb.end(); iter != e; ++iter) { - llvm::InsertElementInst *insertInst = - llvm::dyn_cast(&*iter); - if (insertInst == NULL) + llvm::Value *smearValue = NULL; + + if (!(smearValue = getInsertChainSmearValue(iter)) && + !(smearValue = getShuffleSmearValue(iter))) { continue; - - // Only do this on the last insert in a chain... 
- if (lChainLength(insertInst) != vectorWidth) - continue; - - // FIXME: we only want to do this to vectors with width equal to - // the target vector width. But we can't easily get that here, so - // for now we at least avoid one case where we definitely don't - // want to do this. - llvm::VectorType *vt = llvm::dyn_cast(insertInst->getType()); - if (vt->getNumElements() == 1) - continue; - - llvm::Value *toMatch = NULL; - while (insertInst != NULL) { - llvm::Value *insertValue = insertInst->getOperand(1); - if (toMatch == NULL) - toMatch = insertValue; - else if (toMatch != insertValue) - goto not_equal; - - insertInst = - llvm::dyn_cast(insertInst->getOperand(0)); } - assert(toMatch != NULL); - { - llvm::Type *matchType = toMatch->getType(); - const char *smearFuncName = lGetTypedFunc("smear", matchType, vectorWidth); + llvm::Type *smearType = smearValue->getType(); + const char *smearFuncName = lGetTypedFunc("smear", smearType, vectorWidth); if (smearFuncName != NULL) { llvm::Function *smearFunc = module->getFunction(smearFuncName); if (smearFunc == NULL) { @@ -4460,7 +4525,7 @@ SmearCleanupPass::runOnBasicBlock(llvm::BasicBlock &bb) { // parameter type. 
llvm::Constant *sf = module->getOrInsertFunction(smearFuncName, iter->getType(), - matchType, NULL); + smearType, NULL); smearFunc = llvm::dyn_cast(sf); assert(smearFunc != NULL); #if defined(LLVM_3_1) @@ -4473,10 +4538,10 @@ SmearCleanupPass::runOnBasicBlock(llvm::BasicBlock &bb) { } assert(smearFunc != NULL); - llvm::Value *args[1] = { toMatch }; + llvm::Value *args[1] = { smearValue }; llvm::ArrayRef argArray(&args[0], &args[1]); llvm::Instruction *smearCall = - llvm::CallInst::Create(smearFunc, argArray, LLVMGetName(toMatch, "_smear"), + llvm::CallInst::Create(smearFunc, argArray, LLVMGetName(smearValue, "_smear"), (llvm::Instruction *)NULL); ReplaceInstWithInst(iter, smearCall); @@ -4484,9 +4549,6 @@ SmearCleanupPass::runOnBasicBlock(llvm::BasicBlock &bb) { modifiedAny = true; goto restart; } - } - not_equal: - ; } return modifiedAny; diff --git a/ctx.cpp b/ctx.cpp index 6f4b6bcf..5c8b16f2 100644 --- a/ctx.cpp +++ b/ctx.cpp @@ -1379,6 +1379,19 @@ FunctionEmitContext::MasksAllEqual(llvm::Value *v1, llvm::Value *v2) { #endif } +llvm::Value * +FunctionEmitContext::ProgramIndexVector(bool is32bits) { + llvm::SmallVector array; + for (int i = 0; i < g->target->getVectorWidth() ; ++i) { + llvm::Constant *C = is32bits ? 
LLVMInt32(i) : LLVMInt64(i); + array.push_back(C); + } + + llvm::Constant* index = llvm::ConstantVector::get(array); + + return index; +} + llvm::Value * FunctionEmitContext::GetStringPtr(const std::string &str) { @@ -1729,26 +1742,31 @@ FunctionEmitContext::SmearUniform(llvm::Value *value, const char *name) { llvm::Value *ret = NULL; llvm::Type *eltType = value->getType(); + llvm::Type *vecType = NULL; llvm::PointerType *pt = llvm::dyn_cast(eltType); if (pt != NULL) { // Varying pointers are represented as vectors of i32/i64s - ret = llvm::UndefValue::get(LLVMTypes::VoidPointerVectorType); + vecType = LLVMTypes::VoidPointerVectorType; value = PtrToIntInst(value); } - else + else { // All other varying types are represented as vectors of the // underlying type. - ret = llvm::UndefValue::get(llvm::VectorType::get(eltType, - g->target->getVectorWidth())); - - for (int i = 0; i < g->target->getVectorWidth(); ++i) { - llvm::Twine n = llvm::Twine("smear.") + llvm::Twine(name ? name : "") + - llvm::Twine(i); - ret = InsertInst(ret, value, i, n.str().c_str()); + vecType = llvm::VectorType::get(eltType, g->target->getVectorWidth()); } + // Check for a constant case. 
+ if (llvm::Constant *const_val = llvm::dyn_cast(value)) { + ret = llvm::ConstantVector::getSplat( + g->target->getVectorWidth(), + const_val); + return ret; + } + + ret = BroadcastValue(value, vecType, name); + return ret; } @@ -3131,6 +3149,66 @@ FunctionEmitContext::InsertInst(llvm::Value *v, llvm::Value *eltVal, int elt, } +llvm::Value * +FunctionEmitContext::ShuffleInst(llvm::Value *v1, llvm::Value *v2, llvm::Value *mask, + const char *name) { + if (v1 == NULL || v2 == NULL || mask == NULL) { + AssertPos(currentPos, m->errorCount > 0); + return NULL; + } + + if (name == NULL) { + char buf[32]; + sprintf(buf, "_shuffle"); + name = LLVMGetName(v1, buf); + } + + llvm::Instruction *ii = new llvm::ShuffleVectorInst(v1, v2, mask, name, bblock); + + AddDebugPos(ii); + return ii; +} + + +llvm::Value * +FunctionEmitContext::BroadcastValue(llvm::Value *v, llvm::Type* vecType, + const char *name) { + if (v == NULL || vecType == NULL) { + AssertPos(currentPos, m->errorCount > 0); + return NULL; + } + + llvm::VectorType *ty = llvm::dyn_cast(vecType); + Assert(ty && ty->getVectorElementType() == v->getType()); + + if (name == NULL) { + char buf[32]; + sprintf(buf, "_broadcast"); + name = LLVMGetName(v, buf); + } + + // Generate the following sequence: + // %name_init.i = insertelement <4 x i32> undef, i32 %val, i32 0 + // %name.i = shufflevector <4 x i32> %name_init.i, <4 x i32> undef, + // <4 x i32> zeroinitializer + + llvm::Value *undef1 = llvm::UndefValue::get(vecType); + llvm::Value *undef2 = llvm::UndefValue::get(vecType); + + // InsertElement + llvm::Twine tw = llvm::Twine(name) + llvm::Twine("_init"); + llvm::Value *insert = InsertInst(undef1, v, 0, tw.str().c_str()); + + // ShuffleVector + llvm::Constant *zeroVec = llvm::ConstantVector::getSplat( + vecType->getVectorNumElements(), + llvm::Constant::getNullValue(llvm::Type::getInt32Ty(*g->ctx))); + llvm::Value *ret = ShuffleInst(insert, undef2, zeroVec, name); + + return ret; +} + + llvm::PHINode *
FunctionEmitContext::PhiNode(llvm::Type *type, int count, const char *name) { @@ -3509,12 +3587,9 @@ FunctionEmitContext::addVaryingOffsetsIfNeeded(llvm::Value *ptr, unifSize = SmearUniform(unifSize); // Compute offset = <0, 1, .. > * unifSize - llvm::Value *varyingOffsets = llvm::UndefValue::get(unifSize->getType()); - for (int i = 0; i < g->target->getVectorWidth(); ++i) { - llvm::Value *iValue = (g->target->is32Bit() || g->opt.force32BitAddressing) ? - LLVMInt32(i) : LLVMInt64(i); - varyingOffsets = InsertInst(varyingOffsets, iValue, i, "varying_delta"); - } + bool is32bits = g->target->is32Bit() || g->opt.force32BitAddressing; + llvm::Value *varyingOffsets = ProgramIndexVector(is32bits); + llvm::Value *offset = BinaryOperator(llvm::Instruction::Mul, unifSize, varyingOffsets); diff --git a/ctx.h b/ctx.h index 7e262310..58f9aae3 100644 --- a/ctx.h +++ b/ctx.h @@ -1,5 +1,5 @@ /* - Copyright (c) 2010-2012, Intel Corporation + Copyright (c) 2010-2013, Intel Corporation All rights reserved. Redistribution and use in source and binary forms, with or without @@ -295,6 +295,10 @@ public: that indicates whether the two masks are equal. */ llvm::Value *MasksAllEqual(llvm::Value *mask1, llvm::Value *mask2); + /** Generate ConstantVector, which contains ProgramIndex, i.e. + < i32 0, i32 1, i32 2, i32 3> */ + llvm::Value *ProgramIndexVector(bool is32bits = true); + /** Given a string, create an anonymous global variable to hold its value and return the pointer to the string. */ llvm::Value *GetStringPtr(const std::string &str); @@ -500,6 +504,16 @@ public: llvm::Value *InsertInst(llvm::Value *v, llvm::Value *eltVal, int elt, const char *name = NULL); + /** This convenience method maps to an llvm::ShuffleVectorInst. */ + llvm::Value *ShuffleInst(llvm::Value *v1, llvm::Value *v2, llvm::Value *mask, + const char *name = NULL); + + /** This convenience method to generate broadcast pattern. It takes a value + and a vector type. 
Type of the value must match element type of the + vector. */ + llvm::Value *BroadcastValue(llvm::Value *v, llvm::Type *vecType, + const char *name = NULL); + llvm::PHINode *PhiNode(llvm::Type *type, int count, const char *name = NULL); llvm::Instruction *SelectInst(llvm::Value *test, llvm::Value *val0, diff --git a/expr.cpp b/expr.cpp index fdf79032..7808d2af 100644 --- a/expr.cpp +++ b/expr.cpp @@ -3905,11 +3905,7 @@ lAddVaryingOffsetsIfNeeded(FunctionEmitContext *ctx, llvm::Value *ptr, return ptr; // Onward: compute the per lane offsets. - llvm::Value *varyingOffsets = - llvm::UndefValue::get(LLVMTypes::Int32VectorType); - for (int i = 0; i < g->target->getVectorWidth(); ++i) - varyingOffsets = ctx->InsertInst(varyingOffsets, LLVMInt32(i), i, - "varying_delta"); + llvm::Value *varyingOffsets = ctx->ProgramIndexVector(); // And finally add the per-lane offsets. Note that we lie to the GEP // call and tell it that the pointers are to uniform elements and not @@ -6768,9 +6764,8 @@ TypeCastExpr::GetValue(FunctionEmitContext *ctx) const { if (!conv) return NULL; - llvm::Value *cast = llvm::UndefValue::get(toType->LLVMType(g->ctx)); - for (int i = 0; i < toVector->GetElementCount(); ++i) - cast = ctx->InsertInst(cast, conv, i); + llvm::Value *cast = ctx->BroadcastValue(conv, toType->LLVMType(g->ctx)); + return cast; } else if (toPointerType != NULL) { diff --git a/llvmutil.cpp b/llvmutil.cpp index e8dd4f9c..26ab72a5 100644 --- a/llvmutil.cpp +++ b/llvmutil.cpp @@ -601,44 +601,74 @@ lGetIntValue(llvm::Value *offset) { void -LLVMFlattenInsertChain(llvm::InsertElementInst *ie, int vectorWidth, +LLVMFlattenInsertChain(llvm::Value *inst, int vectorWidth, llvm::Value **elements) { - for (int i = 0; i < vectorWidth; ++i) + for (int i = 0; i < vectorWidth; ++i) { elements[i] = NULL; + } - while (ie != NULL) { - int64_t iOffset = lGetIntValue(ie->getOperand(2)); - Assert(iOffset >= 0 && iOffset < vectorWidth); - Assert(elements[iOffset] == NULL); + // Catch a pattern of 
InsertElement chain. + if (llvm::InsertElementInst *ie = + llvm::dyn_cast(inst)) { + while (ie != NULL) { + int64_t iOffset = lGetIntValue(ie->getOperand(2)); + Assert(iOffset >= 0 && iOffset < vectorWidth); + Assert(elements[iOffset] == NULL); - // Get the scalar value from this insert - elements[iOffset] = ie->getOperand(1); + // Get the scalar value from this insert + elements[iOffset] = ie->getOperand(1); - // Do we have another insert? - llvm::Value *insertBase = ie->getOperand(0); - ie = llvm::dyn_cast(insertBase); - if (ie == NULL) { - if (llvm::isa(insertBase)) - return; + // Do we have another insert? + llvm::Value *insertBase = ie->getOperand(0); + ie = llvm::dyn_cast(insertBase); + if (ie == NULL) { + if (llvm::isa(insertBase)) { + return; + } - // Get the value out of a constant vector if that's what we - // have - llvm::ConstantVector *cv = - llvm::dyn_cast(insertBase); + // Get the value out of a constant vector if that's what we + // have + llvm::ConstantVector *cv = + llvm::dyn_cast(insertBase); - // FIXME: this assert is a little questionable; we probably - // shouldn't fail in this case but should just return an - // incomplete result. But there aren't currently any known - // cases where we have anything other than an undef value or a - // constant vector at the base, so if that ever does happen, - // it'd be nice to know what happend so that perhaps we can - // handle it. - // FIXME: Also, should we handle ConstantDataVectors with - // LLVM3.1? What about ConstantAggregateZero values?? - Assert(cv != NULL); + // FIXME: this assert is a little questionable; we probably + // shouldn't fail in this case but should just return an + // incomplete result. But there aren't currently any known + // cases where we have anything other than an undef value or a + // constant vector at the base, so if that ever does happen, + // it'd be nice to know what happened so that perhaps we can + // handle it.
+ // FIXME: Also, should we handle ConstantDataVectors with + // LLVM3.1? What about ConstantAggregateZero values?? + Assert(cv != NULL); - Assert(iOffset < (int)cv->getNumOperands()); - elements[iOffset] = cv->getOperand((int32_t)iOffset); + Assert(iOffset < (int)cv->getNumOperands()); + elements[iOffset] = cv->getOperand((int32_t)iOffset); + } + } + } + // Catch a pattern of broadcast implemented as InsertElement + Shuffle: + // %broadcast_init.0 = insertelement <4 x i32> undef, i32 %val, i32 0 + // %broadcast.1 = shufflevector <4 x i32> %smear.0, <4 x i32> undef, + // <4 x i32> zeroinitializer + else if (llvm::ShuffleVectorInst *shuf = + llvm::dyn_cast(inst)) { + llvm::Value *indices = shuf->getOperand(2); + if (llvm::isa(indices)) { + llvm::Value *op = shuf->getOperand(0); + llvm::InsertElementInst *ie = llvm::dyn_cast(op); + if (ie != NULL && + llvm::isa(ie->getOperand(0))) { + llvm::ConstantInt *ci = + llvm::dyn_cast(ie->getOperand(2)); + + if (ci->isZero()) { + for (int i = 0; i < vectorWidth; ++i) { + elements[i] = ie->getOperand(1); + } + return; + } + } } } } @@ -694,10 +724,10 @@ lIsExactMultiple(llvm::Value *val, int baseValue, int vectorLength, else Assert(LLVMVectorValuesAllEqual(val)); - llvm::InsertElementInst *ie = llvm::dyn_cast(val); - if (ie != NULL) { + if (llvm::isa(val) || + llvm::isa(val)) { llvm::Value *elts[ISPC_MAX_NVEC]; - LLVMFlattenInsertChain(ie, g->target->getVectorWidth(), elts); + LLVMFlattenInsertChain(val, g->target->getVectorWidth(), elts); // We just need to check the scalar first value, since we know that // all elements are equal return lIsExactMultiple(elts[0], baseValue, vectorLength, @@ -1440,10 +1470,10 @@ lExtractFirstVectorElement(llvm::Value *v, // If we have a chain of insertelement instructions, then we can just // flatten them out and grab the value for the first one. 
- llvm::InsertElementInst *ie = llvm::dyn_cast(v); - if (ie != NULL) { + if (llvm::isa(v) || + llvm::isa(v)) { llvm::Value *elements[ISPC_MAX_NVEC]; - LLVMFlattenInsertChain(ie, vt->getNumElements(), elements); + LLVMFlattenInsertChain(v, vt->getNumElements(), elements); return elements[0]; } diff --git a/llvmutil.h b/llvmutil.h index d14a5000..c8d6f32b 100644 --- a/llvmutil.h +++ b/llvmutil.h @@ -264,8 +264,13 @@ extern bool LLVMExtractVectorInts(llvm::Value *v, int64_t ret[], int *nElts); constant vector. For anything more complex (e.g. some other arbitrary value, it doesn't try to extract element values into the returned array. + + This also handles common broadcast pattern: + %broadcast_init.0 = insertelement <4 x i32> undef, i32 %val, i32 0 + %broadcast.1 = shufflevector <4 x i32> %smear.0, <4 x i32> undef, + <4 x i32> zeroinitializer */ -extern void LLVMFlattenInsertChain(llvm::InsertElementInst *ie, int vectorWidth, +extern void LLVMFlattenInsertChain(llvm::Value *inst, int vectorWidth, llvm::Value **elements); /** This is a utility routine for debugging that dumps out the given LLVM diff --git a/opt.cpp b/opt.cpp index b310c155..687aa507 100644 --- a/opt.cpp +++ b/opt.cpp @@ -1058,10 +1058,10 @@ lCheckForActualPointer(llvm::Value *v) { */ static llvm::Value * lGetBasePointer(llvm::Value *v) { - llvm::InsertElementInst *ie = llvm::dyn_cast(v); - if (ie != NULL) { + if (llvm::isa(v) || + llvm::isa(v)) { llvm::Value *elements[ISPC_MAX_NVEC]; - LLVMFlattenInsertChain(ie, g->target->getVectorWidth(), elements); + LLVMFlattenInsertChain(v, g->target->getVectorWidth(), elements); // Make sure none of the elements is undefined. 
// TODO: it's probably ok to allow undefined elements and return @@ -1080,9 +1080,12 @@ lGetBasePointer(llvm::Value *v) { } // This case comes up with global/static arrays - llvm::ConstantVector *cv = llvm::dyn_cast(v); - if (cv != NULL) + if (llvm::ConstantVector *cv = llvm::dyn_cast(v)) { return lCheckForActualPointer(cv->getSplatValue()); + } + else if (llvm::ConstantDataVector *cdv = llvm::dyn_cast(v)) { + return lCheckForActualPointer(cdv->getSplatValue()); + } return NULL; } diff --git a/stmt.cpp b/stmt.cpp index 0b789626..5ada5584 100644 --- a/stmt.cpp +++ b/stmt.cpp @@ -1272,11 +1272,8 @@ lUpdateVaryingCounter(int dim, int nDims, FunctionEmitContext *ctx, const std::vector &spans) { // Smear the uniform counter value out to be varying llvm::Value *counter = ctx->LoadInst(uniformCounterPtr); - llvm::Value *smearCounter = - llvm::UndefValue::get(LLVMTypes::Int32VectorType); - for (int i = 0; i < g->target->getVectorWidth(); ++i) - smearCounter = - ctx->InsertInst(smearCounter, counter, i, "smear_counter"); + llvm::Value *smearCounter = ctx->BroadcastValue( + counter, LLVMTypes::Int32VectorType, "smear_counter"); // Figure out the offsets; this is a little bit tricky. As an example, // consider a 2D tiled foreach loop, where we're running 8-wide and @@ -1517,9 +1514,9 @@ ForeachStmt::EmitCode(FunctionEmitContext *ctx) const { lUpdateVaryingCounter(i, nDims, ctx, uniformCounterPtrs[i], dimVariables[i]->storagePtr, span); - llvm::Value *smearEnd = llvm::UndefValue::get(LLVMTypes::Int32VectorType); - for (int j = 0; j < g->target->getVectorWidth(); ++j) - smearEnd = ctx->InsertInst(smearEnd, endVals[i], j, "smear_end"); + llvm::Value *smearEnd = ctx->BroadcastValue( + endVals[i], LLVMTypes::Int32VectorType, "smear_end"); + // Do a vector compare of its value to the end value to generate a // mask for this last bit of work. 
llvm::Value *emask = @@ -1662,9 +1659,9 @@ ForeachStmt::EmitCode(FunctionEmitContext *ctx) const { ctx->SetCurrentBasicBlock(bbPartial); { llvm::Value *varyingCounter = ctx->LoadInst(dimVariables[nDims-1]->storagePtr); - llvm::Value *smearEnd = llvm::UndefValue::get(LLVMTypes::Int32VectorType); - for (int j = 0; j < g->target->getVectorWidth(); ++j) - smearEnd = ctx->InsertInst(smearEnd, endVals[nDims-1], j, "smear_end"); + llvm::Value *smearEnd = ctx->BroadcastValue( + endVals[nDims-1], LLVMTypes::Int32VectorType, "smear_end"); + llvm::Value *emask = ctx->CmpInst(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_SLT, varyingCounter, smearEnd); @@ -1758,9 +1755,8 @@ ForeachStmt::EmitCode(FunctionEmitContext *ctx) const { llvm::Value *varyingCounter = lUpdateVaryingCounter(nDims-1, nDims, ctx, uniformCounterPtrs[nDims-1], dimVariables[nDims-1]->storagePtr, span); - llvm::Value *smearEnd = llvm::UndefValue::get(LLVMTypes::Int32VectorType); - for (int j = 0; j < g->target->getVectorWidth(); ++j) - smearEnd = ctx->InsertInst(smearEnd, endVals[nDims-1], j, "smear_end"); + llvm::Value *smearEnd = ctx->BroadcastValue( + endVals[nDims-1], LLVMTypes::Int32VectorType, "smear_end"); llvm::Value *emask = ctx->CmpInst(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_SLT, varyingCounter, smearEnd); @@ -1993,11 +1989,7 @@ ForeachActiveStmt::EmitCode(FunctionEmitContext *ctx) const { // math...) // Get the "program index" vector value - llvm::Value *programIndex = - llvm::UndefValue::get(LLVMTypes::Int32VectorType); - for (int i = 0; i < g->target->getVectorWidth(); ++i) - programIndex = ctx->InsertInst(programIndex, LLVMInt32(i), i, - "prog_index"); + llvm::Value *programIndex = ctx->ProgramIndexVector(); // And smear the current lane out to a vector llvm::Value *firstSet32 =