diff --git a/ctx.cpp b/ctx.cpp index f8fb2962..5c8b16f2 100644 --- a/ctx.cpp +++ b/ctx.cpp @@ -1380,10 +1380,10 @@ FunctionEmitContext::MasksAllEqual(llvm::Value *v1, llvm::Value *v2) { } llvm::Value * -FunctionEmitContext::ProgramIndex() { +FunctionEmitContext::ProgramIndexVector(bool is32bits) { llvm::SmallVector array; for (int i = 0; i < g->target->getVectorWidth() ; ++i) { - llvm::Constant *C = LLVMInt32(i); + llvm::Constant *C = is32bits ? LLVMInt32(i) : LLVMInt64(i); array.push_back(C); } @@ -1765,24 +1765,7 @@ FunctionEmitContext::SmearUniform(llvm::Value *value, const char *name) { return ret; } - // Generate the follwoing sequence: - // %broadcast_init.0 = insertelement <4 x i32> undef, i32 %val, i32 0 - // %broadcast.1 = shufflevector <4 x i32> %smear.0, <4 x i32> undef, - // <4 x i32> zeroinitializer - // - llvm::Value *undef1 = llvm::UndefValue::get(vecType); - llvm::Value *undef2 = llvm::UndefValue::get(vecType); - - // InsertElement - llvm::Twine tw1 = llvm::Twine("broadcast_init.") + llvm::Twine(name ? name : ""); - llvm::Value *insert = InsertInst(undef1, value, 0, tw1.str().c_str()); - - // ShuffleVector - llvm::Constant *zeroVec = llvm::ConstantVector::getSplat( - g->target->getVectorWidth(), - llvm::Constant::getNullValue(llvm::Type::getInt32Ty(*g->ctx))); - llvm::Twine tw2 = llvm::Twine("broadcast.") + llvm::Twine(name ? 
name : "") - ret = ShuffleInst(insert, undef2, zeroVec, tw2.str().c_str()); + ret = BroadcastValue(value, vecType, name); return ret; } @@ -3187,6 +3170,45 @@ FunctionEmitContext::ShuffleInst(llvm::Value *v1, llvm::Value *v2, llvm::Value * } +llvm::Value * +FunctionEmitContext::BroadcastValue(llvm::Value *v, llvm::Type* vecType, + const char *name) { + if (v == NULL || vecType == NULL) { + AssertPos(currentPos, m->errorCount > 0); + return NULL; + } + + llvm::VectorType *ty = llvm::dyn_cast(vecType); + Assert(ty && ty->getVectorElementType() == v->getType()); + + if (name == NULL) { + char buf[32]; + sprintf(buf, "_broadcast"); + name = LLVMGetName(v, buf); + } + + // Generate the following sequence: + // %name_init.i = insertelement <4 x i32> undef, i32 %val, i32 0 + // %name.i = shufflevector <4 x i32> %name_init.i, <4 x i32> undef, + // <4 x i32> zeroinitializer + + llvm::Value *undef1 = llvm::UndefValue::get(vecType); + llvm::Value *undef2 = llvm::UndefValue::get(vecType); + + // InsertElement (NOTE(review): LLVM's Twine docs advise against storing a Twine in a local variable; consider building a std::string directly) + llvm::Twine tw = llvm::Twine(name) + llvm::Twine("_init"); + llvm::Value *insert = InsertInst(undef1, v, 0, tw.str().c_str()); + + // ShuffleVector + llvm::Constant *zeroVec = llvm::ConstantVector::getSplat( + vecType->getVectorNumElements(), + llvm::Constant::getNullValue(llvm::Type::getInt32Ty(*g->ctx))); + llvm::Value *ret = ShuffleInst(insert, undef2, zeroVec, name); + + return ret; +} + + llvm::PHINode * FunctionEmitContext::PhiNode(llvm::Type *type, int count, const char *name) { @@ -3565,12 +3587,9 @@ FunctionEmitContext::addVaryingOffsetsIfNeeded(llvm::Value *ptr, unifSize = SmearUniform(unifSize); // Compute offset = <0, 1, .. > * unifSize - llvm::Value *varyingOffsets = llvm::UndefValue::get(unifSize->getType()); - for (int i = 0; i < g->target->getVectorWidth(); ++i) { - llvm::Value *iValue = (g->target->is32Bit() || g->opt.force32BitAddressing) ? 
- LLVMInt32(i) : LLVMInt64(i); - varyingOffsets = InsertInst(varyingOffsets, iValue, i, "varying_delta"); - } + bool is32bits = g->target->is32Bit() || g->opt.force32BitAddressing; + llvm::Value *varyingOffsets = ProgramIndexVector(is32bits); + llvm::Value *offset = BinaryOperator(llvm::Instruction::Mul, unifSize, varyingOffsets); diff --git a/ctx.h b/ctx.h index 7b74ffa8..58f9aae3 100644 --- a/ctx.h +++ b/ctx.h @@ -297,7 +297,7 @@ public: /** Generate ConstantVector, which contains ProgramIndex, i.e. < i32 0, i32 1, i32 2, i32 3> */ - llvm::Value *ProgramIndex(); + llvm::Value *ProgramIndexVector(bool is32bits = true); /** Given a string, create an anonymous global variable to hold its value and return the pointer to the string. */ @@ -504,9 +504,16 @@ public: llvm::Value *InsertInst(llvm::Value *v, llvm::Value *eltVal, int elt, const char *name = NULL); + /** This convenience method maps to an llvm::ShuffleVectorInst. */ llvm::Value *ShuffleInst(llvm::Value *v1, llvm::Value *v2, llvm::Value *mask, const char *name = NULL); + /** Convenience method that generates a broadcast pattern. It takes a value + and a vector type. The type of the value must match the element type of the + vector. */ + llvm::Value *BroadcastValue(llvm::Value *v, llvm::Type *vecType, + const char *name = NULL); + llvm::PHINode *PhiNode(llvm::Type *type, int count, const char *name = NULL); llvm::Instruction *SelectInst(llvm::Value *test, llvm::Value *val0, diff --git a/expr.cpp b/expr.cpp index fdf79032..7808d2af 100644 --- a/expr.cpp +++ b/expr.cpp @@ -3905,11 +3905,7 @@ lAddVaryingOffsetsIfNeeded(FunctionEmitContext *ctx, llvm::Value *ptr, return ptr; // Onward: compute the per lane offsets. 
- llvm::Value *varyingOffsets = - llvm::UndefValue::get(LLVMTypes::Int32VectorType); - for (int i = 0; i < g->target->getVectorWidth(); ++i) - varyingOffsets = ctx->InsertInst(varyingOffsets, LLVMInt32(i), i, - "varying_delta"); + llvm::Value *varyingOffsets = ctx->ProgramIndexVector(); // And finally add the per-lane offsets. Note that we lie to the GEP // call and tell it that the pointers are to uniform elements and not @@ -6768,9 +6764,8 @@ TypeCastExpr::GetValue(FunctionEmitContext *ctx) const { if (!conv) return NULL; - llvm::Value *cast = llvm::UndefValue::get(toType->LLVMType(g->ctx)); - for (int i = 0; i < toVector->GetElementCount(); ++i) - cast = ctx->InsertInst(cast, conv, i); + llvm::Value *cast = ctx->BroadcastValue(conv, toType->LLVMType(g->ctx)); + return cast; } else if (toPointerType != NULL) { diff --git a/stmt.cpp b/stmt.cpp index 32fe672a..5ada5584 100644 --- a/stmt.cpp +++ b/stmt.cpp @@ -1272,11 +1272,8 @@ lUpdateVaryingCounter(int dim, int nDims, FunctionEmitContext *ctx, const std::vector &spans) { // Smear the uniform counter value out to be varying llvm::Value *counter = ctx->LoadInst(uniformCounterPtr); - llvm::Value *smearCounter = - llvm::UndefValue::get(LLVMTypes::Int32VectorType); - for (int i = 0; i < g->target->getVectorWidth(); ++i) - smearCounter = - ctx->InsertInst(smearCounter, counter, i, "smear_counter"); + llvm::Value *smearCounter = ctx->BroadcastValue( + counter, LLVMTypes::Int32VectorType, "smear_counter"); // Figure out the offsets; this is a little bit tricky. 
As an example, // consider a 2D tiled foreach loop, where we're running 8-wide and @@ -1517,9 +1514,9 @@ ForeachStmt::EmitCode(FunctionEmitContext *ctx) const { lUpdateVaryingCounter(i, nDims, ctx, uniformCounterPtrs[i], dimVariables[i]->storagePtr, span); - llvm::Value *smearEnd = llvm::UndefValue::get(LLVMTypes::Int32VectorType); - for (int j = 0; j < g->target->getVectorWidth(); ++j) - smearEnd = ctx->InsertInst(smearEnd, endVals[i], j, "smear_end"); + llvm::Value *smearEnd = ctx->BroadcastValue( + endVals[i], LLVMTypes::Int32VectorType, "smear_end"); + // Do a vector compare of its value to the end value to generate a // mask for this last bit of work. llvm::Value *emask = @@ -1662,9 +1659,9 @@ ForeachStmt::EmitCode(FunctionEmitContext *ctx) const { ctx->SetCurrentBasicBlock(bbPartial); { llvm::Value *varyingCounter = ctx->LoadInst(dimVariables[nDims-1]->storagePtr); - llvm::Value *smearEnd = llvm::UndefValue::get(LLVMTypes::Int32VectorType); - for (int j = 0; j < g->target->getVectorWidth(); ++j) - smearEnd = ctx->InsertInst(smearEnd, endVals[nDims-1], j, "smear_end"); + llvm::Value *smearEnd = ctx->BroadcastValue( + endVals[nDims-1], LLVMTypes::Int32VectorType, "smear_end"); + llvm::Value *emask = ctx->CmpInst(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_SLT, varyingCounter, smearEnd); @@ -1758,9 +1755,8 @@ ForeachStmt::EmitCode(FunctionEmitContext *ctx) const { llvm::Value *varyingCounter = lUpdateVaryingCounter(nDims-1, nDims, ctx, uniformCounterPtrs[nDims-1], dimVariables[nDims-1]->storagePtr, span); - llvm::Value *smearEnd = llvm::UndefValue::get(LLVMTypes::Int32VectorType); - for (int j = 0; j < g->target->getVectorWidth(); ++j) - smearEnd = ctx->InsertInst(smearEnd, endVals[nDims-1], j, "smear_end"); + llvm::Value *smearEnd = ctx->BroadcastValue( + endVals[nDims-1], LLVMTypes::Int32VectorType, "smear_end"); llvm::Value *emask = ctx->CmpInst(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_SLT, varyingCounter, smearEnd); @@ -1993,7 +1989,7 @@ 
ForeachActiveStmt::EmitCode(FunctionEmitContext *ctx) const { // math...) // Get the "program index" vector value - llvm::Value *programIndex = ctx->ProgramIndex(); + llvm::Value *programIndex = ctx->ProgramIndexVector(); // And smear the current lane out to a vector llvm::Value *firstSet32 =