Additional cleanup to enable more broadcasts
This commit is contained in:
71
ctx.cpp
71
ctx.cpp
@@ -1380,10 +1380,10 @@ FunctionEmitContext::MasksAllEqual(llvm::Value *v1, llvm::Value *v2) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
llvm::Value *
|
llvm::Value *
|
||||||
FunctionEmitContext::ProgramIndex() {
|
FunctionEmitContext::ProgramIndexVector(bool is32bits) {
|
||||||
llvm::SmallVector<llvm::Constant*, 16> array;
|
llvm::SmallVector<llvm::Constant*, 16> array;
|
||||||
for (int i = 0; i < g->target->getVectorWidth() ; ++i) {
|
for (int i = 0; i < g->target->getVectorWidth() ; ++i) {
|
||||||
llvm::Constant *C = LLVMInt32(i);
|
llvm::Constant *C = is32bits ? LLVMInt32(i) : LLVMInt64(i);
|
||||||
array.push_back(C);
|
array.push_back(C);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -1765,24 +1765,7 @@ FunctionEmitContext::SmearUniform(llvm::Value *value, const char *name) {
|
|||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Generate the follwoing sequence:
|
ret = BroadcastValue(value, vecType, name);
|
||||||
// %broadcast_init.0 = insertelement <4 x i32> undef, i32 %val, i32 0
|
|
||||||
// %broadcast.1 = shufflevector <4 x i32> %smear.0, <4 x i32> undef,
|
|
||||||
// <4 x i32> zeroinitializer
|
|
||||||
//
|
|
||||||
llvm::Value *undef1 = llvm::UndefValue::get(vecType);
|
|
||||||
llvm::Value *undef2 = llvm::UndefValue::get(vecType);
|
|
||||||
|
|
||||||
// InsertElement
|
|
||||||
llvm::Twine tw1 = llvm::Twine("broadcast_init.") + llvm::Twine(name ? name : "");
|
|
||||||
llvm::Value *insert = InsertInst(undef1, value, 0, tw1.str().c_str());
|
|
||||||
|
|
||||||
// ShuffleVector
|
|
||||||
llvm::Constant *zeroVec = llvm::ConstantVector::getSplat(
|
|
||||||
g->target->getVectorWidth(),
|
|
||||||
llvm::Constant::getNullValue(llvm::Type::getInt32Ty(*g->ctx)));
|
|
||||||
llvm::Twine tw2 = llvm::Twine("broadcast.") + llvm::Twine(name ? name : "");
|
|
||||||
ret = ShuffleInst(insert, undef2, zeroVec, tw2.str().c_str());
|
|
||||||
|
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
@@ -3187,6 +3170,45 @@ FunctionEmitContext::ShuffleInst(llvm::Value *v1, llvm::Value *v2, llvm::Value *
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/** Broadcasts a scalar value into every lane of a vector.

    @param v        Scalar value to replicate.  Its type must equal the
                    element type of vecType (asserted below).
    @param vecType  Vector type of the result.
    @param name     Optional name for the resulting shuffle instruction.
                    When NULL, the name is obtained from
                    LLVMGetName(v, "_broadcast").
    @return The value produced by ShuffleInst(), or NULL when v or vecType
            is NULL. */
llvm::Value *
FunctionEmitContext::BroadcastValue(llvm::Value *v, llvm::Type* vecType,
                                    const char *name) {
    if (v == NULL || vecType == NULL) {
        // A NULL operand is only tolerated if an error was already reported.
        AssertPos(currentPos, m->errorCount > 0);
        return NULL;
    }

    llvm::VectorType *ty = llvm::dyn_cast<llvm::VectorType>(vecType);
    Assert(ty && ty->getVectorElementType() == v->getType());

    if (name == NULL)
        name = LLVMGetName(v, "_broadcast");

    // Generate the following sequence:
    //   %name_init.i = insertelement <4 x i32> undef, i32 %val, i32 0
    //   %name.i = shufflevector <4 x i32> %name_init.i, <4 x i32> undef,
    //                           <4 x i32> zeroinitializer

    // Both the insertelement base and the second shuffle operand are the
    // same undef vector.
    llvm::Value *undef = llvm::UndefValue::get(vecType);

    // InsertElement: place the scalar in lane 0.  The Twine is consumed
    // within this single expression; a Twine must not be kept in a local
    // variable because it only references its (temporary) operands.
    llvm::Value *insert =
        InsertInst(undef, v, 0, (llvm::Twine(name) + "_init").str().c_str());

    // ShuffleVector: an all-zero mask selects lane 0 for every output lane.
    llvm::Constant *zeroVec = llvm::ConstantVector::getSplat(
        vecType->getVectorNumElements(),
        llvm::Constant::getNullValue(llvm::Type::getInt32Ty(*g->ctx)));

    return ShuffleInst(insert, undef, zeroVec, name);
}
|
||||||
|
|
||||||
|
|
||||||
llvm::PHINode *
|
llvm::PHINode *
|
||||||
FunctionEmitContext::PhiNode(llvm::Type *type, int count,
|
FunctionEmitContext::PhiNode(llvm::Type *type, int count,
|
||||||
const char *name) {
|
const char *name) {
|
||||||
@@ -3565,12 +3587,9 @@ FunctionEmitContext::addVaryingOffsetsIfNeeded(llvm::Value *ptr,
|
|||||||
unifSize = SmearUniform(unifSize);
|
unifSize = SmearUniform(unifSize);
|
||||||
|
|
||||||
// Compute offset = <0, 1, .. > * unifSize
|
// Compute offset = <0, 1, .. > * unifSize
|
||||||
llvm::Value *varyingOffsets = llvm::UndefValue::get(unifSize->getType());
|
bool is32bits = g->target->is32Bit() || g->opt.force32BitAddressing;
|
||||||
for (int i = 0; i < g->target->getVectorWidth(); ++i) {
|
llvm::Value *varyingOffsets = ProgramIndexVector(is32bits);
|
||||||
llvm::Value *iValue = (g->target->is32Bit() || g->opt.force32BitAddressing) ?
|
|
||||||
LLVMInt32(i) : LLVMInt64(i);
|
|
||||||
varyingOffsets = InsertInst(varyingOffsets, iValue, i, "varying_delta");
|
|
||||||
}
|
|
||||||
llvm::Value *offset = BinaryOperator(llvm::Instruction::Mul, unifSize,
|
llvm::Value *offset = BinaryOperator(llvm::Instruction::Mul, unifSize,
|
||||||
varyingOffsets);
|
varyingOffsets);
|
||||||
|
|
||||||
|
|||||||
9
ctx.h
9
ctx.h
@@ -297,7 +297,7 @@ public:
|
|||||||
|
|
||||||
/** Generate ConstantVector, which contains ProgramIndex, i.e.
|
/** Generate ConstantVector, which contains ProgramIndex, i.e.
|
||||||
< i32 0, i32 1, i32 2, i32 3> */
|
< i32 0, i32 1, i32 2, i32 3> */
|
||||||
llvm::Value *ProgramIndex();
|
llvm::Value *ProgramIndexVector(bool is32bits = true);
|
||||||
|
|
||||||
/** Given a string, create an anonymous global variable to hold its
|
/** Given a string, create an anonymous global variable to hold its
|
||||||
value and return the pointer to the string. */
|
value and return the pointer to the string. */
|
||||||
@@ -504,9 +504,16 @@ public:
|
|||||||
llvm::Value *InsertInst(llvm::Value *v, llvm::Value *eltVal, int elt,
|
llvm::Value *InsertInst(llvm::Value *v, llvm::Value *eltVal, int elt,
|
||||||
const char *name = NULL);
|
const char *name = NULL);
|
||||||
|
|
||||||
|
/** This convenience method maps to an llvm::ShuffleVectorInst. */
|
||||||
llvm::Value *ShuffleInst(llvm::Value *v1, llvm::Value *v2, llvm::Value *mask,
|
llvm::Value *ShuffleInst(llvm::Value *v1, llvm::Value *v2, llvm::Value *mask,
|
||||||
const char *name = NULL);
|
const char *name = NULL);
|
||||||
|
|
||||||
|
/** This convenience method generates a broadcast pattern. It takes a value
|
||||||
|
and a vector type. Type of the value must match element type of the
|
||||||
|
vector. */
|
||||||
|
llvm::Value *BroadcastValue(llvm::Value *v, llvm::Type *vecType,
|
||||||
|
const char *name = NULL);
|
||||||
|
|
||||||
llvm::PHINode *PhiNode(llvm::Type *type, int count,
|
llvm::PHINode *PhiNode(llvm::Type *type, int count,
|
||||||
const char *name = NULL);
|
const char *name = NULL);
|
||||||
llvm::Instruction *SelectInst(llvm::Value *test, llvm::Value *val0,
|
llvm::Instruction *SelectInst(llvm::Value *test, llvm::Value *val0,
|
||||||
|
|||||||
11
expr.cpp
11
expr.cpp
@@ -3905,11 +3905,7 @@ lAddVaryingOffsetsIfNeeded(FunctionEmitContext *ctx, llvm::Value *ptr,
|
|||||||
return ptr;
|
return ptr;
|
||||||
|
|
||||||
// Onward: compute the per lane offsets.
|
// Onward: compute the per lane offsets.
|
||||||
llvm::Value *varyingOffsets =
|
llvm::Value *varyingOffsets = ctx->ProgramIndexVector();
|
||||||
llvm::UndefValue::get(LLVMTypes::Int32VectorType);
|
|
||||||
for (int i = 0; i < g->target->getVectorWidth(); ++i)
|
|
||||||
varyingOffsets = ctx->InsertInst(varyingOffsets, LLVMInt32(i), i,
|
|
||||||
"varying_delta");
|
|
||||||
|
|
||||||
// And finally add the per-lane offsets. Note that we lie to the GEP
|
// And finally add the per-lane offsets. Note that we lie to the GEP
|
||||||
// call and tell it that the pointers are to uniform elements and not
|
// call and tell it that the pointers are to uniform elements and not
|
||||||
@@ -6768,9 +6764,8 @@ TypeCastExpr::GetValue(FunctionEmitContext *ctx) const {
|
|||||||
if (!conv)
|
if (!conv)
|
||||||
return NULL;
|
return NULL;
|
||||||
|
|
||||||
llvm::Value *cast = llvm::UndefValue::get(toType->LLVMType(g->ctx));
|
llvm::Value *cast = ctx->BroadcastValue(conv, toType->LLVMType(g->ctx));
|
||||||
for (int i = 0; i < toVector->GetElementCount(); ++i)
|
|
||||||
cast = ctx->InsertInst(cast, conv, i);
|
|
||||||
return cast;
|
return cast;
|
||||||
}
|
}
|
||||||
else if (toPointerType != NULL) {
|
else if (toPointerType != NULL) {
|
||||||
|
|||||||
26
stmt.cpp
26
stmt.cpp
@@ -1272,11 +1272,8 @@ lUpdateVaryingCounter(int dim, int nDims, FunctionEmitContext *ctx,
|
|||||||
const std::vector<int> &spans) {
|
const std::vector<int> &spans) {
|
||||||
// Smear the uniform counter value out to be varying
|
// Smear the uniform counter value out to be varying
|
||||||
llvm::Value *counter = ctx->LoadInst(uniformCounterPtr);
|
llvm::Value *counter = ctx->LoadInst(uniformCounterPtr);
|
||||||
llvm::Value *smearCounter =
|
llvm::Value *smearCounter = ctx->BroadcastValue(
|
||||||
llvm::UndefValue::get(LLVMTypes::Int32VectorType);
|
counter, LLVMTypes::Int32VectorType, "smear_counter");
|
||||||
for (int i = 0; i < g->target->getVectorWidth(); ++i)
|
|
||||||
smearCounter =
|
|
||||||
ctx->InsertInst(smearCounter, counter, i, "smear_counter");
|
|
||||||
|
|
||||||
// Figure out the offsets; this is a little bit tricky. As an example,
|
// Figure out the offsets; this is a little bit tricky. As an example,
|
||||||
// consider a 2D tiled foreach loop, where we're running 8-wide and
|
// consider a 2D tiled foreach loop, where we're running 8-wide and
|
||||||
@@ -1517,9 +1514,9 @@ ForeachStmt::EmitCode(FunctionEmitContext *ctx) const {
|
|||||||
lUpdateVaryingCounter(i, nDims, ctx, uniformCounterPtrs[i],
|
lUpdateVaryingCounter(i, nDims, ctx, uniformCounterPtrs[i],
|
||||||
dimVariables[i]->storagePtr, span);
|
dimVariables[i]->storagePtr, span);
|
||||||
|
|
||||||
llvm::Value *smearEnd = llvm::UndefValue::get(LLVMTypes::Int32VectorType);
|
llvm::Value *smearEnd = ctx->BroadcastValue(
|
||||||
for (int j = 0; j < g->target->getVectorWidth(); ++j)
|
endVals[i], LLVMTypes::Int32VectorType, "smear_end");
|
||||||
smearEnd = ctx->InsertInst(smearEnd, endVals[i], j, "smear_end");
|
|
||||||
// Do a vector compare of its value to the end value to generate a
|
// Do a vector compare of its value to the end value to generate a
|
||||||
// mask for this last bit of work.
|
// mask for this last bit of work.
|
||||||
llvm::Value *emask =
|
llvm::Value *emask =
|
||||||
@@ -1662,9 +1659,9 @@ ForeachStmt::EmitCode(FunctionEmitContext *ctx) const {
|
|||||||
ctx->SetCurrentBasicBlock(bbPartial); {
|
ctx->SetCurrentBasicBlock(bbPartial); {
|
||||||
llvm::Value *varyingCounter =
|
llvm::Value *varyingCounter =
|
||||||
ctx->LoadInst(dimVariables[nDims-1]->storagePtr);
|
ctx->LoadInst(dimVariables[nDims-1]->storagePtr);
|
||||||
llvm::Value *smearEnd = llvm::UndefValue::get(LLVMTypes::Int32VectorType);
|
llvm::Value *smearEnd = ctx->BroadcastValue(
|
||||||
for (int j = 0; j < g->target->getVectorWidth(); ++j)
|
endVals[nDims-1], LLVMTypes::Int32VectorType, "smear_end");
|
||||||
smearEnd = ctx->InsertInst(smearEnd, endVals[nDims-1], j, "smear_end");
|
|
||||||
llvm::Value *emask =
|
llvm::Value *emask =
|
||||||
ctx->CmpInst(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_SLT,
|
ctx->CmpInst(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_SLT,
|
||||||
varyingCounter, smearEnd);
|
varyingCounter, smearEnd);
|
||||||
@@ -1758,9 +1755,8 @@ ForeachStmt::EmitCode(FunctionEmitContext *ctx) const {
|
|||||||
llvm::Value *varyingCounter =
|
llvm::Value *varyingCounter =
|
||||||
lUpdateVaryingCounter(nDims-1, nDims, ctx, uniformCounterPtrs[nDims-1],
|
lUpdateVaryingCounter(nDims-1, nDims, ctx, uniformCounterPtrs[nDims-1],
|
||||||
dimVariables[nDims-1]->storagePtr, span);
|
dimVariables[nDims-1]->storagePtr, span);
|
||||||
llvm::Value *smearEnd = llvm::UndefValue::get(LLVMTypes::Int32VectorType);
|
llvm::Value *smearEnd = ctx->BroadcastValue(
|
||||||
for (int j = 0; j < g->target->getVectorWidth(); ++j)
|
endVals[nDims-1], LLVMTypes::Int32VectorType, "smear_end");
|
||||||
smearEnd = ctx->InsertInst(smearEnd, endVals[nDims-1], j, "smear_end");
|
|
||||||
llvm::Value *emask =
|
llvm::Value *emask =
|
||||||
ctx->CmpInst(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_SLT,
|
ctx->CmpInst(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_SLT,
|
||||||
varyingCounter, smearEnd);
|
varyingCounter, smearEnd);
|
||||||
@@ -1993,7 +1989,7 @@ ForeachActiveStmt::EmitCode(FunctionEmitContext *ctx) const {
|
|||||||
// math...)
|
// math...)
|
||||||
|
|
||||||
// Get the "program index" vector value
|
// Get the "program index" vector value
|
||||||
llvm::Value *programIndex = ctx->ProgramIndex();
|
llvm::Value *programIndex = ctx->ProgramIndexVector();
|
||||||
|
|
||||||
// And smear the current lane out to a vector
|
// And smear the current lane out to a vector
|
||||||
llvm::Value *firstSet32 =
|
llvm::Value *firstSet32 =
|
||||||
|
|||||||
Reference in New Issue
Block a user