Merge pull request #467 from dbabokin/broadcast
Broadcast implementation as InsertElement+Shuffle and related improvements
This commit is contained in:
142
cbackend.cpp
142
cbackend.cpp
@@ -4395,16 +4395,21 @@ public:
|
|||||||
|
|
||||||
static char ID;
|
static char ID;
|
||||||
llvm::Module *module;
|
llvm::Module *module;
|
||||||
int vectorWidth;
|
unsigned int vectorWidth;
|
||||||
|
|
||||||
|
private:
|
||||||
|
unsigned int ChainLength(llvm::InsertElementInst *inst) const;
|
||||||
|
llvm::Value *getInsertChainSmearValue(llvm::Instruction* inst) const;
|
||||||
|
llvm::Value *getShuffleSmearValue(llvm::Instruction* inst) const;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
char SmearCleanupPass::ID = 0;
|
char SmearCleanupPass::ID = 0;
|
||||||
|
|
||||||
|
|
||||||
static int
|
unsigned int
|
||||||
lChainLength(llvm::InsertElementInst *inst) {
|
SmearCleanupPass::ChainLength(llvm::InsertElementInst *inst) const {
|
||||||
int length = 0;
|
unsigned int length = 0;
|
||||||
while (inst != NULL) {
|
while (inst != NULL) {
|
||||||
++length;
|
++length;
|
||||||
inst = llvm::dyn_cast<llvm::InsertElementInst>(inst->getOperand(0));
|
inst = llvm::dyn_cast<llvm::InsertElementInst>(inst->getOperand(0));
|
||||||
@@ -4413,45 +4418,105 @@ lChainLength(llvm::InsertElementInst *inst) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/** Recognizes the legacy broadcast idiom: a chain of exactly vectorWidth
    insertelement instructions that all insert the same scalar value.

    The chain is walked backwards from inst through operand 0 (the vector
    being inserted into) of each insertelement.

    @param inst  Instruction to inspect.
    @return The value inserted by every link of the chain, or NULL if inst
            is not an insertelement or the chain does not match.

    TODO: we don't check the indexes where we do insertion, so we may
    trigger the transformation for a wrong chain.  This way of doing
    broadcast is obsolete and should probably be removed some day. */
llvm::Value *
SmearCleanupPass::getInsertChainSmearValue(llvm::Instruction* inst) const {
    llvm::InsertElementInst *insertInst =
        llvm::dyn_cast<llvm::InsertElementInst>(inst);
    if (!insertInst) {
        return NULL;
    }

    // We consider only chains of vectorWidth length.
    if (ChainLength(insertInst) != vectorWidth) {
        return NULL;
    }

    // FIXME: we only want to do this to vectors with width equal to
    // the target vector width.  But we can't easily get that here, so
    // for now we at least avoid one case where we definitely don't
    // want to do this.
    llvm::VectorType *vt =
        llvm::dyn_cast<llvm::VectorType>(insertInst->getType());
    // dyn_cast may return NULL; never dereference it unchecked.
    if (vt == NULL || vt->getNumElements() == 1) {
        return NULL;
    }

    llvm::Value *smearValue = NULL;
    while (insertInst != NULL) {
        // Operand 1 is the inserted value; every link must insert the
        // same one.
        llvm::Value *insertValue = insertInst->getOperand(1);
        if (smearValue == NULL) {
            smearValue = insertValue;
        }
        else if (smearValue != insertValue) {
            return NULL;
        }

        // Operand 0 is the vector to insert into; follow it to the
        // previous link (the walk ends at the first non-insertelement).
        insertInst =
            llvm::dyn_cast<llvm::InsertElementInst>(insertInst->getOperand(0));
    }
    assert(smearValue != NULL);

    return smearValue;
}
|
||||||
|
|
||||||
|
|
||||||
|
/** Recognizes a broadcast expressed as insertelement + shufflevector:
    a shuffle whose mask is an all-zeros constant vector of vectorWidth
    elements and whose first operand is an insertelement that writes
    element 0.

    @param inst  Instruction to inspect.
    @return The value inserted into element 0 (i.e. the value being
            broadcast), or NULL if inst does not match the pattern. */
llvm::Value *
SmearCleanupPass::getShuffleSmearValue(llvm::Instruction* inst) const {
    llvm::ShuffleVectorInst *shuffleInst =
        llvm::dyn_cast<llvm::ShuffleVectorInst>(inst);
    if (!shuffleInst) {
        return NULL;
    }

    // Check that the shuffle is a broadcast of the first element of the
    // first vector, i.e. the mask is an all-zeros vector of expected size.
    llvm::Constant *mask =
        llvm::dyn_cast<llvm::Constant>(shuffleInst->getOperand(2));
    if (mask == NULL || !mask->isNullValue()) {
        return NULL;
    }
    llvm::VectorType *maskType =
        llvm::dyn_cast<llvm::VectorType>(mask->getType());
    if (maskType == NULL || maskType->getNumElements() != vectorWidth) {
        return NULL;
    }

    // Check that the shuffled vector comes from an InsertElementInst that
    // inserts a value into the first element (constant zero index).
    llvm::InsertElementInst *insertInst =
        llvm::dyn_cast<llvm::InsertElementInst>(shuffleInst->getOperand(0));
    if (insertInst == NULL) {
        return NULL;
    }
    llvm::Constant *insertIndex =
        llvm::dyn_cast<llvm::Constant>(insertInst->getOperand(2));
    if (insertIndex == NULL || !insertIndex->isNullValue()) {
        return NULL;
    }

    // Operand 1 of the insertelement is the scalar being broadcast.
    return insertInst->getOperand(1);
}
|
||||||
|
|
||||||
|
|
||||||
bool
|
bool
|
||||||
SmearCleanupPass::runOnBasicBlock(llvm::BasicBlock &bb) {
|
SmearCleanupPass::runOnBasicBlock(llvm::BasicBlock &bb) {
|
||||||
bool modifiedAny = false;
|
bool modifiedAny = false;
|
||||||
|
|
||||||
restart:
|
restart:
|
||||||
for (llvm::BasicBlock::iterator iter = bb.begin(), e = bb.end(); iter != e; ++iter) {
|
for (llvm::BasicBlock::iterator iter = bb.begin(), e = bb.end(); iter != e; ++iter) {
|
||||||
llvm::InsertElementInst *insertInst =
|
llvm::Value *smearValue = NULL;
|
||||||
llvm::dyn_cast<llvm::InsertElementInst>(&*iter);
|
|
||||||
if (insertInst == NULL)
|
if (!(smearValue = getInsertChainSmearValue(iter)) &&
|
||||||
|
!(smearValue = getShuffleSmearValue(iter))) {
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
// Only do this on the last insert in a chain...
|
|
||||||
if (lChainLength(insertInst) != vectorWidth)
|
|
||||||
continue;
|
|
||||||
|
|
||||||
// FIXME: we only want to do this to vectors with width equal to
|
|
||||||
// the target vector width. But we can't easily get that here, so
|
|
||||||
// for now we at least avoid one case where we definitely don't
|
|
||||||
// want to do this.
|
|
||||||
llvm::VectorType *vt = llvm::dyn_cast<llvm::VectorType>(insertInst->getType());
|
|
||||||
if (vt->getNumElements() == 1)
|
|
||||||
continue;
|
|
||||||
|
|
||||||
llvm::Value *toMatch = NULL;
|
|
||||||
while (insertInst != NULL) {
|
|
||||||
llvm::Value *insertValue = insertInst->getOperand(1);
|
|
||||||
if (toMatch == NULL)
|
|
||||||
toMatch = insertValue;
|
|
||||||
else if (toMatch != insertValue)
|
|
||||||
goto not_equal;
|
|
||||||
|
|
||||||
insertInst =
|
|
||||||
llvm::dyn_cast<llvm::InsertElementInst>(insertInst->getOperand(0));
|
|
||||||
}
|
}
|
||||||
assert(toMatch != NULL);
|
|
||||||
|
|
||||||
{
|
llvm::Type *smearType = smearValue->getType();
|
||||||
llvm::Type *matchType = toMatch->getType();
|
const char *smearFuncName = lGetTypedFunc("smear", smearType, vectorWidth);
|
||||||
const char *smearFuncName = lGetTypedFunc("smear", matchType, vectorWidth);
|
|
||||||
if (smearFuncName != NULL) {
|
if (smearFuncName != NULL) {
|
||||||
llvm::Function *smearFunc = module->getFunction(smearFuncName);
|
llvm::Function *smearFunc = module->getFunction(smearFuncName);
|
||||||
if (smearFunc == NULL) {
|
if (smearFunc == NULL) {
|
||||||
@@ -4460,7 +4525,7 @@ SmearCleanupPass::runOnBasicBlock(llvm::BasicBlock &bb) {
|
|||||||
// parameter type.
|
// parameter type.
|
||||||
llvm::Constant *sf =
|
llvm::Constant *sf =
|
||||||
module->getOrInsertFunction(smearFuncName, iter->getType(),
|
module->getOrInsertFunction(smearFuncName, iter->getType(),
|
||||||
matchType, NULL);
|
smearType, NULL);
|
||||||
smearFunc = llvm::dyn_cast<llvm::Function>(sf);
|
smearFunc = llvm::dyn_cast<llvm::Function>(sf);
|
||||||
assert(smearFunc != NULL);
|
assert(smearFunc != NULL);
|
||||||
#if defined(LLVM_3_1)
|
#if defined(LLVM_3_1)
|
||||||
@@ -4473,10 +4538,10 @@ SmearCleanupPass::runOnBasicBlock(llvm::BasicBlock &bb) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
assert(smearFunc != NULL);
|
assert(smearFunc != NULL);
|
||||||
llvm::Value *args[1] = { toMatch };
|
llvm::Value *args[1] = { smearValue };
|
||||||
llvm::ArrayRef<llvm::Value *> argArray(&args[0], &args[1]);
|
llvm::ArrayRef<llvm::Value *> argArray(&args[0], &args[1]);
|
||||||
llvm::Instruction *smearCall =
|
llvm::Instruction *smearCall =
|
||||||
llvm::CallInst::Create(smearFunc, argArray, LLVMGetName(toMatch, "_smear"),
|
llvm::CallInst::Create(smearFunc, argArray, LLVMGetName(smearValue, "_smear"),
|
||||||
(llvm::Instruction *)NULL);
|
(llvm::Instruction *)NULL);
|
||||||
|
|
||||||
ReplaceInstWithInst(iter, smearCall);
|
ReplaceInstWithInst(iter, smearCall);
|
||||||
@@ -4485,9 +4550,6 @@ SmearCleanupPass::runOnBasicBlock(llvm::BasicBlock &bb) {
|
|||||||
goto restart;
|
goto restart;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
not_equal:
|
|
||||||
;
|
|
||||||
}
|
|
||||||
|
|
||||||
return modifiedAny;
|
return modifiedAny;
|
||||||
}
|
}
|
||||||
|
|||||||
105
ctx.cpp
105
ctx.cpp
@@ -1379,6 +1379,19 @@ FunctionEmitContext::MasksAllEqual(llvm::Value *v1, llvm::Value *v2) {
|
|||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** Builds the constant vector <0, 1, 2, ...> with one element per lane of
    the target vector width.

    @param is32bits  If true the elements are created with LLVMInt32(),
                     otherwise with LLVMInt64().
    @return The resulting llvm::ConstantVector::get() constant. */
llvm::Value *
FunctionEmitContext::ProgramIndexVector(bool is32bits) {
    const int width = g->target->getVectorWidth();

    llvm::SmallVector<llvm::Constant*, 16> lanes;
    for (int lane = 0; lane < width; ++lane) {
        llvm::Constant *laneIndex = NULL;
        if (is32bits) {
            laneIndex = LLVMInt32(lane);
        }
        else {
            laneIndex = LLVMInt64(lane);
        }
        lanes.push_back(laneIndex);
    }

    return llvm::ConstantVector::get(lanes);
}
|
||||||
|
|
||||||
|
|
||||||
llvm::Value *
|
llvm::Value *
|
||||||
FunctionEmitContext::GetStringPtr(const std::string &str) {
|
FunctionEmitContext::GetStringPtr(const std::string &str) {
|
||||||
@@ -1729,26 +1742,31 @@ FunctionEmitContext::SmearUniform(llvm::Value *value, const char *name) {
|
|||||||
|
|
||||||
llvm::Value *ret = NULL;
|
llvm::Value *ret = NULL;
|
||||||
llvm::Type *eltType = value->getType();
|
llvm::Type *eltType = value->getType();
|
||||||
|
llvm::Type *vecType = NULL;
|
||||||
|
|
||||||
llvm::PointerType *pt =
|
llvm::PointerType *pt =
|
||||||
llvm::dyn_cast<llvm::PointerType>(eltType);
|
llvm::dyn_cast<llvm::PointerType>(eltType);
|
||||||
if (pt != NULL) {
|
if (pt != NULL) {
|
||||||
// Varying pointers are represented as vectors of i32/i64s
|
// Varying pointers are represented as vectors of i32/i64s
|
||||||
ret = llvm::UndefValue::get(LLVMTypes::VoidPointerVectorType);
|
vecType = LLVMTypes::VoidPointerVectorType;
|
||||||
value = PtrToIntInst(value);
|
value = PtrToIntInst(value);
|
||||||
}
|
}
|
||||||
else
|
else {
|
||||||
// All other varying types are represented as vectors of the
|
// All other varying types are represented as vectors of the
|
||||||
// underlying type.
|
// underlying type.
|
||||||
ret = llvm::UndefValue::get(llvm::VectorType::get(eltType,
|
vecType = llvm::VectorType::get(eltType, g->target->getVectorWidth());
|
||||||
g->target->getVectorWidth()));
|
|
||||||
|
|
||||||
for (int i = 0; i < g->target->getVectorWidth(); ++i) {
|
|
||||||
llvm::Twine n = llvm::Twine("smear.") + llvm::Twine(name ? name : "") +
|
|
||||||
llvm::Twine(i);
|
|
||||||
ret = InsertInst(ret, value, i, n.str().c_str());
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Check for a constant case.
|
||||||
|
if (llvm::Constant *const_val = llvm::dyn_cast<llvm::Constant>(value)) {
|
||||||
|
ret = llvm::ConstantVector::getSplat(
|
||||||
|
g->target->getVectorWidth(),
|
||||||
|
const_val);
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
ret = BroadcastValue(value, vecType, name);
|
||||||
|
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -3131,6 +3149,66 @@ FunctionEmitContext::InsertInst(llvm::Value *v, llvm::Value *eltVal, int elt,
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/** Emits an llvm::ShuffleVectorInst at the end of the current basic block.

    @param v1    First input vector.
    @param v2    Second input vector.
    @param mask  Shuffle mask.
    @param name  Name for the result; if NULL a name is derived from v1
                 with LLVMGetName() and the "_shuffle" suffix.
    @return The new instruction, or NULL if any of v1, v2 or mask is NULL
            (which is only expected after an error has been reported). */
llvm::Value *
FunctionEmitContext::ShuffleInst(llvm::Value *v1, llvm::Value *v2, llvm::Value *mask,
                                 const char *name) {
    if (v1 == NULL || v2 == NULL || mask == NULL) {
        AssertPos(currentPos, m->errorCount > 0);
        return NULL;
    }

    if (name == NULL) {
        // The suffix is a fixed string, so no scratch buffer is needed.
        name = LLVMGetName(v1, "_shuffle");
    }

    llvm::Instruction *ii = new llvm::ShuffleVectorInst(v1, v2, mask, name, bblock);

    AddDebugPos(ii);
    return ii;
}
|
||||||
|
|
||||||
|
|
||||||
|
/** Emits the broadcast pattern (insertelement into lane 0 followed by a
    shufflevector with an all-zeros mask) that replicates a scalar across
    all lanes of a vector.

    @param v        Scalar value to broadcast.
    @param vecType  Vector type of the result; its element type must match
                    the type of v (asserted).
    @param name     Name for the result; if NULL a name is derived from v
                    with LLVMGetName() and the "_broadcast" suffix.
    @return The shufflevector result, or NULL if v or vecType is NULL
            (which is only expected after an error has been reported). */
llvm::Value *
FunctionEmitContext::BroadcastValue(llvm::Value *v, llvm::Type* vecType,
                                    const char *name) {
    if (v == NULL || vecType == NULL) {
        AssertPos(currentPos, m->errorCount > 0);
        return NULL;
    }

    llvm::VectorType *ty = llvm::dyn_cast<llvm::VectorType>(vecType);
    Assert(ty && ty->getVectorElementType() == v->getType());

    if (name == NULL) {
        // The suffix is a fixed string, so no scratch buffer is needed.
        name = LLVMGetName(v, "_broadcast");
    }

    // Generate the following sequence:
    //   %name_init.i = insertelement <4 x i32> undef, i32 %val, i32 0
    //   %name.i = shufflevector <4 x i32> %name_init.i, <4 x i32> undef,
    //                           <4 x i32> zeroinitializer

    llvm::Value *undef1 = llvm::UndefValue::get(vecType);
    llvm::Value *undef2 = llvm::UndefValue::get(vecType);

    // InsertElement
    // A Twine only points at its operands, so it must be rendered to a
    // std::string within the same full expression; keeping the Twine in a
    // local would leave it referring to destroyed temporaries.
    const std::string initName =
        (llvm::Twine(name) + llvm::Twine("_init")).str();
    llvm::Value *insert = InsertInst(undef1, v, 0, initName.c_str());

    // ShuffleVector: an all-zeros i32 mask selects lane 0 of the first
    // operand for every result lane.
    llvm::Constant *zeroVec = llvm::ConstantVector::getSplat(
        vecType->getVectorNumElements(),
        llvm::Constant::getNullValue(llvm::Type::getInt32Ty(*g->ctx)));
    llvm::Value *ret = ShuffleInst(insert, undef2, zeroVec, name);

    return ret;
}
|
||||||
|
|
||||||
|
|
||||||
llvm::PHINode *
|
llvm::PHINode *
|
||||||
FunctionEmitContext::PhiNode(llvm::Type *type, int count,
|
FunctionEmitContext::PhiNode(llvm::Type *type, int count,
|
||||||
const char *name) {
|
const char *name) {
|
||||||
@@ -3509,12 +3587,9 @@ FunctionEmitContext::addVaryingOffsetsIfNeeded(llvm::Value *ptr,
|
|||||||
unifSize = SmearUniform(unifSize);
|
unifSize = SmearUniform(unifSize);
|
||||||
|
|
||||||
// Compute offset = <0, 1, .. > * unifSize
|
// Compute offset = <0, 1, .. > * unifSize
|
||||||
llvm::Value *varyingOffsets = llvm::UndefValue::get(unifSize->getType());
|
bool is32bits = g->target->is32Bit() || g->opt.force32BitAddressing;
|
||||||
for (int i = 0; i < g->target->getVectorWidth(); ++i) {
|
llvm::Value *varyingOffsets = ProgramIndexVector(is32bits);
|
||||||
llvm::Value *iValue = (g->target->is32Bit() || g->opt.force32BitAddressing) ?
|
|
||||||
LLVMInt32(i) : LLVMInt64(i);
|
|
||||||
varyingOffsets = InsertInst(varyingOffsets, iValue, i, "varying_delta");
|
|
||||||
}
|
|
||||||
llvm::Value *offset = BinaryOperator(llvm::Instruction::Mul, unifSize,
|
llvm::Value *offset = BinaryOperator(llvm::Instruction::Mul, unifSize,
|
||||||
varyingOffsets);
|
varyingOffsets);
|
||||||
|
|
||||||
|
|||||||
16
ctx.h
16
ctx.h
@@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
Copyright (c) 2010-2012, Intel Corporation
|
Copyright (c) 2010-2013, Intel Corporation
|
||||||
All rights reserved.
|
All rights reserved.
|
||||||
|
|
||||||
Redistribution and use in source and binary forms, with or without
|
Redistribution and use in source and binary forms, with or without
|
||||||
@@ -295,6 +295,10 @@ public:
|
|||||||
that indicates whether the two masks are equal. */
|
that indicates whether the two masks are equal. */
|
||||||
llvm::Value *MasksAllEqual(llvm::Value *mask1, llvm::Value *mask2);
|
llvm::Value *MasksAllEqual(llvm::Value *mask1, llvm::Value *mask2);
|
||||||
|
|
||||||
|
/** Generate ConstantVector, which contains ProgramIndex, i.e.
|
||||||
|
< i32 0, i32 1, i32 2, i32 3> */
|
||||||
|
llvm::Value *ProgramIndexVector(bool is32bits = true);
|
||||||
|
|
||||||
/** Given a string, create an anonymous global variable to hold its
|
/** Given a string, create an anonymous global variable to hold its
|
||||||
value and return the pointer to the string. */
|
value and return the pointer to the string. */
|
||||||
llvm::Value *GetStringPtr(const std::string &str);
|
llvm::Value *GetStringPtr(const std::string &str);
|
||||||
@@ -500,6 +504,16 @@ public:
|
|||||||
llvm::Value *InsertInst(llvm::Value *v, llvm::Value *eltVal, int elt,
|
llvm::Value *InsertInst(llvm::Value *v, llvm::Value *eltVal, int elt,
|
||||||
const char *name = NULL);
|
const char *name = NULL);
|
||||||
|
|
||||||
|
/** This convenience method maps to an llvm::ShuffleVectorInst. */
|
||||||
|
llvm::Value *ShuffleInst(llvm::Value *v1, llvm::Value *v2, llvm::Value *mask,
|
||||||
|
const char *name = NULL);
|
||||||
|
|
||||||
|
/** This convenience method generates a broadcast pattern. It takes a value
|
||||||
|
and a vector type. Type of the value must match element type of the
|
||||||
|
vector. */
|
||||||
|
llvm::Value *BroadcastValue(llvm::Value *v, llvm::Type *vecType,
|
||||||
|
const char *name = NULL);
|
||||||
|
|
||||||
llvm::PHINode *PhiNode(llvm::Type *type, int count,
|
llvm::PHINode *PhiNode(llvm::Type *type, int count,
|
||||||
const char *name = NULL);
|
const char *name = NULL);
|
||||||
llvm::Instruction *SelectInst(llvm::Value *test, llvm::Value *val0,
|
llvm::Instruction *SelectInst(llvm::Value *test, llvm::Value *val0,
|
||||||
|
|||||||
11
expr.cpp
11
expr.cpp
@@ -3905,11 +3905,7 @@ lAddVaryingOffsetsIfNeeded(FunctionEmitContext *ctx, llvm::Value *ptr,
|
|||||||
return ptr;
|
return ptr;
|
||||||
|
|
||||||
// Onward: compute the per lane offsets.
|
// Onward: compute the per lane offsets.
|
||||||
llvm::Value *varyingOffsets =
|
llvm::Value *varyingOffsets = ctx->ProgramIndexVector();
|
||||||
llvm::UndefValue::get(LLVMTypes::Int32VectorType);
|
|
||||||
for (int i = 0; i < g->target->getVectorWidth(); ++i)
|
|
||||||
varyingOffsets = ctx->InsertInst(varyingOffsets, LLVMInt32(i), i,
|
|
||||||
"varying_delta");
|
|
||||||
|
|
||||||
// And finally add the per-lane offsets. Note that we lie to the GEP
|
// And finally add the per-lane offsets. Note that we lie to the GEP
|
||||||
// call and tell it that the pointers are to uniform elements and not
|
// call and tell it that the pointers are to uniform elements and not
|
||||||
@@ -6768,9 +6764,8 @@ TypeCastExpr::GetValue(FunctionEmitContext *ctx) const {
|
|||||||
if (!conv)
|
if (!conv)
|
||||||
return NULL;
|
return NULL;
|
||||||
|
|
||||||
llvm::Value *cast = llvm::UndefValue::get(toType->LLVMType(g->ctx));
|
llvm::Value *cast = ctx->BroadcastValue(conv, toType->LLVMType(g->ctx));
|
||||||
for (int i = 0; i < toVector->GetElementCount(); ++i)
|
|
||||||
cast = ctx->InsertInst(cast, conv, i);
|
|
||||||
return cast;
|
return cast;
|
||||||
}
|
}
|
||||||
else if (toPointerType != NULL) {
|
else if (toPointerType != NULL) {
|
||||||
|
|||||||
48
llvmutil.cpp
48
llvmutil.cpp
@@ -601,11 +601,15 @@ lGetIntValue(llvm::Value *offset) {
|
|||||||
|
|
||||||
|
|
||||||
void
|
void
|
||||||
LLVMFlattenInsertChain(llvm::InsertElementInst *ie, int vectorWidth,
|
LLVMFlattenInsertChain(llvm::Value *inst, int vectorWidth,
|
||||||
llvm::Value **elements) {
|
llvm::Value **elements) {
|
||||||
for (int i = 0; i < vectorWidth; ++i)
|
for (int i = 0; i < vectorWidth; ++i) {
|
||||||
elements[i] = NULL;
|
elements[i] = NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Catch a pattern of InsertElement chain.
|
||||||
|
if (llvm::InsertElementInst *ie =
|
||||||
|
llvm::dyn_cast<llvm::InsertElementInst>(inst)) {
|
||||||
while (ie != NULL) {
|
while (ie != NULL) {
|
||||||
int64_t iOffset = lGetIntValue(ie->getOperand(2));
|
int64_t iOffset = lGetIntValue(ie->getOperand(2));
|
||||||
Assert(iOffset >= 0 && iOffset < vectorWidth);
|
Assert(iOffset >= 0 && iOffset < vectorWidth);
|
||||||
@@ -618,8 +622,9 @@ LLVMFlattenInsertChain(llvm::InsertElementInst *ie, int vectorWidth,
|
|||||||
llvm::Value *insertBase = ie->getOperand(0);
|
llvm::Value *insertBase = ie->getOperand(0);
|
||||||
ie = llvm::dyn_cast<llvm::InsertElementInst>(insertBase);
|
ie = llvm::dyn_cast<llvm::InsertElementInst>(insertBase);
|
||||||
if (ie == NULL) {
|
if (ie == NULL) {
|
||||||
if (llvm::isa<llvm::UndefValue>(insertBase))
|
if (llvm::isa<llvm::UndefValue>(insertBase)) {
|
||||||
return;
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
// Get the value out of a constant vector if that's what we
|
// Get the value out of a constant vector if that's what we
|
||||||
// have
|
// have
|
||||||
@@ -641,6 +646,31 @@ LLVMFlattenInsertChain(llvm::InsertElementInst *ie, int vectorWidth,
|
|||||||
elements[iOffset] = cv->getOperand((int32_t)iOffset);
|
elements[iOffset] = cv->getOperand((int32_t)iOffset);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
// Catch a pattern of broadcast implemented as InsertElement + Shuffle:
|
||||||
|
// %broadcast_init.0 = insertelement <4 x i32> undef, i32 %val, i32 0
|
||||||
|
// %broadcast.1 = shufflevector <4 x i32> %broadcast_init.0, <4 x i32> undef,
|
||||||
|
// <4 x i32> zeroinitializer
|
||||||
|
else if (llvm::ShuffleVectorInst *shuf =
|
||||||
|
llvm::dyn_cast<llvm::ShuffleVectorInst>(inst)) {
|
||||||
|
llvm::Value *indices = shuf->getOperand(2);
|
||||||
|
if (llvm::isa<llvm::ConstantAggregateZero>(indices)) {
|
||||||
|
llvm::Value *op = shuf->getOperand(0);
|
||||||
|
llvm::InsertElementInst *ie = llvm::dyn_cast<llvm::InsertElementInst>(op);
|
||||||
|
if (ie != NULL &&
|
||||||
|
llvm::isa<llvm::UndefValue>(ie->getOperand(0))) {
|
||||||
|
llvm::ConstantInt *ci =
|
||||||
|
llvm::dyn_cast<llvm::ConstantInt>(ie->getOperand(2));
|
||||||
|
|
||||||
|
if (ci->isZero()) {
|
||||||
|
for (int i = 0; i < vectorWidth; ++i) {
|
||||||
|
elements[i] = ie->getOperand(1);
|
||||||
|
}
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@@ -694,10 +724,10 @@ lIsExactMultiple(llvm::Value *val, int baseValue, int vectorLength,
|
|||||||
else
|
else
|
||||||
Assert(LLVMVectorValuesAllEqual(val));
|
Assert(LLVMVectorValuesAllEqual(val));
|
||||||
|
|
||||||
llvm::InsertElementInst *ie = llvm::dyn_cast<llvm::InsertElementInst>(val);
|
if (llvm::isa<llvm::InsertElementInst>(val) ||
|
||||||
if (ie != NULL) {
|
llvm::isa<llvm::ShuffleVectorInst>(val)) {
|
||||||
llvm::Value *elts[ISPC_MAX_NVEC];
|
llvm::Value *elts[ISPC_MAX_NVEC];
|
||||||
LLVMFlattenInsertChain(ie, g->target->getVectorWidth(), elts);
|
LLVMFlattenInsertChain(val, g->target->getVectorWidth(), elts);
|
||||||
// We just need to check the scalar first value, since we know that
|
// We just need to check the scalar first value, since we know that
|
||||||
// all elements are equal
|
// all elements are equal
|
||||||
return lIsExactMultiple(elts[0], baseValue, vectorLength,
|
return lIsExactMultiple(elts[0], baseValue, vectorLength,
|
||||||
@@ -1440,10 +1470,10 @@ lExtractFirstVectorElement(llvm::Value *v,
|
|||||||
|
|
||||||
// If we have a chain of insertelement instructions, then we can just
|
// If we have a chain of insertelement instructions, then we can just
|
||||||
// flatten them out and grab the value for the first one.
|
// flatten them out and grab the value for the first one.
|
||||||
llvm::InsertElementInst *ie = llvm::dyn_cast<llvm::InsertElementInst>(v);
|
if (llvm::isa<llvm::InsertElementInst>(v) ||
|
||||||
if (ie != NULL) {
|
llvm::isa<llvm::ShuffleVectorInst>(v)) {
|
||||||
llvm::Value *elements[ISPC_MAX_NVEC];
|
llvm::Value *elements[ISPC_MAX_NVEC];
|
||||||
LLVMFlattenInsertChain(ie, vt->getNumElements(), elements);
|
LLVMFlattenInsertChain(v, vt->getNumElements(), elements);
|
||||||
return elements[0];
|
return elements[0];
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -264,8 +264,13 @@ extern bool LLVMExtractVectorInts(llvm::Value *v, int64_t ret[], int *nElts);
|
|||||||
constant vector. For anything more complex (e.g. some other arbitrary
|
constant vector. For anything more complex (e.g. some other arbitrary
|
||||||
value, it doesn't try to extract element values into the returned
|
value, it doesn't try to extract element values into the returned
|
||||||
array.
|
array.
|
||||||
|
|
||||||
|
This also handles common broadcast pattern:
|
||||||
|
%broadcast_init.0 = insertelement <4 x i32> undef, i32 %val, i32 0
|
||||||
|
%broadcast.1 = shufflevector <4 x i32> %broadcast_init.0, <4 x i32> undef,
|
||||||
|
<4 x i32> zeroinitializer
|
||||||
*/
|
*/
|
||||||
extern void LLVMFlattenInsertChain(llvm::InsertElementInst *ie, int vectorWidth,
|
extern void LLVMFlattenInsertChain(llvm::Value *inst, int vectorWidth,
|
||||||
llvm::Value **elements);
|
llvm::Value **elements);
|
||||||
|
|
||||||
/** This is a utility routine for debugging that dumps out the given LLVM
|
/** This is a utility routine for debugging that dumps out the given LLVM
|
||||||
|
|||||||
13
opt.cpp
13
opt.cpp
@@ -1058,10 +1058,10 @@ lCheckForActualPointer(llvm::Value *v) {
|
|||||||
*/
|
*/
|
||||||
static llvm::Value *
|
static llvm::Value *
|
||||||
lGetBasePointer(llvm::Value *v) {
|
lGetBasePointer(llvm::Value *v) {
|
||||||
llvm::InsertElementInst *ie = llvm::dyn_cast<llvm::InsertElementInst>(v);
|
if (llvm::isa<llvm::InsertElementInst>(v) ||
|
||||||
if (ie != NULL) {
|
llvm::isa<llvm::ShuffleVectorInst>(v)) {
|
||||||
llvm::Value *elements[ISPC_MAX_NVEC];
|
llvm::Value *elements[ISPC_MAX_NVEC];
|
||||||
LLVMFlattenInsertChain(ie, g->target->getVectorWidth(), elements);
|
LLVMFlattenInsertChain(v, g->target->getVectorWidth(), elements);
|
||||||
|
|
||||||
// Make sure none of the elements is undefined.
|
// Make sure none of the elements is undefined.
|
||||||
// TODO: it's probably ok to allow undefined elements and return
|
// TODO: it's probably ok to allow undefined elements and return
|
||||||
@@ -1080,9 +1080,12 @@ lGetBasePointer(llvm::Value *v) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// This case comes up with global/static arrays
|
// This case comes up with global/static arrays
|
||||||
llvm::ConstantVector *cv = llvm::dyn_cast<llvm::ConstantVector>(v);
|
if (llvm::ConstantVector *cv = llvm::dyn_cast<llvm::ConstantVector>(v)) {
|
||||||
if (cv != NULL)
|
|
||||||
return lCheckForActualPointer(cv->getSplatValue());
|
return lCheckForActualPointer(cv->getSplatValue());
|
||||||
|
}
|
||||||
|
else if (llvm::ConstantDataVector *cdv = llvm::dyn_cast<llvm::ConstantDataVector>(v)) {
|
||||||
|
return lCheckForActualPointer(cdv->getSplatValue());
|
||||||
|
}
|
||||||
|
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|||||||
30
stmt.cpp
30
stmt.cpp
@@ -1272,11 +1272,8 @@ lUpdateVaryingCounter(int dim, int nDims, FunctionEmitContext *ctx,
|
|||||||
const std::vector<int> &spans) {
|
const std::vector<int> &spans) {
|
||||||
// Smear the uniform counter value out to be varying
|
// Smear the uniform counter value out to be varying
|
||||||
llvm::Value *counter = ctx->LoadInst(uniformCounterPtr);
|
llvm::Value *counter = ctx->LoadInst(uniformCounterPtr);
|
||||||
llvm::Value *smearCounter =
|
llvm::Value *smearCounter = ctx->BroadcastValue(
|
||||||
llvm::UndefValue::get(LLVMTypes::Int32VectorType);
|
counter, LLVMTypes::Int32VectorType, "smear_counter");
|
||||||
for (int i = 0; i < g->target->getVectorWidth(); ++i)
|
|
||||||
smearCounter =
|
|
||||||
ctx->InsertInst(smearCounter, counter, i, "smear_counter");
|
|
||||||
|
|
||||||
// Figure out the offsets; this is a little bit tricky. As an example,
|
// Figure out the offsets; this is a little bit tricky. As an example,
|
||||||
// consider a 2D tiled foreach loop, where we're running 8-wide and
|
// consider a 2D tiled foreach loop, where we're running 8-wide and
|
||||||
@@ -1517,9 +1514,9 @@ ForeachStmt::EmitCode(FunctionEmitContext *ctx) const {
|
|||||||
lUpdateVaryingCounter(i, nDims, ctx, uniformCounterPtrs[i],
|
lUpdateVaryingCounter(i, nDims, ctx, uniformCounterPtrs[i],
|
||||||
dimVariables[i]->storagePtr, span);
|
dimVariables[i]->storagePtr, span);
|
||||||
|
|
||||||
llvm::Value *smearEnd = llvm::UndefValue::get(LLVMTypes::Int32VectorType);
|
llvm::Value *smearEnd = ctx->BroadcastValue(
|
||||||
for (int j = 0; j < g->target->getVectorWidth(); ++j)
|
endVals[i], LLVMTypes::Int32VectorType, "smear_end");
|
||||||
smearEnd = ctx->InsertInst(smearEnd, endVals[i], j, "smear_end");
|
|
||||||
// Do a vector compare of its value to the end value to generate a
|
// Do a vector compare of its value to the end value to generate a
|
||||||
// mask for this last bit of work.
|
// mask for this last bit of work.
|
||||||
llvm::Value *emask =
|
llvm::Value *emask =
|
||||||
@@ -1662,9 +1659,9 @@ ForeachStmt::EmitCode(FunctionEmitContext *ctx) const {
|
|||||||
ctx->SetCurrentBasicBlock(bbPartial); {
|
ctx->SetCurrentBasicBlock(bbPartial); {
|
||||||
llvm::Value *varyingCounter =
|
llvm::Value *varyingCounter =
|
||||||
ctx->LoadInst(dimVariables[nDims-1]->storagePtr);
|
ctx->LoadInst(dimVariables[nDims-1]->storagePtr);
|
||||||
llvm::Value *smearEnd = llvm::UndefValue::get(LLVMTypes::Int32VectorType);
|
llvm::Value *smearEnd = ctx->BroadcastValue(
|
||||||
for (int j = 0; j < g->target->getVectorWidth(); ++j)
|
endVals[nDims-1], LLVMTypes::Int32VectorType, "smear_end");
|
||||||
smearEnd = ctx->InsertInst(smearEnd, endVals[nDims-1], j, "smear_end");
|
|
||||||
llvm::Value *emask =
|
llvm::Value *emask =
|
||||||
ctx->CmpInst(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_SLT,
|
ctx->CmpInst(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_SLT,
|
||||||
varyingCounter, smearEnd);
|
varyingCounter, smearEnd);
|
||||||
@@ -1758,9 +1755,8 @@ ForeachStmt::EmitCode(FunctionEmitContext *ctx) const {
|
|||||||
llvm::Value *varyingCounter =
|
llvm::Value *varyingCounter =
|
||||||
lUpdateVaryingCounter(nDims-1, nDims, ctx, uniformCounterPtrs[nDims-1],
|
lUpdateVaryingCounter(nDims-1, nDims, ctx, uniformCounterPtrs[nDims-1],
|
||||||
dimVariables[nDims-1]->storagePtr, span);
|
dimVariables[nDims-1]->storagePtr, span);
|
||||||
llvm::Value *smearEnd = llvm::UndefValue::get(LLVMTypes::Int32VectorType);
|
llvm::Value *smearEnd = ctx->BroadcastValue(
|
||||||
for (int j = 0; j < g->target->getVectorWidth(); ++j)
|
endVals[nDims-1], LLVMTypes::Int32VectorType, "smear_end");
|
||||||
smearEnd = ctx->InsertInst(smearEnd, endVals[nDims-1], j, "smear_end");
|
|
||||||
llvm::Value *emask =
|
llvm::Value *emask =
|
||||||
ctx->CmpInst(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_SLT,
|
ctx->CmpInst(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_SLT,
|
||||||
varyingCounter, smearEnd);
|
varyingCounter, smearEnd);
|
||||||
@@ -1993,11 +1989,7 @@ ForeachActiveStmt::EmitCode(FunctionEmitContext *ctx) const {
|
|||||||
// math...)
|
// math...)
|
||||||
|
|
||||||
// Get the "program index" vector value
|
// Get the "program index" vector value
|
||||||
llvm::Value *programIndex =
|
llvm::Value *programIndex = ctx->ProgramIndexVector();
|
||||||
llvm::UndefValue::get(LLVMTypes::Int32VectorType);
|
|
||||||
for (int i = 0; i < g->target->getVectorWidth(); ++i)
|
|
||||||
programIndex = ctx->InsertInst(programIndex, LLVMInt32(i), i,
|
|
||||||
"prog_index");
|
|
||||||
|
|
||||||
// And smear the current lane out to a vector
|
// And smear the current lane out to a vector
|
||||||
llvm::Value *firstSet32 =
|
llvm::Value *firstSet32 =
|
||||||
|
|||||||
Reference in New Issue
Block a user