Merge pull request #467 from dbabokin/broadcast

Broadcast implementation as InsertElement+Shuffle and related improvements
This commit is contained in:
jbrodman
2013-04-11 13:42:56 -07:00
8 changed files with 301 additions and 125 deletions

View File

@@ -4395,16 +4395,21 @@ public:
static char ID; static char ID;
llvm::Module *module; llvm::Module *module;
int vectorWidth; unsigned int vectorWidth;
private:
unsigned int ChainLength(llvm::InsertElementInst *inst) const;
llvm::Value *getInsertChainSmearValue(llvm::Instruction* inst) const;
llvm::Value *getShuffleSmearValue(llvm::Instruction* inst) const;
}; };
char SmearCleanupPass::ID = 0; char SmearCleanupPass::ID = 0;
static int unsigned int
lChainLength(llvm::InsertElementInst *inst) { SmearCleanupPass::ChainLength(llvm::InsertElementInst *inst) const {
int length = 0; unsigned int length = 0;
while (inst != NULL) { while (inst != NULL) {
++length; ++length;
inst = llvm::dyn_cast<llvm::InsertElementInst>(inst->getOperand(0)); inst = llvm::dyn_cast<llvm::InsertElementInst>(inst->getOperand(0));
@@ -4413,45 +4418,105 @@ lChainLength(llvm::InsertElementInst *inst) {
} }
/** Checks whether the given instruction heads a chain of insertelement
    instructions (followed through operand 0) that is exactly vectorWidth
    long and inserts the very same value at every step.

    @param inst  Instruction to examine.
    @return The repeatedly inserted value if the pattern matches, or NULL
            if inst is not an insertelement, the chain has a different
            length, the vector has a single element, or the inserted
            values differ.
 */
llvm::Value *
SmearCleanupPass::getInsertChainSmearValue(llvm::Instruction* inst) const {
    // TODO: we don't check indexes where we do insertion, so we may trigger
    // transformation for a wrong chain.
    // This way of doing broadcast is obsolete and should be probably removed
    // some day.
    llvm::InsertElementInst *insertInst =
        llvm::dyn_cast<llvm::InsertElementInst>(inst);
    if (insertInst == NULL) {
        return NULL;
    }

    // We consider only chains of vectorWidth length.
    if (ChainLength(insertInst) != vectorWidth) {
        return NULL;
    }

    // FIXME: we only want to do this to vectors with width equal to
    // the target vector width.  But we can't easily get that here, so
    // for now we at least avoid one case where we definitely don't
    // want to do this.
    llvm::VectorType *vt =
        llvm::dyn_cast<llvm::VectorType>(insertInst->getType());
    // dyn_cast yields NULL on a type mismatch, so never dereference it
    // unchecked; a non-vector result simply means "no smear here".
    if (vt == NULL || vt->getNumElements() == 1) {
        return NULL;
    }

    llvm::Value *smearValue = NULL;
    while (insertInst != NULL) {
        // Operand 1 is the inserted value.
        llvm::Value *insertValue = insertInst->getOperand(1);
        if (smearValue == NULL) {
            // First link of the chain: remember what has to be matched.
            smearValue = insertValue;
        }
        else if (smearValue != insertValue) {
            // Two different values are inserted: not a broadcast.
            return NULL;
        }

        // Operand 0 is the vector being inserted into; keep walking while
        // it is itself produced by an insertelement.
        insertInst =
            llvm::dyn_cast<llvm::InsertElementInst>(insertInst->getOperand(0));
    }
    assert(smearValue != NULL);

    return smearValue;
}
/** Recognizes a broadcast written as an insertelement into element 0
    followed by a shufflevector whose mask is an all-zeros vector of
    vectorWidth elements.

    @param inst  Instruction to examine.
    @return The value inserted by the insertelement (its operand 1) if the
            pattern matches, NULL otherwise.
 */
llvm::Value *
SmearCleanupPass::getShuffleSmearValue(llvm::Instruction* inst) const {
    llvm::ShuffleVectorInst *shuffleInst =
        llvm::dyn_cast<llvm::ShuffleVectorInst>(inst);
    if (shuffleInst == NULL) {
        return NULL;
    }

    // Check that the shuffle is a broadcast of the first element of the
    // first vector, i.e. the mask is an all-zeros vector of expected size.
    llvm::Constant *mask =
        llvm::dyn_cast<llvm::Constant>(shuffleInst->getOperand(2));
    if (mask == NULL || !mask->isNullValue()) {
        return NULL;
    }
    // The dyn_cast result is checked before use rather than dereferenced
    // directly, so an unexpected mask type is rejected instead of crashing.
    llvm::VectorType *maskType =
        llvm::dyn_cast<llvm::VectorType>(mask->getType());
    if (maskType == NULL || maskType->getNumElements() != vectorWidth) {
        return NULL;
    }

    // The shuffled vector must come from an insertelement...
    llvm::InsertElementInst *insertInst =
        llvm::dyn_cast<llvm::InsertElementInst>(shuffleInst->getOperand(0));
    if (insertInst == NULL) {
        return NULL;
    }
    // ...that writes to the first element (constant zero index).
    llvm::Constant *insertIndex =
        llvm::dyn_cast<llvm::Constant>(insertInst->getOperand(2));
    if (insertIndex == NULL || !insertIndex->isNullValue()) {
        return NULL;
    }

    // Operand 1 of the insertelement is the value being broadcast.
    return insertInst->getOperand(1);
}
bool bool
SmearCleanupPass::runOnBasicBlock(llvm::BasicBlock &bb) { SmearCleanupPass::runOnBasicBlock(llvm::BasicBlock &bb) {
bool modifiedAny = false; bool modifiedAny = false;
restart: restart:
for (llvm::BasicBlock::iterator iter = bb.begin(), e = bb.end(); iter != e; ++iter) { for (llvm::BasicBlock::iterator iter = bb.begin(), e = bb.end(); iter != e; ++iter) {
llvm::InsertElementInst *insertInst = llvm::Value *smearValue = NULL;
llvm::dyn_cast<llvm::InsertElementInst>(&*iter);
if (insertInst == NULL) if (!(smearValue = getInsertChainSmearValue(iter)) &&
!(smearValue = getShuffleSmearValue(iter))) {
continue; continue;
// Only do this on the last insert in a chain...
if (lChainLength(insertInst) != vectorWidth)
continue;
// FIXME: we only want to do this to vectors with width equal to
// the target vector width. But we can't easily get that here, so
// for now we at least avoid one case where we definitely don't
// want to do this.
llvm::VectorType *vt = llvm::dyn_cast<llvm::VectorType>(insertInst->getType());
if (vt->getNumElements() == 1)
continue;
llvm::Value *toMatch = NULL;
while (insertInst != NULL) {
llvm::Value *insertValue = insertInst->getOperand(1);
if (toMatch == NULL)
toMatch = insertValue;
else if (toMatch != insertValue)
goto not_equal;
insertInst =
llvm::dyn_cast<llvm::InsertElementInst>(insertInst->getOperand(0));
} }
assert(toMatch != NULL);
{ llvm::Type *smearType = smearValue->getType();
llvm::Type *matchType = toMatch->getType(); const char *smearFuncName = lGetTypedFunc("smear", smearType, vectorWidth);
const char *smearFuncName = lGetTypedFunc("smear", matchType, vectorWidth);
if (smearFuncName != NULL) { if (smearFuncName != NULL) {
llvm::Function *smearFunc = module->getFunction(smearFuncName); llvm::Function *smearFunc = module->getFunction(smearFuncName);
if (smearFunc == NULL) { if (smearFunc == NULL) {
@@ -4460,7 +4525,7 @@ SmearCleanupPass::runOnBasicBlock(llvm::BasicBlock &bb) {
// parameter type. // parameter type.
llvm::Constant *sf = llvm::Constant *sf =
module->getOrInsertFunction(smearFuncName, iter->getType(), module->getOrInsertFunction(smearFuncName, iter->getType(),
matchType, NULL); smearType, NULL);
smearFunc = llvm::dyn_cast<llvm::Function>(sf); smearFunc = llvm::dyn_cast<llvm::Function>(sf);
assert(smearFunc != NULL); assert(smearFunc != NULL);
#if defined(LLVM_3_1) #if defined(LLVM_3_1)
@@ -4473,10 +4538,10 @@ SmearCleanupPass::runOnBasicBlock(llvm::BasicBlock &bb) {
} }
assert(smearFunc != NULL); assert(smearFunc != NULL);
llvm::Value *args[1] = { toMatch }; llvm::Value *args[1] = { smearValue };
llvm::ArrayRef<llvm::Value *> argArray(&args[0], &args[1]); llvm::ArrayRef<llvm::Value *> argArray(&args[0], &args[1]);
llvm::Instruction *smearCall = llvm::Instruction *smearCall =
llvm::CallInst::Create(smearFunc, argArray, LLVMGetName(toMatch, "_smear"), llvm::CallInst::Create(smearFunc, argArray, LLVMGetName(smearValue, "_smear"),
(llvm::Instruction *)NULL); (llvm::Instruction *)NULL);
ReplaceInstWithInst(iter, smearCall); ReplaceInstWithInst(iter, smearCall);
@@ -4485,9 +4550,6 @@ SmearCleanupPass::runOnBasicBlock(llvm::BasicBlock &bb) {
goto restart; goto restart;
} }
} }
not_equal:
;
}
return modifiedAny; return modifiedAny;
} }

105
ctx.cpp
View File

@@ -1379,6 +1379,19 @@ FunctionEmitContext::MasksAllEqual(llvm::Value *v1, llvm::Value *v2) {
#endif #endif
} }
/** Builds a constant vector holding the values 0, 1, 2, ... with one
    element per lane of the target's vector width.

    @param is32bits  Selects LLVMInt32() elements when true and
                     LLVMInt64() elements when false.
    @return The resulting llvm::ConstantVector.
 */
llvm::Value *
FunctionEmitContext::ProgramIndexVector(bool is32bits) {
    const int width = g->target->getVectorWidth();

    llvm::SmallVector<llvm::Constant*, 16> lanes;
    for (int lane = 0; lane < width; ++lane) {
        if (is32bits) {
            lanes.push_back(LLVMInt32(lane));
        }
        else {
            lanes.push_back(LLVMInt64(lane));
        }
    }

    return llvm::ConstantVector::get(lanes);
}
llvm::Value * llvm::Value *
FunctionEmitContext::GetStringPtr(const std::string &str) { FunctionEmitContext::GetStringPtr(const std::string &str) {
@@ -1729,26 +1742,31 @@ FunctionEmitContext::SmearUniform(llvm::Value *value, const char *name) {
llvm::Value *ret = NULL; llvm::Value *ret = NULL;
llvm::Type *eltType = value->getType(); llvm::Type *eltType = value->getType();
llvm::Type *vecType = NULL;
llvm::PointerType *pt = llvm::PointerType *pt =
llvm::dyn_cast<llvm::PointerType>(eltType); llvm::dyn_cast<llvm::PointerType>(eltType);
if (pt != NULL) { if (pt != NULL) {
// Varying pointers are represented as vectors of i32/i64s // Varying pointers are represented as vectors of i32/i64s
ret = llvm::UndefValue::get(LLVMTypes::VoidPointerVectorType); vecType = LLVMTypes::VoidPointerVectorType;
value = PtrToIntInst(value); value = PtrToIntInst(value);
} }
else else {
// All other varying types are represented as vectors of the // All other varying types are represented as vectors of the
// underlying type. // underlying type.
ret = llvm::UndefValue::get(llvm::VectorType::get(eltType, vecType = llvm::VectorType::get(eltType, g->target->getVectorWidth());
g->target->getVectorWidth()));
for (int i = 0; i < g->target->getVectorWidth(); ++i) {
llvm::Twine n = llvm::Twine("smear.") + llvm::Twine(name ? name : "") +
llvm::Twine(i);
ret = InsertInst(ret, value, i, n.str().c_str());
} }
// Check for a constant case.
if (llvm::Constant *const_val = llvm::dyn_cast<llvm::Constant>(value)) {
ret = llvm::ConstantVector::getSplat(
g->target->getVectorWidth(),
const_val);
return ret;
}
ret = BroadcastValue(value, vecType, name);
return ret; return ret;
} }
@@ -3131,6 +3149,66 @@ FunctionEmitContext::InsertInst(llvm::Value *v, llvm::Value *eltVal, int elt,
} }
/** Emits an llvm::ShuffleVectorInst into the current basic block.

    @param v1    First input vector.
    @param v2    Second input vector.
    @param mask  Shuffle mask.
    @param name  Name for the new instruction; when NULL a name is derived
                 from v1 with a "_shuffle" suffix via LLVMGetName().
    @return The new instruction, or NULL if any of v1, v2 or mask is NULL
            (which is only expected after errors have been reported).
 */
llvm::Value *
FunctionEmitContext::ShuffleInst(llvm::Value *v1, llvm::Value *v2, llvm::Value *mask,
                                 const char *name) {
    if (v1 == NULL || v2 == NULL || mask == NULL) {
        AssertPos(currentPos, m->errorCount > 0);
        return NULL;
    }

    if (name == NULL) {
        // Pass the suffix literal directly; there is no need to copy it
        // into a temporary stack buffer with sprintf() first.
        name = LLVMGetName(v1, "_shuffle");
    }

    llvm::Instruction *ii = new llvm::ShuffleVectorInst(v1, v2, mask, name, bblock);

    AddDebugPos(ii);
    return ii;
}
/** Emits the two-instruction broadcast pattern: an insertelement that
    places the value in element 0 of an undef vector, followed by a
    shufflevector with an all-zeros mask.

    @param v        Value to broadcast; its type must equal the element
                    type of vecType.
    @param vecType  Vector type of the result.
    @param name     Name for the shufflevector; when NULL a name is derived
                    from v with a "_broadcast" suffix via LLVMGetName().
                    The insertelement gets the same name plus "_init".
    @return The shufflevector result, or NULL if v or vecType is NULL
            (which is only expected after errors have been reported).
 */
llvm::Value *
FunctionEmitContext::BroadcastValue(llvm::Value *v, llvm::Type* vecType,
                                    const char *name) {
    if (v == NULL || vecType == NULL) {
        AssertPos(currentPos, m->errorCount > 0);
        return NULL;
    }

    llvm::VectorType *ty = llvm::dyn_cast<llvm::VectorType>(vecType);
    Assert(ty && ty->getVectorElementType() == v->getType());

    if (name == NULL) {
        // Pass the suffix literal directly; there is no need to copy it
        // into a temporary stack buffer with sprintf() first.
        name = LLVMGetName(v, "_broadcast");
    }

    // Generate the following sequence:
    //   %name_init.i = insertelement <4 x i32> undef, i32 %val, i32 0
    //   %name.i = shufflevector <4 x i32> %name_init.i, <4 x i32> undef,
    //                                     <4 x i32> zeroinitializer
    llvm::Value *undef1 = llvm::UndefValue::get(vecType);
    llvm::Value *undef2 = llvm::UndefValue::get(vecType);

    // InsertElement.  The name is built as a std::string rather than kept
    // in a named llvm::Twine: Twines only reference their operands and are
    // not meant to be stored in local variables.
    const std::string initName = std::string(name) + "_init";
    llvm::Value *insert = InsertInst(undef1, v, 0, initName.c_str());

    // ShuffleVector with an all-zeros i32 mask: every result element reads
    // element 0 of the first operand.
    llvm::Constant *zeroVec = llvm::ConstantVector::getSplat(
        vecType->getVectorNumElements(),
        llvm::Constant::getNullValue(llvm::Type::getInt32Ty(*g->ctx)));
    llvm::Value *ret = ShuffleInst(insert, undef2, zeroVec, name);

    return ret;
}
llvm::PHINode * llvm::PHINode *
FunctionEmitContext::PhiNode(llvm::Type *type, int count, FunctionEmitContext::PhiNode(llvm::Type *type, int count,
const char *name) { const char *name) {
@@ -3509,12 +3587,9 @@ FunctionEmitContext::addVaryingOffsetsIfNeeded(llvm::Value *ptr,
unifSize = SmearUniform(unifSize); unifSize = SmearUniform(unifSize);
// Compute offset = <0, 1, .. > * unifSize // Compute offset = <0, 1, .. > * unifSize
llvm::Value *varyingOffsets = llvm::UndefValue::get(unifSize->getType()); bool is32bits = g->target->is32Bit() || g->opt.force32BitAddressing;
for (int i = 0; i < g->target->getVectorWidth(); ++i) { llvm::Value *varyingOffsets = ProgramIndexVector(is32bits);
llvm::Value *iValue = (g->target->is32Bit() || g->opt.force32BitAddressing) ?
LLVMInt32(i) : LLVMInt64(i);
varyingOffsets = InsertInst(varyingOffsets, iValue, i, "varying_delta");
}
llvm::Value *offset = BinaryOperator(llvm::Instruction::Mul, unifSize, llvm::Value *offset = BinaryOperator(llvm::Instruction::Mul, unifSize,
varyingOffsets); varyingOffsets);

16
ctx.h
View File

@@ -1,5 +1,5 @@
/* /*
Copyright (c) 2010-2012, Intel Corporation Copyright (c) 2010-2013, Intel Corporation
All rights reserved. All rights reserved.
Redistribution and use in source and binary forms, with or without Redistribution and use in source and binary forms, with or without
@@ -295,6 +295,10 @@ public:
that indicates whether the two masks are equal. */ that indicates whether the two masks are equal. */
llvm::Value *MasksAllEqual(llvm::Value *mask1, llvm::Value *mask2); llvm::Value *MasksAllEqual(llvm::Value *mask1, llvm::Value *mask2);
/** Generate ConstantVector, which contains ProgramIndex, i.e.
< i32 0, i32 1, i32 2, i32 3> */
llvm::Value *ProgramIndexVector(bool is32bits = true);
/** Given a string, create an anonymous global variable to hold its /** Given a string, create an anonymous global variable to hold its
value and return the pointer to the string. */ value and return the pointer to the string. */
llvm::Value *GetStringPtr(const std::string &str); llvm::Value *GetStringPtr(const std::string &str);
@@ -500,6 +504,16 @@ public:
llvm::Value *InsertInst(llvm::Value *v, llvm::Value *eltVal, int elt, llvm::Value *InsertInst(llvm::Value *v, llvm::Value *eltVal, int elt,
const char *name = NULL); const char *name = NULL);
/** This convenience method maps to an llvm::ShuffleVectorInst. */
llvm::Value *ShuffleInst(llvm::Value *v1, llvm::Value *v2, llvm::Value *mask,
const char *name = NULL);
/** This convenience method generates the broadcast pattern. It takes a value
and a vector type. The type of the value must match the element type of the
vector. */
llvm::Value *BroadcastValue(llvm::Value *v, llvm::Type *vecType,
const char *name = NULL);
llvm::PHINode *PhiNode(llvm::Type *type, int count, llvm::PHINode *PhiNode(llvm::Type *type, int count,
const char *name = NULL); const char *name = NULL);
llvm::Instruction *SelectInst(llvm::Value *test, llvm::Value *val0, llvm::Instruction *SelectInst(llvm::Value *test, llvm::Value *val0,

View File

@@ -3905,11 +3905,7 @@ lAddVaryingOffsetsIfNeeded(FunctionEmitContext *ctx, llvm::Value *ptr,
return ptr; return ptr;
// Onward: compute the per lane offsets. // Onward: compute the per lane offsets.
llvm::Value *varyingOffsets = llvm::Value *varyingOffsets = ctx->ProgramIndexVector();
llvm::UndefValue::get(LLVMTypes::Int32VectorType);
for (int i = 0; i < g->target->getVectorWidth(); ++i)
varyingOffsets = ctx->InsertInst(varyingOffsets, LLVMInt32(i), i,
"varying_delta");
// And finally add the per-lane offsets. Note that we lie to the GEP // And finally add the per-lane offsets. Note that we lie to the GEP
// call and tell it that the pointers are to uniform elements and not // call and tell it that the pointers are to uniform elements and not
@@ -6768,9 +6764,8 @@ TypeCastExpr::GetValue(FunctionEmitContext *ctx) const {
if (!conv) if (!conv)
return NULL; return NULL;
llvm::Value *cast = llvm::UndefValue::get(toType->LLVMType(g->ctx)); llvm::Value *cast = ctx->BroadcastValue(conv, toType->LLVMType(g->ctx));
for (int i = 0; i < toVector->GetElementCount(); ++i)
cast = ctx->InsertInst(cast, conv, i);
return cast; return cast;
} }
else if (toPointerType != NULL) { else if (toPointerType != NULL) {

View File

@@ -601,11 +601,15 @@ lGetIntValue(llvm::Value *offset) {
void void
LLVMFlattenInsertChain(llvm::InsertElementInst *ie, int vectorWidth, LLVMFlattenInsertChain(llvm::Value *inst, int vectorWidth,
llvm::Value **elements) { llvm::Value **elements) {
for (int i = 0; i < vectorWidth; ++i) for (int i = 0; i < vectorWidth; ++i) {
elements[i] = NULL; elements[i] = NULL;
}
// Catch a pattern of InsertElement chain.
if (llvm::InsertElementInst *ie =
llvm::dyn_cast<llvm::InsertElementInst>(inst)) {
while (ie != NULL) { while (ie != NULL) {
int64_t iOffset = lGetIntValue(ie->getOperand(2)); int64_t iOffset = lGetIntValue(ie->getOperand(2));
Assert(iOffset >= 0 && iOffset < vectorWidth); Assert(iOffset >= 0 && iOffset < vectorWidth);
@@ -618,8 +622,9 @@ LLVMFlattenInsertChain(llvm::InsertElementInst *ie, int vectorWidth,
llvm::Value *insertBase = ie->getOperand(0); llvm::Value *insertBase = ie->getOperand(0);
ie = llvm::dyn_cast<llvm::InsertElementInst>(insertBase); ie = llvm::dyn_cast<llvm::InsertElementInst>(insertBase);
if (ie == NULL) { if (ie == NULL) {
if (llvm::isa<llvm::UndefValue>(insertBase)) if (llvm::isa<llvm::UndefValue>(insertBase)) {
return; return;
}
// Get the value out of a constant vector if that's what we // Get the value out of a constant vector if that's what we
// have // have
@@ -641,6 +646,31 @@ LLVMFlattenInsertChain(llvm::InsertElementInst *ie, int vectorWidth,
elements[iOffset] = cv->getOperand((int32_t)iOffset); elements[iOffset] = cv->getOperand((int32_t)iOffset);
} }
} }
}
// Catch a pattern of broadcast implemented as InsertElement + Shuffle:
// %broadcast_init.0 = insertelement <4 x i32> undef, i32 %val, i32 0
// %broadcast.1 = shufflevector <4 x i32> %broadcast_init.0, <4 x i32> undef,
// <4 x i32> zeroinitializer
else if (llvm::ShuffleVectorInst *shuf =
llvm::dyn_cast<llvm::ShuffleVectorInst>(inst)) {
llvm::Value *indices = shuf->getOperand(2);
if (llvm::isa<llvm::ConstantAggregateZero>(indices)) {
llvm::Value *op = shuf->getOperand(0);
llvm::InsertElementInst *ie = llvm::dyn_cast<llvm::InsertElementInst>(op);
if (ie != NULL &&
llvm::isa<llvm::UndefValue>(ie->getOperand(0))) {
llvm::ConstantInt *ci =
llvm::dyn_cast<llvm::ConstantInt>(ie->getOperand(2));
if (ci->isZero()) {
for (int i = 0; i < vectorWidth; ++i) {
elements[i] = ie->getOperand(1);
}
return;
}
}
}
}
} }
@@ -694,10 +724,10 @@ lIsExactMultiple(llvm::Value *val, int baseValue, int vectorLength,
else else
Assert(LLVMVectorValuesAllEqual(val)); Assert(LLVMVectorValuesAllEqual(val));
llvm::InsertElementInst *ie = llvm::dyn_cast<llvm::InsertElementInst>(val); if (llvm::isa<llvm::InsertElementInst>(val) ||
if (ie != NULL) { llvm::isa<llvm::ShuffleVectorInst>(val)) {
llvm::Value *elts[ISPC_MAX_NVEC]; llvm::Value *elts[ISPC_MAX_NVEC];
LLVMFlattenInsertChain(ie, g->target->getVectorWidth(), elts); LLVMFlattenInsertChain(val, g->target->getVectorWidth(), elts);
// We just need to check the scalar first value, since we know that // We just need to check the scalar first value, since we know that
// all elements are equal // all elements are equal
return lIsExactMultiple(elts[0], baseValue, vectorLength, return lIsExactMultiple(elts[0], baseValue, vectorLength,
@@ -1440,10 +1470,10 @@ lExtractFirstVectorElement(llvm::Value *v,
// If we have a chain of insertelement instructions, then we can just // If we have a chain of insertelement instructions, then we can just
// flatten them out and grab the value for the first one. // flatten them out and grab the value for the first one.
llvm::InsertElementInst *ie = llvm::dyn_cast<llvm::InsertElementInst>(v); if (llvm::isa<llvm::InsertElementInst>(v) ||
if (ie != NULL) { llvm::isa<llvm::ShuffleVectorInst>(v)) {
llvm::Value *elements[ISPC_MAX_NVEC]; llvm::Value *elements[ISPC_MAX_NVEC];
LLVMFlattenInsertChain(ie, vt->getNumElements(), elements); LLVMFlattenInsertChain(v, vt->getNumElements(), elements);
return elements[0]; return elements[0];
} }

View File

@@ -264,8 +264,13 @@ extern bool LLVMExtractVectorInts(llvm::Value *v, int64_t ret[], int *nElts);
constant vector. For anything more complex (e.g. some other arbitrary constant vector. For anything more complex (e.g. some other arbitrary
value, it doesn't try to extract element values into the returned value, it doesn't try to extract element values into the returned
array. array.
This also handles common broadcast pattern:
%broadcast_init.0 = insertelement <4 x i32> undef, i32 %val, i32 0
%broadcast.1 = shufflevector <4 x i32> %broadcast_init.0, <4 x i32> undef,
<4 x i32> zeroinitializer
*/ */
extern void LLVMFlattenInsertChain(llvm::InsertElementInst *ie, int vectorWidth, extern void LLVMFlattenInsertChain(llvm::Value *inst, int vectorWidth,
llvm::Value **elements); llvm::Value **elements);
/** This is a utility routine for debugging that dumps out the given LLVM /** This is a utility routine for debugging that dumps out the given LLVM

13
opt.cpp
View File

@@ -1058,10 +1058,10 @@ lCheckForActualPointer(llvm::Value *v) {
*/ */
static llvm::Value * static llvm::Value *
lGetBasePointer(llvm::Value *v) { lGetBasePointer(llvm::Value *v) {
llvm::InsertElementInst *ie = llvm::dyn_cast<llvm::InsertElementInst>(v); if (llvm::isa<llvm::InsertElementInst>(v) ||
if (ie != NULL) { llvm::isa<llvm::ShuffleVectorInst>(v)) {
llvm::Value *elements[ISPC_MAX_NVEC]; llvm::Value *elements[ISPC_MAX_NVEC];
LLVMFlattenInsertChain(ie, g->target->getVectorWidth(), elements); LLVMFlattenInsertChain(v, g->target->getVectorWidth(), elements);
// Make sure none of the elements is undefined. // Make sure none of the elements is undefined.
// TODO: it's probably ok to allow undefined elements and return // TODO: it's probably ok to allow undefined elements and return
@@ -1080,9 +1080,12 @@ lGetBasePointer(llvm::Value *v) {
} }
// This case comes up with global/static arrays // This case comes up with global/static arrays
llvm::ConstantVector *cv = llvm::dyn_cast<llvm::ConstantVector>(v); if (llvm::ConstantVector *cv = llvm::dyn_cast<llvm::ConstantVector>(v)) {
if (cv != NULL)
return lCheckForActualPointer(cv->getSplatValue()); return lCheckForActualPointer(cv->getSplatValue());
}
else if (llvm::ConstantDataVector *cdv = llvm::dyn_cast<llvm::ConstantDataVector>(v)) {
return lCheckForActualPointer(cdv->getSplatValue());
}
return NULL; return NULL;
} }

View File

@@ -1272,11 +1272,8 @@ lUpdateVaryingCounter(int dim, int nDims, FunctionEmitContext *ctx,
const std::vector<int> &spans) { const std::vector<int> &spans) {
// Smear the uniform counter value out to be varying // Smear the uniform counter value out to be varying
llvm::Value *counter = ctx->LoadInst(uniformCounterPtr); llvm::Value *counter = ctx->LoadInst(uniformCounterPtr);
llvm::Value *smearCounter = llvm::Value *smearCounter = ctx->BroadcastValue(
llvm::UndefValue::get(LLVMTypes::Int32VectorType); counter, LLVMTypes::Int32VectorType, "smear_counter");
for (int i = 0; i < g->target->getVectorWidth(); ++i)
smearCounter =
ctx->InsertInst(smearCounter, counter, i, "smear_counter");
// Figure out the offsets; this is a little bit tricky. As an example, // Figure out the offsets; this is a little bit tricky. As an example,
// consider a 2D tiled foreach loop, where we're running 8-wide and // consider a 2D tiled foreach loop, where we're running 8-wide and
@@ -1517,9 +1514,9 @@ ForeachStmt::EmitCode(FunctionEmitContext *ctx) const {
lUpdateVaryingCounter(i, nDims, ctx, uniformCounterPtrs[i], lUpdateVaryingCounter(i, nDims, ctx, uniformCounterPtrs[i],
dimVariables[i]->storagePtr, span); dimVariables[i]->storagePtr, span);
llvm::Value *smearEnd = llvm::UndefValue::get(LLVMTypes::Int32VectorType); llvm::Value *smearEnd = ctx->BroadcastValue(
for (int j = 0; j < g->target->getVectorWidth(); ++j) endVals[i], LLVMTypes::Int32VectorType, "smear_end");
smearEnd = ctx->InsertInst(smearEnd, endVals[i], j, "smear_end");
// Do a vector compare of its value to the end value to generate a // Do a vector compare of its value to the end value to generate a
// mask for this last bit of work. // mask for this last bit of work.
llvm::Value *emask = llvm::Value *emask =
@@ -1662,9 +1659,9 @@ ForeachStmt::EmitCode(FunctionEmitContext *ctx) const {
ctx->SetCurrentBasicBlock(bbPartial); { ctx->SetCurrentBasicBlock(bbPartial); {
llvm::Value *varyingCounter = llvm::Value *varyingCounter =
ctx->LoadInst(dimVariables[nDims-1]->storagePtr); ctx->LoadInst(dimVariables[nDims-1]->storagePtr);
llvm::Value *smearEnd = llvm::UndefValue::get(LLVMTypes::Int32VectorType); llvm::Value *smearEnd = ctx->BroadcastValue(
for (int j = 0; j < g->target->getVectorWidth(); ++j) endVals[nDims-1], LLVMTypes::Int32VectorType, "smear_end");
smearEnd = ctx->InsertInst(smearEnd, endVals[nDims-1], j, "smear_end");
llvm::Value *emask = llvm::Value *emask =
ctx->CmpInst(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_SLT, ctx->CmpInst(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_SLT,
varyingCounter, smearEnd); varyingCounter, smearEnd);
@@ -1758,9 +1755,8 @@ ForeachStmt::EmitCode(FunctionEmitContext *ctx) const {
llvm::Value *varyingCounter = llvm::Value *varyingCounter =
lUpdateVaryingCounter(nDims-1, nDims, ctx, uniformCounterPtrs[nDims-1], lUpdateVaryingCounter(nDims-1, nDims, ctx, uniformCounterPtrs[nDims-1],
dimVariables[nDims-1]->storagePtr, span); dimVariables[nDims-1]->storagePtr, span);
llvm::Value *smearEnd = llvm::UndefValue::get(LLVMTypes::Int32VectorType); llvm::Value *smearEnd = ctx->BroadcastValue(
for (int j = 0; j < g->target->getVectorWidth(); ++j) endVals[nDims-1], LLVMTypes::Int32VectorType, "smear_end");
smearEnd = ctx->InsertInst(smearEnd, endVals[nDims-1], j, "smear_end");
llvm::Value *emask = llvm::Value *emask =
ctx->CmpInst(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_SLT, ctx->CmpInst(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_SLT,
varyingCounter, smearEnd); varyingCounter, smearEnd);
@@ -1993,11 +1989,7 @@ ForeachActiveStmt::EmitCode(FunctionEmitContext *ctx) const {
// math...) // math...)
// Get the "program index" vector value // Get the "program index" vector value
llvm::Value *programIndex = llvm::Value *programIndex = ctx->ProgramIndexVector();
llvm::UndefValue::get(LLVMTypes::Int32VectorType);
for (int i = 0; i < g->target->getVectorWidth(); ++i)
programIndex = ctx->InsertInst(programIndex, LLVMInt32(i), i,
"prog_index");
// And smear the current lane out to a vector // And smear the current lane out to a vector
llvm::Value *firstSet32 = llvm::Value *firstSet32 =