Fix bug in gather/scatter optimization passes.

When flattening chains of insertelement instructions, we didn't
handle the case where the base of the chain (the vector operand of the
initial insertelement) was a constant vector (with one value set and
the other values undef) rather than an undef value.

Also generalized the "do all of the instances access the same location"
check to handle the case where some of them are accessing undef
locations; the undef elements are ignored in this check, as they should
correspond to the mask being off for that lane anyway.

Fixes issue #149.
This commit is contained in:
Matt Pharr
2012-01-06 09:19:18 -08:00
parent 1abaaee73e
commit 71317e6aa6

36
opt.cpp
View File

@@ -957,8 +957,16 @@ lFlattenInsertChain(llvm::InsertElementInst *ie, int vectorWidth,
llvm::Value *insertBase = ie->getOperand(0); llvm::Value *insertBase = ie->getOperand(0);
ie = llvm::dyn_cast<llvm::InsertElementInst>(insertBase); ie = llvm::dyn_cast<llvm::InsertElementInst>(insertBase);
if (ie == NULL) if (ie == NULL) {
Assert(llvm::isa<llvm::UndefValue>(insertBase)); if (llvm::isa<llvm::UndefValue>(insertBase))
return;
llvm::ConstantVector *cv =
llvm::dyn_cast<llvm::ConstantVector>(insertBase);
Assert(cv != NULL);
Assert(iOffset < (int)cv->getNumOperands());
elements[iOffset] = cv->getOperand(iOffset);
}
} }
} }
@@ -1884,19 +1892,27 @@ lVectorValuesAllEqual(llvm::Value *v, int vectorLength,
llvm::Value *elements[ISPC_MAX_NVEC]; llvm::Value *elements[ISPC_MAX_NVEC];
lFlattenInsertChain(ie, vectorLength, elements); lFlattenInsertChain(ie, vectorLength, elements);
for (int i = 0; i < vectorLength-1; ++i) { // We will ignore any values of elements[] that are NULL; as they
// TODO: It's not clear what to do in this case (which // correspond to undefined values--we just want to see if all of
// corresponds to elements of the vector being undef). It is // the defined values have the same value.
// probably to just ignore undef elements and return true if int lastNonNull = 0;
// all of the other ones are equal, but it'd be nice to have while (lastNonNull < vectorLength && elements[lastNonNull] == NULL)
// some test cases to verify this. ++lastNonNull;
Assert(elements[i] != NULL && elements[i+1] != NULL);
if (lastNonNull == vectorLength)
// all of them are undef!
return true;
for (int i = lastNonNull; i < vectorLength; ++i) {
if (elements[i] == NULL)
continue;
std::vector<llvm::PHINode *> seenPhi0; std::vector<llvm::PHINode *> seenPhi0;
std::vector<llvm::PHINode *> seenPhi1; std::vector<llvm::PHINode *> seenPhi1;
if (lValuesAreEqual(elements[i], elements[i+1], seenPhi0, if (lValuesAreEqual(elements[lastNonNull], elements[i], seenPhi0,
seenPhi1) == false) seenPhi1) == false)
return false; return false;
lastNonNull = i;
} }
return true; return true;
} }