From 71317e6aa6c280c09ea1c69cc1766315cb498c7e Mon Sep 17 00:00:00 2001 From: Matt Pharr Date: Fri, 6 Jan 2012 09:19:18 -0800 Subject: [PATCH] Fix bug in gather/scatter optimization passes. When flattening chains of insertelement instructions, we didn't handle the case where the initial insertelement was to a constant vector (with one value set and the other values undef). Also generalized the "do all of the instances access the same location" check to handle the case where some of them are accessing undef locations; these are ignored in this check, as they should correspond to the mask being off for that lane anyway. Fixes issue #149. --- opt.cpp | 36 ++++++++++++++++++++++++++---------- 1 file changed, 26 insertions(+), 10 deletions(-) diff --git a/opt.cpp b/opt.cpp index 0685509f..b30efd10 100644 --- a/opt.cpp +++ b/opt.cpp @@ -957,8 +957,16 @@ lFlattenInsertChain(llvm::InsertElementInst *ie, int vectorWidth, llvm::Value *insertBase = ie->getOperand(0); ie = llvm::dyn_cast<llvm::InsertElementInst>(insertBase); - if (ie == NULL) - Assert(llvm::isa<llvm::UndefValue>(insertBase)); + if (ie == NULL) { + if (llvm::isa<llvm::UndefValue>(insertBase)) + return; + + llvm::ConstantVector *cv = + llvm::dyn_cast<llvm::ConstantVector>(insertBase); + Assert(cv != NULL); + Assert(iOffset < (int)cv->getNumOperands()); + elements[iOffset] = cv->getOperand(iOffset); + } } } @@ -1884,19 +1892,27 @@ lVectorValuesAllEqual(llvm::Value *v, int vectorLength, llvm::Value *elements[ISPC_MAX_NVEC]; lFlattenInsertChain(ie, vectorLength, elements); - for (int i = 0; i < vectorLength-1; ++i) { - // TODO: It's not clear what to do in this case (which - // corresponds to elements of the vector being undef). It is - // probably to just ignore undef elements and return true if - // all of the other ones are equal, but it'd be nice to have - // some test cases to verify this. 
- Assert(elements[i] != NULL && elements[i+1] != NULL); + // We will ignore any values of elements[] that are NULL; as they + // correspond to undefined values--we just want to see if all of + // the defined values have the same value. + int lastNonNull = 0; + while (lastNonNull < vectorLength && elements[lastNonNull] == NULL) + ++lastNonNull; + + if (lastNonNull == vectorLength) + // all of them are undef! + return true; + + for (int i = lastNonNull; i < vectorLength; ++i) { + if (elements[i] == NULL) + continue; std::vector<llvm::PHINode *> seenPhi0; std::vector<llvm::PHINode *> seenPhi1; - if (lValuesAreEqual(elements[i], elements[i+1], seenPhi0, + if (lValuesAreEqual(elements[lastNonNull], elements[i], seenPhi0, seenPhi1) == false) return false; + lastNonNull = i; } return true; }