Fix bug in handling scatters where all instances go to the same location.

Previously, we'd pick one lane and generate a regular store for its value.
This was the wrong thing to do, since we also should have been checking
that the mask was on (for the lane that was chosen).  This bug didn't
become evident until the scalar target was added, since many stores fall
into this case with that target.

Now, we just leave those as regular scatters.

Fixes most of the failing tests for the scalar target listed in issue #167.
This commit is contained in:
Matt Pharr
2012-01-31 11:06:14 -08:00
parent d71c49494f
commit f73abb05a7

94
opt.cpp
View File

@@ -2494,7 +2494,6 @@ GSToLoadStorePass::runOnBasicBlock(llvm::BasicBlock &bb) {
constOffsets, "varying+const_offsets",
callInst);
{
std::vector<llvm::PHINode *> seenPhis;
if (LLVMVectorValuesAllEqual(fullOffsets, g->target.vectorWidth, seenPhis)) {
// If all the offsets are equal, then compute the single
@@ -2516,68 +2515,61 @@ GSToLoadStorePass::runOnBasicBlock(llvm::BasicBlock &bb) {
"load_braodcast");
lCopyMetadata(newCall, callInst);
llvm::ReplaceInstWithInst(callInst, newCall);
modifiedAny = true;
goto restart;
}
else {
// A scatter with everyone going to the same location is
// undefined (if there's more than one program instance in
// the gang). Issue a warning and arbitrarily let the
// first guy win.
// the gang). Issue a warning.
if (g->target.vectorWidth > 1)
Warning(pos, "Undefined behavior: all program instances are "
"writing to the same location!");
llvm::Value *first =
llvm::ExtractElementInst::Create(storeValue, LLVMInt32(0), "rvalue_first",
callInst);
lCopyMetadata(first, callInst);
// We could do something similar to the gather case, where
// we arbitrarily write one of the values, but we need to
// a) check to be sure the mask isn't all off and b) pick
// the value from an executing program instance in that
// case. We'll just let a bunch of the program instances
// do redundant writes, since this isn't important to make
// fast anyway...
}
}
else {
int step = gatherInfo ? gatherInfo->align : scatterInfo->align;
ptr = new llvm::BitCastInst(ptr, llvm::PointerType::get(first->getType(), 0),
"ptr2rvalue_type", callInst);
std::vector<llvm::PHINode *> seenPhis;
if (step > 0 && lVectorIsLinear(fullOffsets, g->target.vectorWidth,
step, seenPhis)) {
// We have a linear sequence of memory locations being accessed
// starting with the location given by the offset from
// offsetElements[0], with stride of 4 or 8 bytes (for 32 bit
// and 64 bit gather/scatters, respectively.)
llvm::Value *ptr = lComputeCommonPointer(base, fullOffsets, callInst);
lCopyMetadata(ptr, callInst);
llvm::Instruction *sinst = new llvm::StoreInst(first, ptr, false,
scatterInfo->align);
lCopyMetadata(sinst, callInst);
llvm::ReplaceInstWithInst(callInst, sinst);
if (gatherInfo != NULL) {
Debug(pos, "Transformed gather to unaligned vector load!");
llvm::Instruction *newCall =
lCallInst(gatherInfo->loadMaskedFunc, ptr, mask, "masked_load");
lCopyMetadata(newCall, callInst);
llvm::ReplaceInstWithInst(callInst, newCall);
}
else {
Debug(pos, "Transformed scatter to unaligned vector store!");
ptr = new llvm::BitCastInst(ptr, scatterInfo->vecPtrType, "ptrcast",
callInst);
llvm::Instruction *newCall =
lCallInst(scatterInfo->maskedStoreFunc, ptr, storeValue,
mask, "");
lCopyMetadata(newCall, callInst);
llvm::ReplaceInstWithInst(callInst, newCall);
}
modifiedAny = true;
goto restart;
}
modifiedAny = true;
goto restart;
}
}
int step = gatherInfo ? gatherInfo->align : scatterInfo->align;
std::vector<llvm::PHINode *> seenPhis;
if (step > 0 && lVectorIsLinear(fullOffsets, g->target.vectorWidth,
step, seenPhis)) {
// We have a linear sequence of memory locations being accessed
// starting with the location given by the offset from
// offsetElements[0], with stride of 4 or 8 bytes (for 32 bit
// and 64 bit gather/scatters, respectively.)
llvm::Value *ptr = lComputeCommonPointer(base, fullOffsets, callInst);
lCopyMetadata(ptr, callInst);
if (gatherInfo != NULL) {
Debug(pos, "Transformed gather to unaligned vector load!");
llvm::Instruction *newCall =
lCallInst(gatherInfo->loadMaskedFunc, ptr, mask, "masked_load");
lCopyMetadata(newCall, callInst);
llvm::ReplaceInstWithInst(callInst, newCall);
}
else {
Debug(pos, "Transformed scatter to unaligned vector store!");
ptr = new llvm::BitCastInst(ptr, scatterInfo->vecPtrType, "ptrcast",
callInst);
llvm::Instruction *newCall =
lCallInst(scatterInfo->maskedStoreFunc, ptr, storeValue,
mask, "");
lCopyMetadata(newCall, callInst);
llvm::ReplaceInstWithInst(callInst, newCall);
}
modifiedAny = true;
goto restart;
}
}