From f73abb05a7c7f43f7ed5a4c2beb64763bfbad8f3 Mon Sep 17 00:00:00 2001 From: Matt Pharr Date: Tue, 31 Jan 2012 11:06:14 -0800 Subject: [PATCH] Fix bug in handling scatters where all instances go to the same location. Previously, we'd pick one lane and generate a regular store for its value. This was the wrong thing to do, since we also should have been checking that the mask was on (for the lane that was chosen). This bug didn't become evident until the scalar target was added, since many stores fall into this case with that target. Now, we just leave those as regular scatters. Fixes most of the failing tests for the scalar target listed in issue #167. --- opt.cpp | 94 ++++++++++++++++++++++++++------------------------------- 1 file changed, 43 insertions(+), 51 deletions(-) diff --git a/opt.cpp b/opt.cpp index 77663544..5e74bd5a 100644 --- a/opt.cpp +++ b/opt.cpp @@ -2494,7 +2494,6 @@ GSToLoadStorePass::runOnBasicBlock(llvm::BasicBlock &bb) { constOffsets, "varying+const_offsets", callInst); - { std::vector seenPhis; if (LLVMVectorValuesAllEqual(fullOffsets, g->target.vectorWidth, seenPhis)) { // If all the offsets are equal, then compute the single @@ -2516,68 +2515,61 @@ GSToLoadStorePass::runOnBasicBlock(llvm::BasicBlock &bb) { "load_braodcast"); lCopyMetadata(newCall, callInst); llvm::ReplaceInstWithInst(callInst, newCall); + + modifiedAny = true; + goto restart; } else { // A scatter with everyone going to the same location is // undefined (if there's more than one program instance in - // the gang). Issue a warning and arbitrarily let the - // first guy win. + // the gang). Issue a warning. if (g->target.vectorWidth > 1) Warning(pos, "Undefined behavior: all program instances are " "writing to the same location!"); - llvm::Value *first = - llvm::ExtractElementInst::Create(storeValue, LLVMInt32(0), "rvalue_first", - callInst); - lCopyMetadata(first, callInst); + // We could do something similar to the gather case, where + // we arbitrarily write one of the values, but we need to + // a) check to be sure the mask isn't all off and b) pick + // the value from an executing program instance in that + // case. We'll just let a bunch of the program instances + // do redundant writes, since this isn't important to make + // fast anyway... + } + } + else { + int step = gatherInfo ? gatherInfo->align : scatterInfo->align; - ptr = new llvm::BitCastInst(ptr, llvm::PointerType::get(first->getType(), 0), - "ptr2rvalue_type", callInst); + std::vector seenPhis; + if (step > 0 && lVectorIsLinear(fullOffsets, g->target.vectorWidth, + step, seenPhis)) { + // We have a linear sequence of memory locations being accessed + // starting with the location given by the offset from + // offsetElements[0], with stride of 4 or 8 bytes (for 32 bit + // and 64 bit gather/scatters, respectively.) + llvm::Value *ptr = lComputeCommonPointer(base, fullOffsets, callInst); lCopyMetadata(ptr, callInst); - llvm::Instruction *sinst = new llvm::StoreInst(first, ptr, false, - scatterInfo->align); - lCopyMetadata(sinst, callInst); - llvm::ReplaceInstWithInst(callInst, sinst); + if (gatherInfo != NULL) { + Debug(pos, "Transformed gather to unaligned vector load!"); + llvm::Instruction *newCall = + lCallInst(gatherInfo->loadMaskedFunc, ptr, mask, "masked_load"); + lCopyMetadata(newCall, callInst); + llvm::ReplaceInstWithInst(callInst, newCall); + } + else { + Debug(pos, "Transformed scatter to unaligned vector store!"); + ptr = new llvm::BitCastInst(ptr, scatterInfo->vecPtrType, "ptrcast", + callInst); + llvm::Instruction *newCall = + lCallInst(scatterInfo->maskedStoreFunc, ptr, storeValue, + mask, ""); + lCopyMetadata(newCall, callInst); + llvm::ReplaceInstWithInst(callInst, newCall); + } + + modifiedAny = true; + goto restart; } - - modifiedAny = true; - goto restart; - } - } - - int step = gatherInfo ? gatherInfo->align : scatterInfo->align; - - std::vector seenPhis; - if (step > 0 && lVectorIsLinear(fullOffsets, g->target.vectorWidth, - step, seenPhis)) { - // We have a linear sequence of memory locations being accessed - // starting with the location given by the offset from - // offsetElements[0], with stride of 4 or 8 bytes (for 32 bit - // and 64 bit gather/scatters, respectively.) - llvm::Value *ptr = lComputeCommonPointer(base, fullOffsets, callInst); - lCopyMetadata(ptr, callInst); - - if (gatherInfo != NULL) { - Debug(pos, "Transformed gather to unaligned vector load!"); - llvm::Instruction *newCall = - lCallInst(gatherInfo->loadMaskedFunc, ptr, mask, "masked_load"); - lCopyMetadata(newCall, callInst); - llvm::ReplaceInstWithInst(callInst, newCall); - } - else { - Debug(pos, "Transformed scatter to unaligned vector store!"); - ptr = new llvm::BitCastInst(ptr, scatterInfo->vecPtrType, "ptrcast", - callInst); - llvm::Instruction *newCall = - lCallInst(scatterInfo->maskedStoreFunc, ptr, storeValue, - mask, ""); - lCopyMetadata(newCall, callInst); - llvm::ReplaceInstWithInst(callInst, newCall); - } - - modifiedAny = true; - goto restart; } }