diff --git a/opt.cpp b/opt.cpp index 77663544..5e74bd5a 100644 --- a/opt.cpp +++ b/opt.cpp @@ -2494,7 +2494,6 @@ GSToLoadStorePass::runOnBasicBlock(llvm::BasicBlock &bb) { constOffsets, "varying+const_offsets", callInst); - { std::vector seenPhis; if (LLVMVectorValuesAllEqual(fullOffsets, g->target.vectorWidth, seenPhis)) { // If all the offsets are equal, then compute the single @@ -2516,68 +2515,61 @@ GSToLoadStorePass::runOnBasicBlock(llvm::BasicBlock &bb) { "load_braodcast"); lCopyMetadata(newCall, callInst); llvm::ReplaceInstWithInst(callInst, newCall); + + modifiedAny = true; + goto restart; } else { // A scatter with everyone going to the same location is // undefined (if there's more than one program instance in - // the gang). Issue a warning and arbitrarily let the - // first guy win. + // the gang). Issue a warning. if (g->target.vectorWidth > 1) Warning(pos, "Undefined behavior: all program instances are " "writing to the same location!"); - llvm::Value *first = - llvm::ExtractElementInst::Create(storeValue, LLVMInt32(0), "rvalue_first", - callInst); - lCopyMetadata(first, callInst); + // We could do something similar to the gather case, where + // we arbitrarily write one of the values, but we need to + // a) check to be sure the mask isn't all off and b) pick + // the value from an executing program instance in that + // case. We'll just let a bunch of the program instances + // do redundant writes, since this isn't important to make + // fast anyway... + } + } + else { + int step = gatherInfo ? gatherInfo->align : scatterInfo->align; - ptr = new llvm::BitCastInst(ptr, llvm::PointerType::get(first->getType(), 0), - "ptr2rvalue_type", callInst); + std::vector seenPhis; + if (step > 0 && lVectorIsLinear(fullOffsets, g->target.vectorWidth, + step, seenPhis)) { + // We have a linear sequence of memory locations being accessed + // starting with the location given by the offset from + // offsetElements[0], with stride of 4 or 8 bytes (for 32 bit + // and 64 bit gather/scatters, respectively.) + llvm::Value *ptr = lComputeCommonPointer(base, fullOffsets, callInst); lCopyMetadata(ptr, callInst); - llvm::Instruction *sinst = new llvm::StoreInst(first, ptr, false, - scatterInfo->align); - lCopyMetadata(sinst, callInst); - llvm::ReplaceInstWithInst(callInst, sinst); + if (gatherInfo != NULL) { + Debug(pos, "Transformed gather to unaligned vector load!"); + llvm::Instruction *newCall = + lCallInst(gatherInfo->loadMaskedFunc, ptr, mask, "masked_load"); + lCopyMetadata(newCall, callInst); + llvm::ReplaceInstWithInst(callInst, newCall); + } + else { + Debug(pos, "Transformed scatter to unaligned vector store!"); + ptr = new llvm::BitCastInst(ptr, scatterInfo->vecPtrType, "ptrcast", + callInst); + llvm::Instruction *newCall = + lCallInst(scatterInfo->maskedStoreFunc, ptr, storeValue, + mask, ""); + lCopyMetadata(newCall, callInst); + llvm::ReplaceInstWithInst(callInst, newCall); + } + + modifiedAny = true; + goto restart; } - - modifiedAny = true; - goto restart; - } - } - - int step = gatherInfo ? gatherInfo->align : scatterInfo->align; - - std::vector seenPhis; - if (step > 0 && lVectorIsLinear(fullOffsets, g->target.vectorWidth, - step, seenPhis)) { - // We have a linear sequence of memory locations being accessed - // starting with the location given by the offset from - // offsetElements[0], with stride of 4 or 8 bytes (for 32 bit - // and 64 bit gather/scatters, respectively.) - llvm::Value *ptr = lComputeCommonPointer(base, fullOffsets, callInst); - lCopyMetadata(ptr, callInst); - - if (gatherInfo != NULL) { - Debug(pos, "Transformed gather to unaligned vector load!"); - llvm::Instruction *newCall = - lCallInst(gatherInfo->loadMaskedFunc, ptr, mask, "masked_load"); - lCopyMetadata(newCall, callInst); - llvm::ReplaceInstWithInst(callInst, newCall); - } - else { - Debug(pos, "Transformed scatter to unaligned vector store!"); - ptr = new llvm::BitCastInst(ptr, scatterInfo->vecPtrType, "ptrcast", - callInst); - llvm::Instruction *newCall = - lCallInst(scatterInfo->maskedStoreFunc, ptr, storeValue, - mask, ""); - lCopyMetadata(newCall, callInst); - llvm::ReplaceInstWithInst(callInst, newCall); - } - - modifiedAny = true; - goto restart; } }