Fix codegen bug with foreach_tiled.
When the outermost dimension(s) were only partially active but the innermost dimension was fully active, the generated code inadvertently used an incorrect "all on" execution mask. Fixes issues #177 and #200.
This commit is contained in:
5
stmt.cpp
5
stmt.cpp
@@ -1649,11 +1649,12 @@ ForeachStmt::EmitCode(FunctionEmitContext *ctx) const {
|
|||||||
// width. Set the mask and jump to the masked loop body.
|
// width. Set the mask and jump to the masked loop body.
|
||||||
ctx->SetCurrentBasicBlock(bbAllInnerPartialOuter); {
|
ctx->SetCurrentBasicBlock(bbAllInnerPartialOuter); {
|
||||||
llvm::Value *mask;
|
llvm::Value *mask;
|
||||||
if (extrasMaskPtrs.size() == 0)
|
if (nDims == 1)
|
||||||
// 1D loop; we shouldn't ever get here anyway
|
// 1D loop; we shouldn't ever get here anyway
|
||||||
mask = LLVMMaskAllOff;
|
mask = LLVMMaskAllOff;
|
||||||
else
|
else
|
||||||
mask = ctx->LoadInst(extrasMaskPtrs.back());
|
mask = ctx->LoadInst(extrasMaskPtrs[nDims-2]);
|
||||||
|
|
||||||
ctx->SetInternalMask(mask);
|
ctx->SetInternalMask(mask);
|
||||||
|
|
||||||
ctx->StoreInst(LLVMTrue, stepIndexAfterMaskedBodyPtr);
|
ctx->StoreInst(LLVMTrue, stepIndexAfterMaskedBodyPtr);
|
||||||
|
|||||||
Reference in New Issue
Block a user