Fix codegen bug with foreach_tiled.
When the outermost dimension(s) were only partially active but the innermost dimension was fully active, the generated code would inadvertently use an incorrect "all on" execution mask. Fixes issues #177 and #200.
This commit is contained in:
5
stmt.cpp
5
stmt.cpp
@@ -1649,11 +1649,12 @@ ForeachStmt::EmitCode(FunctionEmitContext *ctx) const {
 // width. Set the mask and jump to the masked loop body.
 ctx->SetCurrentBasicBlock(bbAllInnerPartialOuter); {
     llvm::Value *mask;
-    if (extrasMaskPtrs.size() == 0)
+    if (nDims == 1)
+        // 1D loop; we shouldn't ever get here anyway
         mask = LLVMMaskAllOff;
     else
-        mask = ctx->LoadInst(extrasMaskPtrs.back());
+        mask = ctx->LoadInst(extrasMaskPtrs[nDims-2]);

     ctx->SetInternalMask(mask);

     ctx->StoreInst(LLVMTrue, stepIndexAfterMaskedBodyPtr);
|
||||
Reference in New Issue
Block a user