Improvements to code generation for "foreach"
Specialize the code for the innermost loop to not do any masking computations for the innermost dimension for the iterations where we are certainly working on a full vector's worth of data. This fix improves performance/code quality of "foreach" such that it's essentially the same as the equivalent "for" loop. Fixes issue #151.
This commit is contained in:
4
ctx.cpp
4
ctx.cpp
@@ -576,7 +576,7 @@ FunctionEmitContext::EndLoop() {
|
||||
|
||||
|
||||
void
|
||||
FunctionEmitContext::StartForeach(llvm::BasicBlock *ct) {
|
||||
FunctionEmitContext::StartForeach() {
|
||||
// Store the current values of various loop-related state so that we
|
||||
// can restore it when we exit this loop.
|
||||
llvm::Value *oldMask = GetInternalMask();
|
||||
@@ -588,7 +588,7 @@ FunctionEmitContext::StartForeach(llvm::BasicBlock *ct) {
|
||||
|
||||
continueLanesPtr = AllocaInst(LLVMTypes::MaskType, "foreach_continue_lanes");
|
||||
StoreInst(LLVMMaskAllOff, continueLanesPtr);
|
||||
continueTarget = ct;
|
||||
continueTarget = NULL; // should be set by SetContinueTarget()
|
||||
|
||||
loopMask = NULL;
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user