From 46bfef3fce3a8a664d08a2597bffed2cbb2a0886 Mon Sep 17 00:00:00 2001
From: Matt Pharr
Date: Sun, 11 Dec 2011 16:16:36 -0800
Subject: [PATCH] Add option to turn off codegen improvements when mask 'all
 on' is statically known.

---
 ctx.cpp  | 49 ++++++++++++++++++++++++++++++++++++++++++++++---
 expr.cpp |  1 +
 ispc.cpp |  2 +-
 ispc.h   | 15 ++++++---------
 main.cpp | 14 +++++++-------
 opt.cpp  | 18 ++++++++++++------
 stmt.cpp | 22 +++++++++++++++-------
 7 files changed, 88 insertions(+), 33 deletions(-)

diff --git a/ctx.cpp b/ctx.cpp
index 4aea7fdc..575b1594 100644
--- a/ctx.cpp
+++ b/ctx.cpp
@@ -197,6 +197,47 @@ FunctionEmitContext::FunctionEmitContext(Function *func, Symbol *funSym,
         returnValuePtr = AllocaInst(ftype, "return_value_memory");
     }
 
+    if (g->opt.disableMaskAllOnOptimizations) {
+        // This is really disgusting.  We want to fool the compiler so
+        // that it can't reason that the mask is all on, but we don't
+        // want to pay too much of a price at the start of each function
+        // to do so.
+        //
+        // Therefore: first, we declare a module-static __all_on_mask
+        // variable that will hold an "all on" mask value.  At the start of
+        // each function, we'll load its value and call SetInternalMaskAnd
+        // with the result to set the current internal execution mask.
+        // (This is a no-op at runtime.)
+        //
+        // Then, to make the optimizer think that the value of
+        // __all_on_mask can't be guaranteed to be "all on", we emit a
+        // dummy function that sets __all_on_mask to "all off".  (That
+        // function is never actually called.)
+        llvm::Value *globalAllOnMaskPtr =
+            m->module->getNamedGlobal("__all_on_mask");
+        if (globalAllOnMaskPtr == NULL) {
+            globalAllOnMaskPtr =
+                new llvm::GlobalVariable(*m->module, LLVMTypes::MaskType, false,
+                                         llvm::GlobalValue::InternalLinkage,
+                                         LLVMMaskAllOn, "__all_on_mask");
+
+            char buf[256];
+            sprintf(buf, "__off_all_on_mask_%s", g->target.GetISAString());
+            llvm::Constant *offFunc =
+                m->module->getOrInsertFunction(buf, LLVMTypes::VoidType,
+                                               NULL);
+            assert(llvm::isa<llvm::Function>(offFunc));
+            llvm::BasicBlock *offBB =
+                llvm::BasicBlock::Create(*g->ctx, "entry",
+                                         (llvm::Function *)offFunc, 0);
+            new llvm::StoreInst(LLVMMaskAllOff, globalAllOnMaskPtr, offBB);
+            llvm::ReturnInst::Create(*g->ctx, offBB);
+        }
+
+        llvm::Value *allOnMask = LoadInst(globalAllOnMaskPtr, "all_on_mask");
+        SetInternalMaskAnd(LLVMMaskAllOn, allOnMask);
+    }
+
     if (m->diBuilder) {
         /* If debugging is enabled, tell the debug information
            emission code about this new function */
@@ -271,7 +312,8 @@ FunctionEmitContext::GetFunctionMask() {
 
 llvm::Value *
 FunctionEmitContext::GetInternalMask() {
-    if (VaryingCFDepth() == 0)
+    if (VaryingCFDepth() == 0 &&
+        !g->opt.disableMaskAllOnOptimizations)
         return LLVMMaskAllOn;
     else
         return LoadInst(internalMaskPointer, "load_mask");
@@ -281,7 +323,8 @@ FunctionEmitContext::GetInternalMask() {
 llvm::Value *
 FunctionEmitContext::GetFullMask() {
     llvm::Value *internalMask = GetInternalMask();
-    if (internalMask == LLVMMaskAllOn && functionMaskValue == LLVMMaskAllOn)
+    if (internalMask == LLVMMaskAllOn && functionMaskValue == LLVMMaskAllOn &&
+        !g->opt.disableMaskAllOnOptimizations)
         return LLVMMaskAllOn;
     else
         return BinaryOperator(llvm::Instruction::And, GetInternalMask(),
@@ -2047,7 +2090,7 @@ FunctionEmitContext::StoreInst(llvm::Value *value, llvm::Value *ptr,
     if (ptrType->GetBaseType()->IsUniformType())
         // the easy case
         StoreInst(value, ptr);
-    else if (mask == LLVMMaskAllOn)
+    else if (mask == LLVMMaskAllOn && !g->opt.disableMaskAllOnOptimizations)
         // Otherwise it is a masked store unless we can determine that the
         // mask is all on...  (Unclear if this check is actually useful.)
         StoreInst(value, ptr);
diff --git a/expr.cpp b/expr.cpp
index a3f2f52b..93e8fe5a 100644
--- a/expr.cpp
+++ b/expr.cpp
@@ -659,6 +659,7 @@ lStoreAssignResult(llvm::Value *value, llvm::Value *ptr, const Type *ptrType,
     assert(baseSym != NULL &&
            baseSym->varyingCFDepth <= ctx->VaryingCFDepth());
     if (!g->opt.disableMaskedStoreToStore &&
+        !g->opt.disableMaskAllOnOptimizations &&
         baseSym->varyingCFDepth == ctx->VaryingCFDepth() &&
         baseSym->storageClass != SC_STATIC &&
         dynamic_cast<const ReferenceType *>(baseSym->type) == NULL &&
diff --git a/ispc.cpp b/ispc.cpp
index f74e7808..a3b26cb0 100644
--- a/ispc.cpp
+++ b/ispc.cpp
@@ -347,6 +347,7 @@ Opt::Opt() {
     force32BitAddressing = true;
     unrollLoops = true;
     disableAsserts = false;
+    disableMaskAllOnOptimizations = false;
     disableHandlePseudoMemoryOps = false;
     disableBlendedMaskedStores = false;
     disableCoherentControlFlow = false;
@@ -355,7 +356,6 @@ Opt::Opt() {
     disableMaskedStoreToStore = false;
     disableGatherScatterFlattening = false;
     disableUniformMemoryOptimizations = false;
-    disableMaskedStoreOptimizations = false;
 }
 
 ///////////////////////////////////////////////////////////////////////////
diff --git a/ispc.h b/ispc.h
index f6cf079a..1662b4b3 100644
--- a/ispc.h
+++ b/ispc.h
@@ -250,7 +250,12 @@ struct Opt {
     /** Indicates whether assert() statements should be ignored (for
         performance in the generated code). */
     bool disableAsserts;
-    
+
+    /** If enabled, disables the various optimizations that kick in when
+        the execution mask can be determined to be "all on" at compile
+        time. */
+    bool disableMaskAllOnOptimizations;
+
     /** If enabled, the various __pseudo* memory ops (gather/scatter,
         masked load/store) are left in their __pseudo* form, for better
         understanding of the structure of generated code when reading
@@ -302,14 +307,6 @@ struct Opt {
         than gathers/scatters.  This is likely only useful for measuring
         the impact of this optimization. */
     bool disableUniformMemoryOptimizations;
-
-    /** Disables optimizations for masked stores: masked stores with the
-        mask all on are transformed to regular stores, and masked stores
-        with the mask are all off are removed (which in turn can allow
-        eliminating additional dead code related to computing the value
-        stored).  This is likely only useful for measuring the impact of
-        this optimization. */
-    bool disableMaskedStoreOptimizations;
 };
 
 /** @brief This structure collects together a number of global variables.
diff --git a/main.cpp b/main.cpp
index f13544e3..995420b7 100644
--- a/main.cpp
+++ b/main.cpp
@@ -92,15 +92,15 @@ static void usage(int ret) {
     printf("    fast-masked-vload\t\tFaster masked vector loads on SSE (may go past end of array)\n");
     printf("    fast-math\t\t\tPerform non-IEEE-compliant optimizations of numeric expressions\n");
 #if 0
-    printf("    disable-handle-pseudo-memory-ops\n");
+    printf("    disable-all-on-optimizations\n");
     printf("    disable-blended-masked-stores\t\tScalarize masked stores on SSE (vs. using vblendps)\n");
-    printf("    disable-coherent-control-flow\t\tDisable coherent control flow optimizations\n");
-    printf("    disable-uniform-control-flow\t\tDisable uniform control flow optimizations\n");
-    printf("    disable-gather-scatter-optimizations\tDisable improvements to gather/scatter\n");
     printf("    disable-blending-removal\t\tDisable eliminating blend at same scope\n");
+    printf("    disable-coherent-control-flow\t\tDisable coherent control flow optimizations\n");
     printf("    disable-gather-scatter-flattening\tDisable flattening when all lanes are on\n");
+    printf("    disable-gather-scatter-optimizations\tDisable improvements to gather/scatter\n");
+    printf("    disable-handle-pseudo-memory-ops\n");
+    printf("    disable-uniform-control-flow\t\tDisable uniform control flow optimizations\n");
     printf("    disable-uniform-memory-optimizations\tDisable uniform-based coherent memory access\n");
-    printf("    disable-masked-store-optimizations\tDisable lowering to regular stores when possible\n");
 #endif
 #ifndef ISPC_IS_WINDOWS
     printf("    [--pic]\t\t\t\tGenerate position-independent code\n");
@@ -271,6 +271,8 @@ int main(int Argc, char *Argv[]) {
 
         // These are only used for performance tests of specific
         // optimizations
+        else if (!strcmp(opt, "disable-all-on-optimizations"))
+            g->opt.disableMaskAllOnOptimizations = true;
         else if (!strcmp(opt, "disable-handle-pseudo-memory-ops"))
             g->opt.disableHandlePseudoMemoryOps = true;
         else if (!strcmp(opt, "disable-blended-masked-stores"))
@@ -287,8 +289,6 @@ int main(int Argc, char *Argv[]) {
             g->opt.disableGatherScatterFlattening = true;
         else if (!strcmp(opt, "disable-uniform-memory-optimizations"))
             g->opt.disableUniformMemoryOptimizations = true;
-        else if (!strcmp(opt, "disable-masked-store-optimizations"))
-            g->opt.disableMaskedStoreOptimizations = true;
         else
             usage(1);
     }
diff --git a/opt.cpp b/opt.cpp
index ecb9340d..f20badf1 100644
--- a/opt.cpp
+++ b/opt.cpp
@@ -250,7 +250,7 @@ Optimize(llvm::Module *module, int optLevel) {
         optPM.add(llvm::createReassociatePass());
         optPM.add(llvm::createConstantPropagationPass());
 
-        if (!g->opt.disableMaskedStoreOptimizations) {
+        if (!g->opt.disableMaskAllOnOptimizations) {
             optPM.add(CreateIntrinsicsOptPass());
             optPM.add(CreateMaskedStoreOptPass());
         }
@@ -287,7 +287,7 @@ Optimize(llvm::Module *module, int optLevel) {
         optPM.add(llvm::createInstructionCombiningPass());
         optPM.add(llvm::createTailCallEliminationPass());
 
-        if (!g->opt.disableMaskedStoreOptimizations) {
+        if (!g->opt.disableMaskAllOnOptimizations) {
             optPM.add(CreateIntrinsicsOptPass());
             optPM.add(CreateMaskedStoreOptPass());
         }
@@ -334,12 +334,16 @@ Optimize(llvm::Module *module, int optLevel) {
         builder.DisableUnrollLoops = true;
         builder.populateFunctionPassManager(funcPM);
         builder.populateModulePassManager(optPM);
-        optPM.add(CreateIsCompileTimeConstantPass(true));
+
+        optPM.add(CreateIsCompileTimeConstantPass(false));
         optPM.add(CreateIntrinsicsOptPass());
+
         builder.populateLTOPassManager(optPM, true /* internalize */,
                                        true /* inline once again */);
+        optPM.add(CreateIsCompileTimeConstantPass(true));
         optPM.add(CreateIntrinsicsOptPass());
+
         builder.populateModulePassManager(optPM);
 
 #endif
         optPM.add(CreateMakeInternalFuncsStaticPass());
@@ -2221,9 +2225,11 @@ IsCompileTimeConstantPass::runOnBasicBlock(llvm::BasicBlock &bb) {
             // not a __is_compile_time_constant_* function
             continue;
 
-        // This optimization pass can be disabled with the (poorly named)
-        // disableGatherScatterFlattening option.
-        if (g->opt.disableGatherScatterFlattening) {
+        // This optimization pass can be disabled with both the (poorly
+        // named) disableGatherScatterFlattening option and
+        // disableMaskAllOnOptimizations.
+        if (g->opt.disableGatherScatterFlattening ||
+            g->opt.disableMaskAllOnOptimizations) {
             llvm::ReplaceInstWithValue(i->getParent()->getInstList(), i, LLVMFalse);
             modifiedAny = true;
             goto restart;
diff --git a/stmt.cpp b/stmt.cpp
index e10616b5..a23fb70a 100644
--- a/stmt.cpp
+++ b/stmt.cpp
@@ -868,7 +868,9 @@ lSafeToRunWithAllLanesOff(Stmt *stmt) {
 void
 IfStmt::emitVaryingIf(FunctionEmitContext *ctx, llvm::Value *ltest) const {
     llvm::Value *oldMask = ctx->GetInternalMask();
-    if (ctx->GetFullMask() == LLVMMaskAllOn && !g->opt.disableCoherentControlFlow) {
+    if (ctx->GetFullMask() == LLVMMaskAllOn &&
+        !g->opt.disableCoherentControlFlow &&
+        !g->opt.disableMaskAllOnOptimizations) {
         // We can tell that the mask is on statically at compile time; just
         // emit code for the 'if test with the mask all on' path
         llvm::BasicBlock *bDone = ctx->CreateBasicBlock("cif_done");
@@ -952,9 +954,11 @@ IfStmt::emitMaskAllOn(FunctionEmitContext *ctx, llvm::Value *ltest,
     // code emitted here can operate with the knowledge that the mask is
     // definitely all on (until it modifies the mask itself).
     assert(!g->opt.disableCoherentControlFlow);
-    ctx->SetInternalMask(LLVMMaskAllOn);
+    if (!g->opt.disableMaskAllOnOptimizations)
+        ctx->SetInternalMask(LLVMMaskAllOn);
     llvm::Value *oldFunctionMask = ctx->GetFunctionMask();
-    ctx->SetFunctionMask(LLVMMaskAllOn);
+    if (!g->opt.disableMaskAllOnOptimizations)
+        ctx->SetFunctionMask(LLVMMaskAllOn);
 
     // First, check the value of the test.  If it's all on, then we jump to
     // a basic block that will only have code for the true case.
@@ -1156,9 +1160,11 @@ void DoStmt::EmitCode(FunctionEmitContext *ctx) const {
     // IfStmt::emitCoherentTests()), and then emit the code for the
     // loop body.
     ctx->SetCurrentBasicBlock(bAllOn);
-    ctx->SetInternalMask(LLVMMaskAllOn);
+    if (!g->opt.disableMaskAllOnOptimizations)
+        ctx->SetInternalMask(LLVMMaskAllOn);
     llvm::Value *oldFunctionMask = ctx->GetFunctionMask();
-    ctx->SetFunctionMask(LLVMMaskAllOn);
+    if (!g->opt.disableMaskAllOnOptimizations)
+        ctx->SetFunctionMask(LLVMMaskAllOn);
     if (bodyStmts)
         bodyStmts->EmitCode(ctx);
     assert(ctx->GetCurrentBasicBlock());
@@ -1379,9 +1385,11 @@ ForStmt::EmitCode(FunctionEmitContext *ctx) const {
     // the runtime test has passed, make this fact clear for code
    // generation at compile time here.)
     ctx->SetCurrentBasicBlock(bAllOn);
-    ctx->SetInternalMask(LLVMMaskAllOn);
+    if (!g->opt.disableMaskAllOnOptimizations)
+        ctx->SetInternalMask(LLVMMaskAllOn);
     llvm::Value *oldFunctionMask = ctx->GetFunctionMask();
-    ctx->SetFunctionMask(LLVMMaskAllOn);
+    if (!g->opt.disableMaskAllOnOptimizations)
+        ctx->SetFunctionMask(LLVMMaskAllOn);
     if (stmts)
         stmts->EmitCode(ctx);
     assert(ctx->GetCurrentBasicBlock());
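
The ctx.cpp hunk above is the heart of the change, and the trick it plays is
easier to see outside of LLVM's IR-building API.  Below is a minimal
standalone C++ sketch of the same idea; the names all_on_mask and
off_all_on_mask are hypothetical stand-ins for the __all_on_mask global and
the __off_all_on_mask_* dummy function that the generated module actually
contains, so this is an illustration, not ispc code.

#include <cstdio>
#include <cstdint>

// Analogue of __all_on_mask: an internal-linkage global that really does
// hold an "all on" mask value for the entire life of the program.
static uint32_t all_on_mask = 0xffffffffu;

// Analogue of the emitted __off_all_on_mask_* dummy function: it is never
// called, but because it is externally visible the compiler must assume
// some other translation unit might call it, and therefore cannot prove
// that all_on_mask still holds its initializer.
void off_all_on_mask() {
    all_on_mask = 0;  // "all off"
}

int main() {
    // Analogue of the SetInternalMaskAnd(LLVMMaskAllOn, allOnMask) call at
    // function entry: AND the statically-all-on mask with the loaded
    // global.  At runtime this is a no-op, but the optimizer can no longer
    // treat the result as the compile-time constant "all on", so the
    // masked code paths that would otherwise be specialized away survive.
    uint32_t mask = 0xffffffffu & all_on_mask;
    printf("mask = %#x\n", (unsigned)mask);
    return 0;
}

The same reasoning appears to explain why the patch gives __all_on_mask
internal linkage but leaves the dummy function with default (external)
linkage: the store inside the never-called function is the only thing
standing between the load of __all_on_mask and constant propagation.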