Add option to turn off codegen improvements when mask 'all on' is statically known.

This commit is contained in:
Matt Pharr
2011-12-11 16:16:36 -08:00
parent 20536bb339
commit 46bfef3fce
7 changed files with 88 additions and 33 deletions

49
ctx.cpp
View File

@@ -197,6 +197,47 @@ FunctionEmitContext::FunctionEmitContext(Function *func, Symbol *funSym,
returnValuePtr = AllocaInst(ftype, "return_value_memory");
}
if (g->opt.disableMaskAllOnOptimizations) {
// This is really disgusting. We want to prevent the compiler from
// being able to reason that the mask is all on, but we don't want
// to pay too much of a price at the start of each function to do
// so.
//
// Therefore: first, we declare a module-static __all_on_mask
// variable that will hold an "all on" mask value. At the start of
// each function, we'll load its value and call SetInternalMaskAnd
// with the result to set the current internal execution mask.
// (This is a no-op at runtime.)
//
// Then, to keep the optimizer from concluding that the value of
// __all_on_mask is guaranteed to be "all on", we emit a dummy
// function that sets __all_on_mask to be "all off". (That
// function is never actually called.)
llvm::Value *globalAllOnMaskPtr =
m->module->getNamedGlobal("__all_on_mask");
if (globalAllOnMaskPtr == NULL) {
globalAllOnMaskPtr =
new llvm::GlobalVariable(*m->module, LLVMTypes::MaskType, false,
llvm::GlobalValue::InternalLinkage,
LLVMMaskAllOn, "__all_on_mask");
char buf[256];
sprintf(buf, "__off_all_on_mask_%s", g->target.GetISAString());
llvm::Constant *offFunc =
m->module->getOrInsertFunction(buf, LLVMTypes::VoidType,
NULL);
assert(llvm::isa<llvm::Function>(offFunc));
llvm::BasicBlock *offBB =
llvm::BasicBlock::Create(*g->ctx, "entry",
(llvm::Function *)offFunc, 0);
new llvm::StoreInst(LLVMMaskAllOff, globalAllOnMaskPtr, offBB);
llvm::ReturnInst::Create(*g->ctx, offBB);
}
llvm::Value *allOnMask = LoadInst(globalAllOnMaskPtr, "all_on_mask");
SetInternalMaskAnd(LLVMMaskAllOn, allOnMask);
}
if (m->diBuilder) {
/* If debugging is enabled, tell the debug information emission
code about this new function */
@@ -271,7 +312,8 @@ FunctionEmitContext::GetFunctionMask() {
llvm::Value *
FunctionEmitContext::GetInternalMask() {
if (VaryingCFDepth() == 0)
if (VaryingCFDepth() == 0 &&
!g->opt.disableMaskAllOnOptimizations)
return LLVMMaskAllOn;
else
return LoadInst(internalMaskPointer, "load_mask");
@@ -281,7 +323,8 @@ FunctionEmitContext::GetInternalMask() {
llvm::Value *
FunctionEmitContext::GetFullMask() {
llvm::Value *internalMask = GetInternalMask();
if (internalMask == LLVMMaskAllOn && functionMaskValue == LLVMMaskAllOn)
if (internalMask == LLVMMaskAllOn && functionMaskValue == LLVMMaskAllOn &&
!g->opt.disableMaskAllOnOptimizations)
return LLVMMaskAllOn;
else
return BinaryOperator(llvm::Instruction::And, GetInternalMask(),
@@ -2047,7 +2090,7 @@ FunctionEmitContext::StoreInst(llvm::Value *value, llvm::Value *ptr,
if (ptrType->GetBaseType()->IsUniformType())
// the easy case
StoreInst(value, ptr);
else if (mask == LLVMMaskAllOn)
else if (mask == LLVMMaskAllOn && !g->opt.disableMaskAllOnOptimizations)
// Otherwise it is a masked store unless we can determine that the
// mask is all on... (Unclear if this check is actually useful.)
StoreInst(value, ptr);

View File

@@ -659,6 +659,7 @@ lStoreAssignResult(llvm::Value *value, llvm::Value *ptr, const Type *ptrType,
assert(baseSym != NULL &&
baseSym->varyingCFDepth <= ctx->VaryingCFDepth());
if (!g->opt.disableMaskedStoreToStore &&
!g->opt.disableMaskAllOnOptimizations &&
baseSym->varyingCFDepth == ctx->VaryingCFDepth() &&
baseSym->storageClass != SC_STATIC &&
dynamic_cast<const ReferenceType *>(baseSym->type) == NULL &&

View File

@@ -347,6 +347,7 @@ Opt::Opt() {
force32BitAddressing = true;
unrollLoops = true;
disableAsserts = false;
disableMaskAllOnOptimizations = false;
disableHandlePseudoMemoryOps = false;
disableBlendedMaskedStores = false;
disableCoherentControlFlow = false;
@@ -355,7 +356,6 @@ Opt::Opt() {
disableMaskedStoreToStore = false;
disableGatherScatterFlattening = false;
disableUniformMemoryOptimizations = false;
disableMaskedStoreOptimizations = false;
}
///////////////////////////////////////////////////////////////////////////

15
ispc.h
View File

@@ -250,7 +250,12 @@ struct Opt {
/** Indicates whether assert() statements should be ignored (for
performance in the generated code). */
bool disableAsserts;
/** If enabled, disables the various optimizations that kick in when
the execution mask can be determined to be "all on" at compile
time. */
bool disableMaskAllOnOptimizations;
/** If enabled, the various __pseudo* memory ops (gather/scatter,
masked load/store) are left in their __pseudo* form, for better
understanding of the structure of generated code when reading
@@ -302,14 +307,6 @@ struct Opt {
than gathers/scatters. This is likely only useful for measuring
the impact of this optimization. */
bool disableUniformMemoryOptimizations;
/** Disables optimizations for masked stores: masked stores with the
mask all on are transformed to regular stores, and masked stores
with the mask all off are removed (which in turn can allow
eliminating additional dead code related to computing the value
stored). This is likely only useful for measuring the impact of
this optimization. */
bool disableMaskedStoreOptimizations;
};
/** @brief This structure collects together a number of global variables.

View File

@@ -92,15 +92,15 @@ static void usage(int ret) {
printf(" fast-masked-vload\t\tFaster masked vector loads on SSE (may go past end of array)\n");
printf(" fast-math\t\t\tPerform non-IEEE-compliant optimizations of numeric expressions\n");
#if 0
printf(" disable-handle-pseudo-memory-ops\n");
printf(" disable-all-on-optimizations\n");
printf(" disable-blended-masked-stores\t\tScalarize masked stores on SSE (vs. using vblendps)\n");
printf(" disable-coherent-control-flow\t\tDisable coherent control flow optimizations\n");
printf(" disable-uniform-control-flow\t\tDisable uniform control flow optimizations\n");
printf(" disable-gather-scatter-optimizations\tDisable improvements to gather/scatter\n");
printf(" disable-blending-removal\t\tDisable eliminating blend at same scope\n");
printf(" disable-coherent-control-flow\t\tDisable coherent control flow optimizations\n");
printf(" disable-gather-scatter-flattening\tDisable flattening when all lanes are on\n");
printf(" disable-gather-scatter-optimizations\tDisable improvements to gather/scatter\n");
printf(" disable-handle-pseudo-memory-ops\n");
printf(" disable-uniform-control-flow\t\tDisable uniform control flow optimizations\n");
printf(" disable-uniform-memory-optimizations\tDisable uniform-based coherent memory access\n");
printf(" disable-masked-store-optimizations\tDisable lowering to regular stores when possible\n");
#endif
#ifndef ISPC_IS_WINDOWS
printf(" [--pic]\t\t\t\tGenerate position-independent code\n");
@@ -271,6 +271,8 @@ int main(int Argc, char *Argv[]) {
// These are only used for performance tests of specific
// optimizations
else if (!strcmp(opt, "disable-all-on-optimizations"))
g->opt.disableMaskAllOnOptimizations = true;
else if (!strcmp(opt, "disable-handle-pseudo-memory-ops"))
g->opt.disableHandlePseudoMemoryOps = true;
else if (!strcmp(opt, "disable-blended-masked-stores"))
@@ -287,8 +289,6 @@ int main(int Argc, char *Argv[]) {
g->opt.disableGatherScatterFlattening = true;
else if (!strcmp(opt, "disable-uniform-memory-optimizations"))
g->opt.disableUniformMemoryOptimizations = true;
else if (!strcmp(opt, "disable-masked-store-optimizations"))
g->opt.disableMaskedStoreOptimizations = true;
else
usage(1);
}

18
opt.cpp
View File

@@ -250,7 +250,7 @@ Optimize(llvm::Module *module, int optLevel) {
optPM.add(llvm::createReassociatePass());
optPM.add(llvm::createConstantPropagationPass());
if (!g->opt.disableMaskedStoreOptimizations) {
if (!g->opt.disableMaskAllOnOptimizations) {
optPM.add(CreateIntrinsicsOptPass());
optPM.add(CreateMaskedStoreOptPass());
}
@@ -287,7 +287,7 @@ Optimize(llvm::Module *module, int optLevel) {
optPM.add(llvm::createInstructionCombiningPass());
optPM.add(llvm::createTailCallEliminationPass());
if (!g->opt.disableMaskedStoreOptimizations) {
if (!g->opt.disableMaskAllOnOptimizations) {
optPM.add(CreateIntrinsicsOptPass());
optPM.add(CreateMaskedStoreOptPass());
}
@@ -334,12 +334,16 @@ Optimize(llvm::Module *module, int optLevel) {
builder.DisableUnrollLoops = true;
builder.populateFunctionPassManager(funcPM);
builder.populateModulePassManager(optPM);
optPM.add(CreateIsCompileTimeConstantPass(true));
optPM.add(CreateIsCompileTimeConstantPass(false));
optPM.add(CreateIntrinsicsOptPass());
builder.populateLTOPassManager(optPM, true /* internalize */,
true /* inline once again */);
optPM.add(CreateIsCompileTimeConstantPass(true));
optPM.add(CreateIntrinsicsOptPass());
builder.populateModulePassManager(optPM);
#endif
optPM.add(CreateMakeInternalFuncsStaticPass());
@@ -2221,9 +2225,11 @@ IsCompileTimeConstantPass::runOnBasicBlock(llvm::BasicBlock &bb) {
// not a __is_compile_time_constant_* function
continue;
// This optimization pass can be disabled with the (poorly named)
// disableGatherScatterFlattening option.
if (g->opt.disableGatherScatterFlattening) {
// This optimization pass can be disabled with both the (poorly
// named) disableGatherScatterFlattening option and
// disableMaskAllOnOptimizations.
if (g->opt.disableGatherScatterFlattening ||
g->opt.disableMaskAllOnOptimizations) {
llvm::ReplaceInstWithValue(i->getParent()->getInstList(), i, LLVMFalse);
modifiedAny = true;
goto restart;

View File

@@ -868,7 +868,9 @@ lSafeToRunWithAllLanesOff(Stmt *stmt) {
void
IfStmt::emitVaryingIf(FunctionEmitContext *ctx, llvm::Value *ltest) const {
llvm::Value *oldMask = ctx->GetInternalMask();
if (ctx->GetFullMask() == LLVMMaskAllOn && !g->opt.disableCoherentControlFlow) {
if (ctx->GetFullMask() == LLVMMaskAllOn &&
!g->opt.disableCoherentControlFlow &&
!g->opt.disableMaskAllOnOptimizations) {
// We can tell that the mask is on statically at compile time; just
// emit code for the 'if test with the mask all on' path
llvm::BasicBlock *bDone = ctx->CreateBasicBlock("cif_done");
@@ -952,9 +954,11 @@ IfStmt::emitMaskAllOn(FunctionEmitContext *ctx, llvm::Value *ltest,
// code emitted here can operate with the knowledge that the mask is
// definitely all on (until it modifies the mask itself).
assert(!g->opt.disableCoherentControlFlow);
ctx->SetInternalMask(LLVMMaskAllOn);
if (!g->opt.disableMaskAllOnOptimizations)
ctx->SetInternalMask(LLVMMaskAllOn);
llvm::Value *oldFunctionMask = ctx->GetFunctionMask();
ctx->SetFunctionMask(LLVMMaskAllOn);
if (!g->opt.disableMaskAllOnOptimizations)
ctx->SetFunctionMask(LLVMMaskAllOn);
// First, check the value of the test. If it's all on, then we jump to
// a basic block that will only have code for the true case.
@@ -1156,9 +1160,11 @@ void DoStmt::EmitCode(FunctionEmitContext *ctx) const {
// IfStmt::emitCoherentTests()), and then emit the code for the
// loop body.
ctx->SetCurrentBasicBlock(bAllOn);
ctx->SetInternalMask(LLVMMaskAllOn);
if (!g->opt.disableMaskAllOnOptimizations)
ctx->SetInternalMask(LLVMMaskAllOn);
llvm::Value *oldFunctionMask = ctx->GetFunctionMask();
ctx->SetFunctionMask(LLVMMaskAllOn);
if (!g->opt.disableMaskAllOnOptimizations)
ctx->SetFunctionMask(LLVMMaskAllOn);
if (bodyStmts)
bodyStmts->EmitCode(ctx);
assert(ctx->GetCurrentBasicBlock());
@@ -1379,9 +1385,11 @@ ForStmt::EmitCode(FunctionEmitContext *ctx) const {
// the runtime test has passed, make this fact clear for code
// generation at compile time here.)
ctx->SetCurrentBasicBlock(bAllOn);
ctx->SetInternalMask(LLVMMaskAllOn);
if (!g->opt.disableMaskAllOnOptimizations)
ctx->SetInternalMask(LLVMMaskAllOn);
llvm::Value *oldFunctionMask = ctx->GetFunctionMask();
ctx->SetFunctionMask(LLVMMaskAllOn);
if (!g->opt.disableMaskAllOnOptimizations)
ctx->SetFunctionMask(LLVMMaskAllOn);
if (stmts)
stmts->EmitCode(ctx);
assert(ctx->GetCurrentBasicBlock());