Add an option to turn off codegen improvements when the mask is statically known to be 'all on'.
ctx.cpp (49 changed lines)
@@ -197,6 +197,47 @@ FunctionEmitContext::FunctionEmitContext(Function *func, Symbol *funSym,
         returnValuePtr = AllocaInst(ftype, "return_value_memory");
     }
 
+    if (g->opt.disableMaskAllOnOptimizations) {
+        // This is really disgusting.  We want to be able to fool the
+        // compiler so that it can't reason that the mask is all on, but
+        // we don't want to pay too much of a price at the start of each
+        // function to do so.
+        //
+        // Therefore: first, we declare a module-static __all_on_mask
+        // variable that will hold an "all on" mask value.  At the start of
+        // each function, we'll load its value and call SetInternalMaskAnd
+        // with the result to set the current internal execution mask.
+        // (This is a no-op at runtime.)
+        //
+        // Then, to convince the optimizer that the value of
+        // __all_on_mask can't be guaranteed to be "all on", we emit a
+        // dummy function that sets __all_on_mask to "all off".  (That
+        // function is never actually called.)
+        llvm::Value *globalAllOnMaskPtr =
+            m->module->getNamedGlobal("__all_on_mask");
+        if (globalAllOnMaskPtr == NULL) {
+            globalAllOnMaskPtr =
+                new llvm::GlobalVariable(*m->module, LLVMTypes::MaskType, false,
+                                         llvm::GlobalValue::InternalLinkage,
+                                         LLVMMaskAllOn, "__all_on_mask");
+
+            char buf[256];
+            sprintf(buf, "__off_all_on_mask_%s", g->target.GetISAString());
+            llvm::Constant *offFunc =
+                m->module->getOrInsertFunction(buf, LLVMTypes::VoidType,
+                                               NULL);
+            assert(llvm::isa<llvm::Function>(offFunc));
+            llvm::BasicBlock *offBB =
+                llvm::BasicBlock::Create(*g->ctx, "entry",
+                                         (llvm::Function *)offFunc, 0);
+            new llvm::StoreInst(LLVMMaskAllOff, globalAllOnMaskPtr, offBB);
+            llvm::ReturnInst::Create(*g->ctx, offBB);
+        }
+
+        llvm::Value *allOnMask = LoadInst(globalAllOnMaskPtr, "all_on_mask");
+        SetInternalMaskAnd(LLVMMaskAllOn, allOnMask);
+    }
+
     if (m->diBuilder) {
         /* If debugging is enabled, tell the debug information emission
            code about this new function */
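The comment above describes a standard way to hide a compile-time constant from an optimizer: route the value through an internal-linkage global that a never-called but externally visible function overwrites, so constant propagation can no longer fold the load. A minimal standalone C++ sketch of the same idea (all names here are illustrative, not part of the commit):

#include <cstdio>

// Internal-linkage global that is "all on" (modeled here as a plain bool).
static bool all_on_mask = true;

// Never called, but externally visible, so the optimizer generally cannot
// prove that all_on_mask is still true when main() loads it.
void off_all_on_mask() { all_on_mask = false; }

int main() {
    // At runtime this always loads true, so AND'ing it into the execution
    // mask is a no-op; the optimizer, however, has to keep the load and the
    // AND instead of folding the mask to a constant.
    bool mask = true && all_on_mask;
    std::printf("mask = %d\n", mask);
    return 0;
}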
@@ -271,7 +312,8 @@ FunctionEmitContext::GetFunctionMask() {
 
 llvm::Value *
 FunctionEmitContext::GetInternalMask() {
-    if (VaryingCFDepth() == 0)
+    if (VaryingCFDepth() == 0 &&
+        !g->opt.disableMaskAllOnOptimizations)
         return LLVMMaskAllOn;
     else
         return LoadInst(internalMaskPointer, "load_mask");
@@ -281,7 +323,8 @@ FunctionEmitContext::GetInternalMask() {
 llvm::Value *
 FunctionEmitContext::GetFullMask() {
     llvm::Value *internalMask = GetInternalMask();
-    if (internalMask == LLVMMaskAllOn && functionMaskValue == LLVMMaskAllOn)
+    if (internalMask == LLVMMaskAllOn && functionMaskValue == LLVMMaskAllOn &&
+        !g->opt.disableMaskAllOnOptimizations)
         return LLVMMaskAllOn;
     else
         return BinaryOperator(llvm::Instruction::And, GetInternalMask(),
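Both getters now short-circuit to the all-on constant only when the new option is off; otherwise they fall back to loading and combining the stored masks. The relationship between the two masks can be sketched like this (illustrative types, not the real LLVM values):

#include <cstdint>

using Mask = uint32_t;              // one bit per program instance
constexpr Mask kAllOn = 0xFu;       // assuming a 4-wide gang

// Mirrors what GetFullMask() computes: the mask applied to side effects is
// the function-entry mask AND'ed with the internal control-flow mask.
Mask fullMask(Mask functionMask, Mask internalMask) {
    return functionMask & internalMask;
}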
@@ -2047,7 +2090,7 @@ FunctionEmitContext::StoreInst(llvm::Value *value, llvm::Value *ptr,
     if (ptrType->GetBaseType()->IsUniformType())
         // the easy case
         StoreInst(value, ptr);
-    else if (mask == LLVMMaskAllOn)
+    else if (mask == LLVMMaskAllOn && !g->opt.disableMaskAllOnOptimizations)
         // Otherwise it is a masked store unless we can determine that the
         // mask is all on... (Unclear if this check is actually useful.)
         StoreInst(value, ptr);
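The change above only emits a plain store when the mask is statically all on and the new option is off. For context, the difference between the two forms can be sketched in plain C++ with a fixed 4-wide gang (illustrative only, not the code ispc emits):

// Masked store: each lane writes only if its mask bit is set.
void maskedStore(int *dst, const int *src, const bool mask[4]) {
    for (int i = 0; i < 4; ++i)
        if (mask[i])
            dst[i] = src[i];
}

// With the mask statically known to be all on, the same operation is just an
// unconditional store -- the form the "mask all on" special case emits.
void storeAllOn(int *dst, const int *src) {
    for (int i = 0; i < 4; ++i)
        dst[i] = src[i];
}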
expr.cpp (1 changed line)
@@ -659,6 +659,7 @@ lStoreAssignResult(llvm::Value *value, llvm::Value *ptr, const Type *ptrType,
     assert(baseSym != NULL &&
            baseSym->varyingCFDepth <= ctx->VaryingCFDepth());
     if (!g->opt.disableMaskedStoreToStore &&
+        !g->opt.disableMaskAllOnOptimizations &&
         baseSym->varyingCFDepth == ctx->VaryingCFDepth() &&
         baseSym->storageClass != SC_STATIC &&
         dynamic_cast<const ReferenceType *>(baseSym->type) == NULL &&
ispc.cpp (2 changed lines)
@@ -347,6 +347,7 @@ Opt::Opt() {
     force32BitAddressing = true;
     unrollLoops = true;
     disableAsserts = false;
+    disableMaskAllOnOptimizations = false;
     disableHandlePseudoMemoryOps = false;
     disableBlendedMaskedStores = false;
     disableCoherentControlFlow = false;
@@ -355,7 +356,6 @@ Opt::Opt() {
     disableMaskedStoreToStore = false;
     disableGatherScatterFlattening = false;
     disableUniformMemoryOptimizations = false;
-    disableMaskedStoreOptimizations = false;
 }
 
 ///////////////////////////////////////////////////////////////////////////
ispc.h (15 changed lines)
@@ -250,7 +250,12 @@ struct Opt {
     /** Indicates whether assert() statements should be ignored (for
         performance in the generated code). */
     bool disableAsserts;
 
+    /** If enabled, disables the various optimizations that kick in when
+        the execution mask can be determined to be "all on" at compile
+        time. */
+    bool disableMaskAllOnOptimizations;
+
     /** If enabled, the various __pseudo* memory ops (gather/scatter,
         masked load/store) are left in their __pseudo* form, for better
         understanding of the structure of generated code when reading
@@ -302,14 +307,6 @@ struct Opt {
         than gathers/scatters.  This is likely only useful for measuring
         the impact of this optimization. */
     bool disableUniformMemoryOptimizations;
-
-    /** Disables optimizations for masked stores: masked stores with the
-        mask all on are transformed to regular stores, and masked stores
-        with the mask all off are removed (which in turn can allow
-        eliminating additional dead code related to computing the value
-        stored).  This is likely only useful for measuring the impact of
-        this optimization. */
-    bool disableMaskedStoreOptimizations;
 };
 
 /** @brief This structure collects together a number of global variables.
main.cpp (14 changed lines)
@@ -92,15 +92,15 @@ static void usage(int ret) {
     printf(" fast-masked-vload\t\tFaster masked vector loads on SSE (may go past end of array)\n");
     printf(" fast-math\t\t\tPerform non-IEEE-compliant optimizations of numeric expressions\n");
 #if 0
-    printf(" disable-handle-pseudo-memory-ops\n");
+    printf(" disable-all-on-optimizations\n");
     printf(" disable-blended-masked-stores\t\tScalarize masked stores on SSE (vs. using vblendps)\n");
-    printf(" disable-coherent-control-flow\t\tDisable coherent control flow optimizations\n");
-    printf(" disable-uniform-control-flow\t\tDisable uniform control flow optimizations\n");
-    printf(" disable-gather-scatter-optimizations\tDisable improvements to gather/scatter\n");
     printf(" disable-blending-removal\t\tDisable eliminating blend at same scope\n");
+    printf(" disable-coherent-control-flow\t\tDisable coherent control flow optimizations\n");
     printf(" disable-gather-scatter-flattening\tDisable flattening when all lanes are on\n");
+    printf(" disable-gather-scatter-optimizations\tDisable improvements to gather/scatter\n");
+    printf(" disable-handle-pseudo-memory-ops\n");
+    printf(" disable-uniform-control-flow\t\tDisable uniform control flow optimizations\n");
     printf(" disable-uniform-memory-optimizations\tDisable uniform-based coherent memory access\n");
-    printf(" disable-masked-store-optimizations\tDisable lowering to regular stores when possible\n");
 #endif
 #ifndef ISPC_IS_WINDOWS
     printf(" [--pic]\t\t\t\tGenerate position-independent code\n");
@@ -271,6 +271,8 @@ int main(int Argc, char *Argv[]) {
 
             // These are only used for performance tests of specific
             // optimizations
+            else if (!strcmp(opt, "disable-all-on-optimizations"))
+                g->opt.disableMaskAllOnOptimizations = true;
             else if (!strcmp(opt, "disable-handle-pseudo-memory-ops"))
                 g->opt.disableHandlePseudoMemoryOps = true;
             else if (!strcmp(opt, "disable-blended-masked-stores"))
@@ -287,8 +289,6 @@ int main(int Argc, char *Argv[]) {
                 g->opt.disableGatherScatterFlattening = true;
             else if (!strcmp(opt, "disable-uniform-memory-optimizations"))
                 g->opt.disableUniformMemoryOptimizations = true;
-            else if (!strcmp(opt, "disable-masked-store-optimizations"))
-                g->opt.disableMaskedStoreOptimizations = true;
             else
                 usage(1);
         }
opt.cpp (18 changed lines)
@@ -250,7 +250,7 @@ Optimize(llvm::Module *module, int optLevel) {
         optPM.add(llvm::createReassociatePass());
         optPM.add(llvm::createConstantPropagationPass());
 
-        if (!g->opt.disableMaskedStoreOptimizations) {
+        if (!g->opt.disableMaskAllOnOptimizations) {
             optPM.add(CreateIntrinsicsOptPass());
             optPM.add(CreateMaskedStoreOptPass());
         }
@@ -287,7 +287,7 @@ Optimize(llvm::Module *module, int optLevel) {
         optPM.add(llvm::createInstructionCombiningPass());
         optPM.add(llvm::createTailCallEliminationPass());
 
-        if (!g->opt.disableMaskedStoreOptimizations) {
+        if (!g->opt.disableMaskAllOnOptimizations) {
            optPM.add(CreateIntrinsicsOptPass());
            optPM.add(CreateMaskedStoreOptPass());
        }
@@ -334,12 +334,16 @@ Optimize(llvm::Module *module, int optLevel) {
         builder.DisableUnrollLoops = true;
         builder.populateFunctionPassManager(funcPM);
         builder.populateModulePassManager(optPM);
-        optPM.add(CreateIsCompileTimeConstantPass(true));
 
         optPM.add(CreateIsCompileTimeConstantPass(false));
+        optPM.add(CreateIntrinsicsOptPass());
 
         builder.populateLTOPassManager(optPM, true /* internalize */,
                                        true /* inline once again */);
+
+        optPM.add(CreateIsCompileTimeConstantPass(true));
+        optPM.add(CreateIntrinsicsOptPass());
+
         builder.populateModulePassManager(optPM);
 #endif
         optPM.add(CreateMakeInternalFuncsStaticPass());
@@ -2221,9 +2225,11 @@ IsCompileTimeConstantPass::runOnBasicBlock(llvm::BasicBlock &bb) {
             // not a __is_compile_time_constant_* function
             continue;
 
-        // This optimization pass can be disabled with the (poorly named)
-        // disableGatherScatterFlattening option.
-        if (g->opt.disableGatherScatterFlattening) {
+        // This optimization pass can be disabled with both the (poorly
+        // named) disableGatherScatterFlattening option and
+        // disableMaskAllOnOptimizations.
+        if (g->opt.disableGatherScatterFlattening ||
+            g->opt.disableMaskAllOnOptimizations) {
             llvm::ReplaceInstWithValue(i->getParent()->getInstList(), i, LLVMFalse);
             modifiedAny = true;
             goto restart;
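When either option is set, the pass folds every __is_compile_time_constant_* call to false instead of trying to prove its argument constant, which steers code generation away from the specialized all-lanes-on paths. A rough model of the effect in plain C++ (the helper name and call site below are illustrative stand-ins, not the real pass or standard-library code):

#include <cstdint>

using Mask = uint32_t;
constexpr Mask kAllOn = 0xFu;           // assuming a 4-wide gang

// Stand-in for the guard the pass normally resolves; with the optimization
// disabled it behaves as if it always evaluated to false.
static bool isCompileTimeConstantMask(Mask) { return false; }

int process(Mask mask) {
    if (isCompileTimeConstantMask(mask) && mask == kAllOn)
        return 1;                       // fast path: mask statically all on
    return 0;                           // general path: honor the runtime mask
}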
stmt.cpp (22 changed lines)
@@ -868,7 +868,9 @@ lSafeToRunWithAllLanesOff(Stmt *stmt) {
 void
 IfStmt::emitVaryingIf(FunctionEmitContext *ctx, llvm::Value *ltest) const {
     llvm::Value *oldMask = ctx->GetInternalMask();
-    if (ctx->GetFullMask() == LLVMMaskAllOn && !g->opt.disableCoherentControlFlow) {
+    if (ctx->GetFullMask() == LLVMMaskAllOn &&
+        !g->opt.disableCoherentControlFlow &&
+        !g->opt.disableMaskAllOnOptimizations) {
         // We can tell that the mask is on statically at compile time; just
         // emit code for the 'if test with the mask all on' path
         llvm::BasicBlock *bDone = ctx->CreateBasicBlock("cif_done");
@@ -952,9 +954,11 @@ IfStmt::emitMaskAllOn(FunctionEmitContext *ctx, llvm::Value *ltest,
     // code emitted here can operate with the knowledge that the mask is
     // definitely all on (until it modifies the mask itself).
     assert(!g->opt.disableCoherentControlFlow);
-    ctx->SetInternalMask(LLVMMaskAllOn);
+    if (!g->opt.disableMaskAllOnOptimizations)
+        ctx->SetInternalMask(LLVMMaskAllOn);
     llvm::Value *oldFunctionMask = ctx->GetFunctionMask();
-    ctx->SetFunctionMask(LLVMMaskAllOn);
+    if (!g->opt.disableMaskAllOnOptimizations)
+        ctx->SetFunctionMask(LLVMMaskAllOn);
 
     // First, check the value of the test.  If it's all on, then we jump to
     // a basic block that will only have code for the true case.
@@ -1156,9 +1160,11 @@ void DoStmt::EmitCode(FunctionEmitContext *ctx) const {
         // IfStmt::emitCoherentTests()), and then emit the code for the
         // loop body.
         ctx->SetCurrentBasicBlock(bAllOn);
-        ctx->SetInternalMask(LLVMMaskAllOn);
+        if (!g->opt.disableMaskAllOnOptimizations)
+            ctx->SetInternalMask(LLVMMaskAllOn);
         llvm::Value *oldFunctionMask = ctx->GetFunctionMask();
-        ctx->SetFunctionMask(LLVMMaskAllOn);
+        if (!g->opt.disableMaskAllOnOptimizations)
+            ctx->SetFunctionMask(LLVMMaskAllOn);
         if (bodyStmts)
             bodyStmts->EmitCode(ctx);
         assert(ctx->GetCurrentBasicBlock());
@@ -1379,9 +1385,11 @@ ForStmt::EmitCode(FunctionEmitContext *ctx) const {
         // the runtime test has passed, make this fact clear for code
         // generation at compile time here.)
         ctx->SetCurrentBasicBlock(bAllOn);
-        ctx->SetInternalMask(LLVMMaskAllOn);
+        if (!g->opt.disableMaskAllOnOptimizations)
+            ctx->SetInternalMask(LLVMMaskAllOn);
         llvm::Value *oldFunctionMask = ctx->GetFunctionMask();
-        ctx->SetFunctionMask(LLVMMaskAllOn);
+        if (!g->opt.disableMaskAllOnOptimizations)
+            ctx->SetFunctionMask(LLVMMaskAllOn);
         if (stmts)
             stmts->EmitCode(ctx);
         assert(ctx->GetCurrentBasicBlock());
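All four statement emitters follow the same pattern: the "mask all on" fast paths are still emitted, but the emitter only records the all-on knowledge (via SetInternalMask / SetFunctionMask) when the new option is off. The specialization those paths represent can be sketched in plain C++ roughly as follows (a 4-wide gang and illustrative names; not the code ispc generates):

#include <array>

using Lanes = std::array<float, 4>;
using LaneMask = std::array<bool, 4>;

// "Coherent if": when the test is true on every lane, the whole gang can run
// one unmasked fast path; otherwise each lane is updated under its mask bit.
void doubleWhere(Lanes &x, const LaneMask &test) {
    bool allOn = test[0] && test[1] && test[2] && test[3];
    if (allOn) {
        for (float &v : x)
            v *= 2.0f;                  // fast path: no per-lane masking
    } else {
        for (int i = 0; i < 4; ++i)
            if (test[i])
                x[i] *= 2.0f;           // general path: masked update
    }
}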