diff --git a/ispc.cpp b/ispc.cpp index fce7af14..60aa3e0b 100644 --- a/ispc.cpp +++ b/ispc.cpp @@ -284,6 +284,7 @@ Opt::Opt() { fastMaskedVload = false; unrollLoops = true; disableAsserts = false; + disableHandlePseudoMemoryOps = false; disableBlendedMaskedStores = false; disableCoherentControlFlow = false; disableUniformControlFlow = false; diff --git a/ispc.h b/ispc.h index 4461e76e..cbff8a57 100644 --- a/ispc.h +++ b/ispc.h @@ -227,6 +227,12 @@ struct Opt { /** Indicates whether assert() statements should be ignored (for performance in the generated code). */ bool disableAsserts; + + /** If enabled, the various __pseudo* memory ops (gather/scatter, + masked load/store) are left in their __pseudo* form, for better + understanding of the structure of generated code when reading + it. */ + bool disableHandlePseudoMemoryOps; /** On targets that don't have a masked store instruction but do have a blending instruction, by default, we simulate masked stores by diff --git a/main.cpp b/main.cpp index 02646aff..9af8a6d9 100644 --- a/main.cpp +++ b/main.cpp @@ -88,6 +88,7 @@ static void usage(int ret) { printf(" fast-masked-vload\t\tFaster masked vector loads on SSE (may go past end of array)\n"); printf(" fast-math\t\t\tPerform non-IEEE-compliant optimizations of numeric expressions\n"); #if 0 + printf(" disable-handle-pseudo-memory-ops\n"); printf(" disable-blended-masked-stores\t\tScalarize masked stores on SSE (vs. using vblendps)\n"); printf(" disable-coherent-control-flow\t\tDisable coherent control flow optimizations\n"); printf(" disable-uniform-control-flow\t\tDisable uniform control flow optimizations\n"); @@ -254,6 +255,8 @@ int main(int Argc, char *Argv[]) { // These are only used for performance tests of specific // optimizations + else if (!strcmp(opt, "disable-handle-pseudo-memory-ops")) + g->opt.disableHandlePseudoMemoryOps = true; else if (!strcmp(opt, "disable-blended-masked-stores")) g->opt.disableBlendedMaskedStores = true; else if (!strcmp(opt, "disable-coherent-control-flow")) diff --git a/opt.cpp b/opt.cpp index cf342172..010ab706 100644 --- a/opt.cpp +++ b/opt.cpp @@ -201,8 +201,10 @@ Optimize(llvm::Module *module, int optLevel) { // them into something that can actually execute. optPM.add(llvm::createPromoteMemoryToRegisterPass()); optPM.add(CreateGatherScatterFlattenPass()); - optPM.add(CreateLowerGatherScatterPass()); - optPM.add(CreateLowerMaskedStorePass()); + if (g->opt.disableHandlePseudoMemoryOps == false) { + optPM.add(CreateLowerGatherScatterPass()); + optPM.add(CreateLowerMaskedStorePass()); + } optPM.add(CreateIsCompileTimeConstantPass(true)); optPM.add(llvm::createFunctionInliningPass()); optPM.add(CreateMakeInternalFuncsStaticPass()); @@ -282,8 +284,10 @@ Optimize(llvm::Module *module, int optLevel) { optPM.add(CreateLowerMaskedStorePass()); if (!g->opt.disableGatherScatterOptimizations) optPM.add(CreateGatherScatterImprovementsPass()); - optPM.add(CreateLowerMaskedStorePass()); - optPM.add(CreateLowerGatherScatterPass()); + if (g->opt.disableHandlePseudoMemoryOps == false) { + optPM.add(CreateLowerMaskedStorePass()); + optPM.add(CreateLowerGatherScatterPass()); + } optPM.add(llvm::createFunctionInliningPass()); optPM.add(llvm::createConstantPropagationPass()); optPM.add(CreateIntrinsicsOptPass());