From a174a90f86a65ddb6a9cde897996b1ccea368144 Mon Sep 17 00:00:00 2001
From: Ilia Filippov <ili.filippov@gmail.com>
Date: Thu, 1 Aug 2013 11:37:52 +0400
Subject: [PATCH] Supporting dumping, switching off and debug printing of
 optimization phases

---
 ispc.cpp |   1 +
 ispc.h   |  14 ++++++
 main.cpp |  60 +++++++++++++++++++++++
 opt.cpp  | 146 +++++++++++++++++++++++++++++++++++++++++++------------
 4 files changed, 191 insertions(+), 30 deletions(-)
diff --git a/ispc.cpp b/ispc.cpp
index 887f6ca3..82ad8f4d 100644
--- a/ispc.cpp
+++ b/ispc.cpp
@@ -774,6 +774,7 @@ Globals::Globals() {
     includeStdlib = true;
     runCPP = true;
     debugPrint = false;
+    debugIR = -1;
     disableWarnings = false;
     warningsAsErrors = false;
     quiet = false;
diff --git a/ispc.h b/ispc.h
index 7d10b908..57cba2e5 100644
--- a/ispc.h
+++ b/ispc.h
@@ -59,6 +59,7 @@
 #include <stdlib.h>
 #include <stdio.h>
 #include <vector>
+#include <set>
 #include <string>
 
 /** @def ISPC_MAX_NVEC maximum vector size of any of the compliation
@@ -66,6 +67,9 @@
  */
 #define ISPC_MAX_NVEC 64
 
+// Number of the final optimization phase
+#define LAST_OPT_NUMBER 1000
+
 // Forward declarations of a number of widely-used LLVM types
 namespace llvm {
     class AttributeSet;
@@ -494,6 +498,16 @@ struct Globals {
         ispc's execution. */
     bool debugPrint;
 
+    /** Indicates which stages of optimization we want to dump. */
+    std::set<int> debug_stages;
+
+    /** Indicates after which optimization we want to generate
+        DebugIR information. */
+    int debugIR;
+
+    /** Indicates which phases of optimization we want to switch off. */
+    std::set<int> off_stages;
+
     /** Indicates whether all warning messages should be surpressed. */
     bool disableWarnings;
 
diff --git a/main.cpp b/main.cpp
index de2bb620..b107075c 100644
--- a/main.cpp
+++ b/main.cpp
@@ -156,6 +156,11 @@ devUsage(int ret) {
     printf("        disable-uniform-control-flow\t\tDisable uniform control flow optimizations\n");
     printf("        disable-uniform-memory-optimizations\tDisable uniform-based coherent memory access\n");
     printf("    [--yydebug]\t\t\t\tPrint debugging information during parsing\n");
+    printf("    [--debug-phase=<value>]\t\tSet optimization phases to dump. --debug-phase=first,210:220,300,305,310:last\n");
+#ifdef LLVM_3_4
+    printf("    [--debug-ir=<value>]\t\tSet optimization phase to generate debugIR after it\n");
+#endif
+    printf("    [--off-phase=<value>]\t\tSwitch off optimization phases. --off-phase=first,210:220,300,305,310:last\n");
     exit(ret);
 }
 
@@ -212,6 +217,47 @@ lSignal(void *) {
 }
 
 
+// Converts one phase token to its number: "first" -> 0, "last" -> LAST_OPT_NUMBER,
+// anything else is read with atoi(). Only the start of `stage` is examined, so the
+// token may be followed by ',' or ':'. An out-of-range number terminates ispc.
+static int ParsingPhaseName(char * stage) {
+    if (strncmp(stage, "first", 5) == 0) {
+        return 0;
+    }
+    else if (strncmp(stage, "last", 4) == 0) {
+        return LAST_OPT_NUMBER;
+    }
+    int t = atoi(stage);
+    if (t < 0 || t > LAST_OPT_NUMBER) {
+        fprintf(stderr, "Phases must be from 0 to %d. %s is incorrect.\n", LAST_OPT_NUMBER, stage);
+        // Bad command-line input is a failure, so exit with a non-zero status.
+        exit(1);
+    }
+    return t;
+}
+
+
+// Parses a comma-separated list such as "first,210:220,300"; "a:b" is an inclusive range.
+static std::set<int> ParsingPhases(char * stages) {
+    std::set<int> phases;
+    const size_t len = strlen(stages);   // hoisted: the loop never modifies stages
+    int begin = ParsingPhaseName(stages);
+    int end = begin;
+    for (size_t i = 0; i < len; i++) {
+        if ((stages[i] == ',') || (i == len - 1)) {
+            for (int j = begin; j <= end; j++)   // record the item that just ended
+                phases.insert(j);
+            begin = ParsingPhaseName(stages + i + 1);
+            end = begin;
+        }
+        else if (stages[i] == ':') {
+            end = ParsingPhaseName(stages + i + 1);
+        }
+    }
+    return phases;
+}
+
+
 static void
 lParseInclude(const char *path) {
 #ifdef ISPC_IS_WINDOWS
@@ -489,6 +535,20 @@ int main(int Argc, char *Argv[]) {
           }
           hostStubFileName = argv[i];
         }
+        else if (strncmp(argv[i], "--debug-phase=", 14) == 0) {
+            fprintf(stderr, "WARNING: Adding debug phases may change the way PassManager "
+                            "handles the phases and it may possibly make some bugs go "
+                            "away or introduce the new ones.\n");
+            g->debug_stages = ParsingPhases(argv[i] + strlen("--debug-phase="));
+        }
+#ifdef LLVM_3_4
+        else if (strncmp(argv[i], "--debug-ir=", 11) == 0) {
+            g->debugIR = ParsingPhaseName(argv[i] + strlen("--debug-ir="));
+        }
+#endif
+        else if (strncmp(argv[i], "--off-phase=", 12) == 0) {
+            g->off_stages = ParsingPhases(argv[i] + strlen("--off-phase="));
+        }
         else if (!strcmp(argv[i], "-v") || !strcmp(argv[i], "--version")) {
             lPrintVersion();
             return 0;
diff --git a/opt.cpp b/opt.cpp
index ba32c639..4602da43 100644
--- a/opt.cpp
+++ b/opt.cpp
@@ -63,6 +63,9 @@
   #include <llvm/IR/BasicBlock.h>
   #include <llvm/IR/Constants.h>
 #endif
+#if defined (LLVM_3_4)
+  #include <llvm/Transforms/Instrumentation.h>
+#endif
 #include <llvm/PassManager.h>
 #include <llvm/PassRegistry.h>
 #include <llvm/Assembly/PrintModulePass.h>
@@ -117,6 +120,8 @@ static llvm::Pass *CreateReplacePseudoMemoryOpsPass();
 static llvm::Pass *CreateIsCompileTimeConstantPass(bool isLastTry);
 static llvm::Pass *CreateMakeInternalFuncsStaticPass();
 
+static llvm::Pass *CreateDebugPass(char * output);
+
 #define DEBUG_START_PASS(NAME)                                 \
     if (g->debugPrint &&                                       \
         (getenv("FUNC") == NULL ||                             \
@@ -393,6 +398,54 @@ lGetMaskStatus(llvm::Value *mask, int vecWidth = -1) {
 }
 
 
+///////////////////////////////////////////////////////////////////////////
+// Thin wrapper around llvm::PassManager: run() is forwarded unchanged, while
+//   add() numbers every pass it is given and consults the global options.
+//   Through that number the wrapper controls:
+//   - whether an optimization is switched off;
+//   - whether LLVM IR is dumped after an optimization;
+//   - whether LLVM IR debug info for gdb is generated after an optimization.
+class DebugPassManager {
+    llvm::PassManager PM;   // the wrapped pass manager
+    int number;             // phase number given to the latest add() call
+
+public:
+    DebugPassManager() : number(0) { }
+    void add(llvm::Pass * P, int stage);
+    bool run(llvm::Module& M) { return PM.run(M); }
+    // Direct access for APIs that take a llvm::PassManager (e.g. addAnalysisPasses).
+    llvm::PassManager& getPM() { return PM; }
+};
+
+// Registers pass P under phase number `stage`; with the default of -1 the phase
+// number is the previous one plus one. A switched-off phase is not added to PM.
+void
+DebugPassManager::add(llvm::Pass * P, int stage = -1) {
+    number = (stage == -1) ? number + 1 : stage;
+    if (g->off_stages.find(number) != g->off_stages.end()) {
+        // NOTE(review): P never reaches PM on this path, so nothing visible here frees it.
+        return;
+    }
+    // adding optimization (not switched off)
+    PM.add(P);
+    if (g->debug_stages.find(number) != g->debug_stages.end()) {
+        // adding dump of LLVM IR after optimization; "%.*s" clips the pass name so
+        // the header always fits buf (and the equally sized copy made by DebugPass)
+        char buf[100];
+        int used = sprintf(buf, "\n\n*****LLVM IR after phase %d: ", number);
+        sprintf(buf + used, "%.*s*****\n\n",
+            static_cast<int>(sizeof(buf) - used - sizeof("*****\n\n")), P->getPassName());
+        PM.add(CreateDebugPass(buf));
+    }
+#ifdef LLVM_3_4
+    if (g->debugIR == number) {
+        // adding generating of LLVM IR debug after optimization
+        char buf[100];
+        sprintf(buf, "Debug_IR_after_%d_phase.bc", number);
+        PM.add(llvm::createDebugIRPass(true, true, ".", buf));
+    }
+#endif
+}
 ///////////////////////////////////////////////////////////////////////////
 
 void
@@ -401,14 +454,8 @@ Optimize(llvm::Module *module, int optLevel) {
         printf("*** Code going into optimization ***\n");
         module->dump();
     }
-
-    llvm::PassManager optPM;
-    optPM.add(llvm::createVerifierPass());
-
-#if 0
-    std::string err;
-    optPM.add(llvm::createPrintModulePass(new llvm::raw_fd_ostream("-", err)));
-#endif
+    DebugPassManager optPM;
+    optPM.add(llvm::createVerifierPass(),0);
 
     llvm::TargetLibraryInfo *targetLibraryInfo =
         new llvm::TargetLibraryInfo(llvm::Triple(module->getTargetTriple()));
@@ -425,7 +472,7 @@ Optimize(llvm::Module *module, int optLevel) {
     optPM.add(new llvm::TargetTransformInfo(targetMachine->getScalarTargetTransformInfo(),
                                             targetMachine->getVectorTargetTransformInfo()));
   #else // LLVM 3.3+
-    targetMachine->addAnalysisPasses(optPM);
+    targetMachine->addAnalysisPasses(optPM.getPM());
   #endif
 #endif
 
@@ -437,11 +484,11 @@ Optimize(llvm::Module *module, int optLevel) {
         // run absolutely no optimizations, since the front-end needs us to
         // take the various __pseudo_* functions it has emitted and turn
         // them into something that can actually execute.
-        optPM.add(CreateImproveMemoryOpsPass());
+        optPM.add(CreateImproveMemoryOpsPass(), 100);
         if (g->opt.disableHandlePseudoMemoryOps == false)
             optPM.add(CreateReplacePseudoMemoryOpsPass());
 
-        optPM.add(CreateIntrinsicsOptPass());
+        optPM.add(CreateIntrinsicsOptPass(), 102);
         optPM.add(CreateIsCompileTimeConstantPass(true));
         optPM.add(llvm::createFunctionInliningPass());
         optPM.add(CreateMakeInternalFuncsStaticPass());
@@ -460,7 +507,7 @@ Optimize(llvm::Module *module, int optLevel) {
         llvm::initializeInstrumentation(*registry);
         llvm::initializeTarget(*registry);
 
-        optPM.add(llvm::createGlobalDCEPass());
+        optPM.add(llvm::createGlobalDCEPass(), 200);
 
         // Early optimizations to try to reduce the total amount of code to
         // work with if we can
@@ -471,14 +518,14 @@ Optimize(llvm::Module *module, int optLevel) {
 
         if (g->opt.disableGatherScatterOptimizations == false &&
             g->target->getVectorWidth() > 1) {
-            optPM.add(llvm::createInstructionCombiningPass());
+            optPM.add(llvm::createInstructionCombiningPass(), 210);
             optPM.add(CreateImproveMemoryOpsPass());
         }
         if (!g->opt.disableMaskAllOnOptimizations) {
-            optPM.add(CreateIntrinsicsOptPass());
+            optPM.add(CreateIntrinsicsOptPass(), 215);
             optPM.add(CreateVSelMovmskOptPass());
         }
-        optPM.add(llvm::createDeadInstEliminationPass());
+        optPM.add(llvm::createDeadInstEliminationPass(), 220);
 
         // Max struct size threshold for scalar replacement is
         //    1) 4 fields (r,g,b,w)
@@ -508,9 +555,9 @@ Optimize(llvm::Module *module, int optLevel) {
 #if defined(LLVM_3_1) || defined(LLVM_3_2) || defined(LLVM_3_3)
         // Starting from 3.4 this functionality was moved to
         // InstructionCombiningPass. See r184459 for details.
-        optPM.add(llvm::createSimplifyLibCallsPass());
+        optPM.add(llvm::createSimplifyLibCallsPass(), 240);
 #endif
-        optPM.add(llvm::createInstructionCombiningPass());
+        optPM.add(llvm::createInstructionCombiningPass(), 241);
         optPM.add(llvm::createJumpThreadingPass());
         optPM.add(llvm::createCFGSimplificationPass());
         optPM.add(llvm::createScalarReplAggregatesPass(sr_threshold));
@@ -518,43 +565,45 @@ Optimize(llvm::Module *module, int optLevel) {
         optPM.add(llvm::createTailCallEliminationPass());
 
         if (!g->opt.disableMaskAllOnOptimizations) {
-            optPM.add(CreateIntrinsicsOptPass());
+            optPM.add(CreateIntrinsicsOptPass(), 250);
             optPM.add(CreateVSelMovmskOptPass());
         }
 
         if (g->opt.disableGatherScatterOptimizations == false &&
             g->target->getVectorWidth() > 1) {
-            optPM.add(llvm::createInstructionCombiningPass());
+            optPM.add(llvm::createInstructionCombiningPass(), 255);
             optPM.add(CreateImproveMemoryOpsPass());
 
             if (g->opt.disableCoalescing == false &&
                 g->target->getISA() != Target::GENERIC) {
                 // It is important to run this here to make it easier to
                 // finding matching gathers we can coalesce..
-                optPM.add(llvm::createEarlyCSEPass());
+                optPM.add(llvm::createEarlyCSEPass(), 260);
                 optPM.add(CreateGatherCoalescePass());
             }
         }
 
-        optPM.add(llvm::createFunctionInliningPass());
+        optPM.add(llvm::createFunctionInliningPass(), 265);
         optPM.add(llvm::createConstantPropagationPass());
         optPM.add(CreateIntrinsicsOptPass());
         optPM.add(CreateVSelMovmskOptPass());
 
         if (g->opt.disableGatherScatterOptimizations == false &&
             g->target->getVectorWidth() > 1) {
-            optPM.add(llvm::createInstructionCombiningPass());
+            optPM.add(llvm::createInstructionCombiningPass(), 270);
             optPM.add(CreateImproveMemoryOpsPass());
         }
 
-        optPM.add(llvm::createIPSCCPPass());
+        optPM.add(llvm::createIPSCCPPass(), 275);
         optPM.add(llvm::createDeadArgEliminationPass());
         optPM.add(llvm::createInstructionCombiningPass());
         optPM.add(llvm::createCFGSimplificationPass());
 
-        if (g->opt.disableHandlePseudoMemoryOps == false)
-            optPM.add(CreateReplacePseudoMemoryOpsPass());
-        optPM.add(CreateIntrinsicsOptPass());
+        if (g->opt.disableHandlePseudoMemoryOps == false) {
+            optPM.add(CreateReplacePseudoMemoryOpsPass(),280);
+        }
+
+        optPM.add(CreateIntrinsicsOptPass(),281);
         optPM.add(CreateVSelMovmskOptPass());
 
         optPM.add(llvm::createFunctionInliningPass());
@@ -570,9 +619,10 @@ Optimize(llvm::Module *module, int optLevel) {
         optPM.add(llvm::createIndVarSimplifyPass());
         optPM.add(llvm::createLoopIdiomPass());
         optPM.add(llvm::createLoopDeletionPass());
-        if (g->opt.unrollLoops)
-            optPM.add(llvm::createLoopUnrollPass());
-        optPM.add(llvm::createGVNPass());
+        if (g->opt.unrollLoops) {
+            optPM.add(llvm::createLoopUnrollPass(), 300);
+        }
+        optPM.add(llvm::createGVNPass(), 301);
 
         optPM.add(CreateIsCompileTimeConstantPass(true));
         optPM.add(CreateIntrinsicsOptPass());
@@ -595,7 +645,7 @@ Optimize(llvm::Module *module, int optLevel) {
 
     // Finish up by making sure we didn't mess anything up in the IR along
     // the way.
-    optPM.add(llvm::createVerifierPass());
+    optPM.add(llvm::createVerifierPass(), LAST_OPT_NUMBER);
     optPM.run(*module);
 
     if (g->debugPrint) {
@@ -4240,6 +4290,42 @@ CreateIsCompileTimeConstantPass(bool isLastTry) {
     return new IsCompileTimeConstantPass(isLastTry);
 }
 
+//////////////////////////////////////////////////////////////////////////
+// DebugPass
+
+/** This pass is inserted into the list of passes right after an optimization
+    that we want to debug. It prints the header text it was constructed with
+    (name and number of the preceding optimization) followed by a dump of the
+    LLVM IR. */
+class DebugPass : public llvm::ModulePass {
+public:
+    static char ID;
+    DebugPass(char * output) : ModulePass(ID) {
+        sprintf(str_output, "%.99s", output);   // clipped: cannot overrun str_output
+    }
+
+    const char *getPassName() const { return "Dump LLVM IR"; }
+    bool runOnModule(llvm::Module &m);
+
+private:
+    char str_output[100];   // header printed before the dump
+};
+
+char DebugPass::ID = 0;
+
+bool
+DebugPass::runOnModule(llvm::Module &module) {   // prints the header, then the IR
+    fprintf(stderr, "%s", str_output);
+    fflush(stderr);   // presumably keeps the header ahead of the dump output
+    module.dump();
+    return false;     // the module is only printed, never modified
+}
+
+static llvm::Pass * CreateDebugPass(char * output) {   // factory for the IR-dump pass
+    DebugPass * dumper = new DebugPass(output);
+    return dumper;
+}
+
 ///////////////////////////////////////////////////////////////////////////
 // MakeInternalFuncsStaticPass