Unroll loops by default, add --opt=disable-loop-unroll to disable.

Issue #78.
This commit is contained in:
Matt Pharr
2011-09-13 15:36:34 -07:00
parent 0c344b6755
commit 30f9dcd4f5
5 changed files with 33 additions and 11 deletions

View File

@@ -2,7 +2,7 @@
CXX=g++ -m64
CXXFLAGS=-Iobjs/ -g3 -Wall
ISPC=ispc
ISPCFLAGS=-O2 --fast-math --instrument --arch=x86-64
ISPCFLAGS=-O2 --instrument --arch=x86-64
default: ao

View File

@@ -256,6 +256,7 @@ Opt::Opt() {
level = 1;
fastMath = false;
fastMaskedVload = false;
unrollLoops = true;
disableBlendedMaskedStores = false;
disableCoherentControlFlow = false;
disableUniformControlFlow = false;

4
ispc.h
View File

@@ -244,6 +244,10 @@ struct Opt {
arrays, so is unsafe in general.) */
bool fastMaskedVload;
/** Indicates whether loops should be unrolled (when doing so seems like
it will make sense). */
bool unrollLoops;
/** On targets that don't have a masked store instruction but do have a
blending instruction, by default, we simulate masked stores by
loading the old value, blending, and storing the result. This can

View File

@@ -73,8 +73,6 @@ static void usage(int ret) {
printf(" [--emit-asm]\t\t\tGenerate assembly language file as output\n");
printf(" [--emit-llvm]\t\t\tEmit LLVM bitode file as output\n");
printf(" [--emit-obj]\t\t\tGenerate object file file as output (default)\n");
printf(" [--fast-math]\t\t\tPerform non-IEEE-compliant optimizations of numeric expressions\n");
printf(" [--fast-masked-vload]\t\tFaster masked vector loads on SSE (may go past end of array)\n");
printf(" [-g]\t\t\t\tGenerate debugging information\n");
printf(" [--help]\t\t\t\tPrint help\n");
printf(" [-h <name>/--header-outfile=<name>]\tOutput filename for header\n");
@@ -88,8 +86,11 @@ static void usage(int ret) {
printf(" [--nocpp]\t\t\t\tDon't run the C preprocessor\n");
printf(" [-o <name>/--outfile=<name>]\tOutput filename (may be \"-\" for standard output)\n");
printf(" [-O0/-O1]\t\t\t\tSet optimization level (-O1 is default)\n");
#if 0
printf(" [--opt=<option>]\t\t\tSet optimization option\n");
printf(" disable-loop-unroll\t\tDisable loop unrolling.\n");
printf(" fast-masked-vload\t\tFaster masked vector loads on SSE (may go past end of array)\n");
printf(" fast-math\t\t\tPerform non-IEEE-compliant optimizations of numeric expressions\n");
#if 0
printf(" disable-blended-masked-stores\t\tScalarize masked stores on SSE (vs. using vblendps)\n");
printf(" disable-coherent-control-flow\t\tDisable coherent control flow optimizations\n");
printf(" disable-uniform-control-flow\t\tDisable uniform control flow optimizations\n");
@@ -198,10 +199,15 @@ int main(int Argc, char *Argv[]) {
arch = argv[i] + 7;
else if (!strncmp(argv[i], "--cpu=", 6))
cpu = argv[i] + 6;
else if (!strcmp(argv[i], "--fast-math"))
g->opt.fastMath = true;
else if (!strcmp(argv[i], "--fast-masked-vload"))
g->opt.fastMaskedVload = true;
else if (!strcmp(argv[i], "--fast-math")) {
fprintf(stderr, "--fast-math option has been renamed to --opt=fast-math!\n");
usage(1);
}
else if (!strcmp(argv[i], "--fast-masked-vload")) {
fprintf(stderr, "--fast-masked-vload option has been renamed to "
"--opt=fast-masked-vload!\n");
usage(1);
}
else if (!strcmp(argv[i], "--debug"))
g->debugPrint = true;
else if (!strcmp(argv[i], "--instrument"))
@@ -238,7 +244,16 @@ int main(int Argc, char *Argv[]) {
}
else if (!strncmp(argv[i], "--opt=", 6)) {
const char *opt = argv[i] + 6;
if (!strcmp(opt, "disable-blended-masked-stores"))
if (!strcmp(opt, "fast-math"))
g->opt.fastMath = true;
else if (!strcmp(opt, "fast-masked-vload"))
g->opt.fastMaskedVload = true;
else if (!strcmp(opt, "disable-loop-unroll"))
g->opt.unrollLoops = false;
// These are only used for performance tests of specific
// optimizations
else if (!strcmp(opt, "disable-blended-masked-stores"))
g->opt.disableBlendedMaskedStores = true;
else if (!strcmp(opt, "disable-coherent-control-flow"))
g->opt.disableCoherentControlFlow = true;

View File

@@ -294,7 +294,7 @@ Optimize(llvm::Module *module, int optLevel) {
llvm::createStandardModulePasses(&optPM, 3,
false /* opt size */,
true /* unit at a time */,
false /* unroll loops */,
g->opt.unrollLoops,
true /* simplify lib calls */,
false /* may have exceptions */,
llvm::createFunctionInliningPass());
@@ -309,7 +309,7 @@ Optimize(llvm::Module *module, int optLevel) {
llvm::createStandardModulePasses(&optPM, 3,
false /* opt size */,
true /* unit at a time */,
false /* unroll loops */,
g->opt.unrollLoops,
true /* simplify lib calls */,
false /* may have exceptions */,
llvm::createFunctionInliningPass());
@@ -318,6 +318,8 @@ Optimize(llvm::Module *module, int optLevel) {
llvm::PassManagerBuilder builder;
builder.OptLevel = 3;
builder.Inliner = llvm::createFunctionInliningPass();
if (g->opt.unrollLoops == false)
builder.DisableUnrollLoops = true;
builder.populateFunctionPassManager(funcPM);
builder.populateModulePassManager(optPM);
optPM.add(CreateIsCompileTimeConstantPass(true));