Unroll loops by default; add --opt=disable-loop-unroll to disable unrolling.

Issue #78.
This commit is contained in:
Matt Pharr
2011-09-13 15:36:34 -07:00
parent 0c344b6755
commit 30f9dcd4f5
5 changed files with 33 additions and 11 deletions

View File

@@ -2,7 +2,7 @@
CXX=g++ -m64 CXX=g++ -m64
CXXFLAGS=-Iobjs/ -g3 -Wall CXXFLAGS=-Iobjs/ -g3 -Wall
ISPC=ispc ISPC=ispc
ISPCFLAGS=-O2 --fast-math --instrument --arch=x86-64 ISPCFLAGS=-O2 --instrument --arch=x86-64
default: ao default: ao

View File

@@ -256,6 +256,7 @@ Opt::Opt() {
level = 1; level = 1;
fastMath = false; fastMath = false;
fastMaskedVload = false; fastMaskedVload = false;
unrollLoops = true;
disableBlendedMaskedStores = false; disableBlendedMaskedStores = false;
disableCoherentControlFlow = false; disableCoherentControlFlow = false;
disableUniformControlFlow = false; disableUniformControlFlow = false;

4
ispc.h
View File

@@ -244,6 +244,10 @@ struct Opt {
arrays, so is unsafe in general.) */ arrays, so is unsafe in general.) */
bool fastMaskedVload; bool fastMaskedVload;
/** Indicates when loops should be unrolled (when doing so seems like
it will make sense). */
bool unrollLoops;
/** On targets that don't have a masked store instruction but do have a /** On targets that don't have a masked store instruction but do have a
blending instruction, by default, we simulate masked stores by blending instruction, by default, we simulate masked stores by
loading the old value, blending, and storing the result. This can loading the old value, blending, and storing the result. This can

View File

@@ -73,8 +73,6 @@ static void usage(int ret) {
printf(" [--emit-asm]\t\t\tGenerate assembly language file as output\n"); printf(" [--emit-asm]\t\t\tGenerate assembly language file as output\n");
printf(" [--emit-llvm]\t\t\tEmit LLVM bitode file as output\n"); printf(" [--emit-llvm]\t\t\tEmit LLVM bitode file as output\n");
printf(" [--emit-obj]\t\t\tGenerate object file file as output (default)\n"); printf(" [--emit-obj]\t\t\tGenerate object file file as output (default)\n");
printf(" [--fast-math]\t\t\tPerform non-IEEE-compliant optimizations of numeric expressions\n");
printf(" [--fast-masked-vload]\t\tFaster masked vector loads on SSE (may go past end of array)\n");
printf(" [-g]\t\t\t\tGenerate debugging information\n"); printf(" [-g]\t\t\t\tGenerate debugging information\n");
printf(" [--help]\t\t\t\tPrint help\n"); printf(" [--help]\t\t\t\tPrint help\n");
printf(" [-h <name>/--header-outfile=<name>]\tOutput filename for header\n"); printf(" [-h <name>/--header-outfile=<name>]\tOutput filename for header\n");
@@ -88,8 +86,11 @@ static void usage(int ret) {
printf(" [--nocpp]\t\t\t\tDon't run the C preprocessor\n"); printf(" [--nocpp]\t\t\t\tDon't run the C preprocessor\n");
printf(" [-o <name>/--outfile=<name>]\tOutput filename (may be \"-\" for standard output)\n"); printf(" [-o <name>/--outfile=<name>]\tOutput filename (may be \"-\" for standard output)\n");
printf(" [-O0/-O1]\t\t\t\tSet optimization level (-O1 is default)\n"); printf(" [-O0/-O1]\t\t\t\tSet optimization level (-O1 is default)\n");
#if 0
printf(" [--opt=<option>]\t\t\tSet optimization option\n"); printf(" [--opt=<option>]\t\t\tSet optimization option\n");
printf(" disable-loop-unroll\t\tDisable loop unrolling.\n");
printf(" fast-masked-vload\t\tFaster masked vector loads on SSE (may go past end of array)\n");
printf(" fast-math\t\t\tPerform non-IEEE-compliant optimizations of numeric expressions\n");
#if 0
printf(" disable-blended-masked-stores\t\tScalarize masked stores on SSE (vs. using vblendps)\n"); printf(" disable-blended-masked-stores\t\tScalarize masked stores on SSE (vs. using vblendps)\n");
printf(" disable-coherent-control-flow\t\tDisable coherent control flow optimizations\n"); printf(" disable-coherent-control-flow\t\tDisable coherent control flow optimizations\n");
printf(" disable-uniform-control-flow\t\tDisable uniform control flow optimizations\n"); printf(" disable-uniform-control-flow\t\tDisable uniform control flow optimizations\n");
@@ -198,10 +199,15 @@ int main(int Argc, char *Argv[]) {
arch = argv[i] + 7; arch = argv[i] + 7;
else if (!strncmp(argv[i], "--cpu=", 6)) else if (!strncmp(argv[i], "--cpu=", 6))
cpu = argv[i] + 6; cpu = argv[i] + 6;
else if (!strcmp(argv[i], "--fast-math")) else if (!strcmp(argv[i], "--fast-math")) {
g->opt.fastMath = true; fprintf(stderr, "--fast-math option has been renamed to --opt=fast-math!\n");
else if (!strcmp(argv[i], "--fast-masked-vload")) usage(1);
g->opt.fastMaskedVload = true; }
else if (!strcmp(argv[i], "--fast-masked-vload")) {
fprintf(stderr, "--fast-masked-vload option has been renamed to "
"--opt=fast-masked-vload!\n");
usage(1);
}
else if (!strcmp(argv[i], "--debug")) else if (!strcmp(argv[i], "--debug"))
g->debugPrint = true; g->debugPrint = true;
else if (!strcmp(argv[i], "--instrument")) else if (!strcmp(argv[i], "--instrument"))
@@ -238,7 +244,16 @@ int main(int Argc, char *Argv[]) {
} }
else if (!strncmp(argv[i], "--opt=", 6)) { else if (!strncmp(argv[i], "--opt=", 6)) {
const char *opt = argv[i] + 6; const char *opt = argv[i] + 6;
if (!strcmp(opt, "disable-blended-masked-stores")) if (!strcmp(opt, "fast-math"))
g->opt.fastMath = true;
else if (!strcmp(opt, "fast-masked-vload"))
g->opt.fastMaskedVload = true;
else if (!strcmp(opt, "disable-loop-unroll"))
g->opt.unrollLoops = false;
// These are only used for performance tests of specific
// optimizations
else if (!strcmp(opt, "disable-blended-masked-stores"))
g->opt.disableBlendedMaskedStores = true; g->opt.disableBlendedMaskedStores = true;
else if (!strcmp(opt, "disable-coherent-control-flow")) else if (!strcmp(opt, "disable-coherent-control-flow"))
g->opt.disableCoherentControlFlow = true; g->opt.disableCoherentControlFlow = true;

View File

@@ -294,7 +294,7 @@ Optimize(llvm::Module *module, int optLevel) {
llvm::createStandardModulePasses(&optPM, 3, llvm::createStandardModulePasses(&optPM, 3,
false /* opt size */, false /* opt size */,
true /* unit at a time */, true /* unit at a time */,
false /* unroll loops */, g->opt.unrollLoops,
true /* simplify lib calls */, true /* simplify lib calls */,
false /* may have exceptions */, false /* may have exceptions */,
llvm::createFunctionInliningPass()); llvm::createFunctionInliningPass());
@@ -309,7 +309,7 @@ Optimize(llvm::Module *module, int optLevel) {
llvm::createStandardModulePasses(&optPM, 3, llvm::createStandardModulePasses(&optPM, 3,
false /* opt size */, false /* opt size */,
true /* unit at a time */, true /* unit at a time */,
false /* unroll loops */, g->opt.unrollLoops,
true /* simplify lib calls */, true /* simplify lib calls */,
false /* may have exceptions */, false /* may have exceptions */,
llvm::createFunctionInliningPass()); llvm::createFunctionInliningPass());
@@ -318,6 +318,8 @@ Optimize(llvm::Module *module, int optLevel) {
llvm::PassManagerBuilder builder; llvm::PassManagerBuilder builder;
builder.OptLevel = 3; builder.OptLevel = 3;
builder.Inliner = llvm::createFunctionInliningPass(); builder.Inliner = llvm::createFunctionInliningPass();
if (g->opt.unrollLoops == false)
builder.DisableUnrollLoops = true;
builder.populateFunctionPassManager(funcPM); builder.populateFunctionPassManager(funcPM);
builder.populateModulePassManager(optPM); builder.populateModulePassManager(optPM);
optPM.add(CreateIsCompileTimeConstantPass(true)); optPM.add(CreateIsCompileTimeConstantPass(true));