Unroll loops by default, add --opt=disable-loop-unroll to disable.
Issue #78.
This commit is contained in:
@@ -2,7 +2,7 @@
|
|||||||
CXX=g++ -m64
|
CXX=g++ -m64
|
||||||
CXXFLAGS=-Iobjs/ -g3 -Wall
|
CXXFLAGS=-Iobjs/ -g3 -Wall
|
||||||
ISPC=ispc
|
ISPC=ispc
|
||||||
ISPCFLAGS=-O2 --fast-math --instrument --arch=x86-64
|
ISPCFLAGS=-O2 --instrument --arch=x86-64
|
||||||
|
|
||||||
default: ao
|
default: ao
|
||||||
|
|
||||||
|
|||||||
1
ispc.cpp
1
ispc.cpp
@@ -256,6 +256,7 @@ Opt::Opt() {
|
|||||||
level = 1;
|
level = 1;
|
||||||
fastMath = false;
|
fastMath = false;
|
||||||
fastMaskedVload = false;
|
fastMaskedVload = false;
|
||||||
|
unrollLoops = true;
|
||||||
disableBlendedMaskedStores = false;
|
disableBlendedMaskedStores = false;
|
||||||
disableCoherentControlFlow = false;
|
disableCoherentControlFlow = false;
|
||||||
disableUniformControlFlow = false;
|
disableUniformControlFlow = false;
|
||||||
|
|||||||
4
ispc.h
4
ispc.h
@@ -244,6 +244,10 @@ struct Opt {
|
|||||||
arrays, so is unsafe in general.) */
|
arrays, so is unsafe in general.) */
|
||||||
bool fastMaskedVload;
|
bool fastMaskedVload;
|
||||||
|
|
||||||
|
/** Indicates when loops should be unrolled (when doing so seems like
|
||||||
|
it will make sense). */
|
||||||
|
bool unrollLoops;
|
||||||
|
|
||||||
/** On targets that don't have a masked store instruction but do have a
|
/** On targets that don't have a masked store instruction but do have a
|
||||||
blending instruction, by default, we simulate masked stores by
|
blending instruction, by default, we simulate masked stores by
|
||||||
loading the old value, blending, and storing the result. This can
|
loading the old value, blending, and storing the result. This can
|
||||||
|
|||||||
31
main.cpp
31
main.cpp
@@ -73,8 +73,6 @@ static void usage(int ret) {
|
|||||||
printf(" [--emit-asm]\t\t\tGenerate assembly language file as output\n");
|
printf(" [--emit-asm]\t\t\tGenerate assembly language file as output\n");
|
||||||
printf(" [--emit-llvm]\t\t\tEmit LLVM bitode file as output\n");
|
printf(" [--emit-llvm]\t\t\tEmit LLVM bitode file as output\n");
|
||||||
printf(" [--emit-obj]\t\t\tGenerate object file file as output (default)\n");
|
printf(" [--emit-obj]\t\t\tGenerate object file file as output (default)\n");
|
||||||
printf(" [--fast-math]\t\t\tPerform non-IEEE-compliant optimizations of numeric expressions\n");
|
|
||||||
printf(" [--fast-masked-vload]\t\tFaster masked vector loads on SSE (may go past end of array)\n");
|
|
||||||
printf(" [-g]\t\t\t\tGenerate debugging information\n");
|
printf(" [-g]\t\t\t\tGenerate debugging information\n");
|
||||||
printf(" [--help]\t\t\t\tPrint help\n");
|
printf(" [--help]\t\t\t\tPrint help\n");
|
||||||
printf(" [-h <name>/--header-outfile=<name>]\tOutput filename for header\n");
|
printf(" [-h <name>/--header-outfile=<name>]\tOutput filename for header\n");
|
||||||
@@ -88,8 +86,11 @@ static void usage(int ret) {
|
|||||||
printf(" [--nocpp]\t\t\t\tDon't run the C preprocessor\n");
|
printf(" [--nocpp]\t\t\t\tDon't run the C preprocessor\n");
|
||||||
printf(" [-o <name>/--outfile=<name>]\tOutput filename (may be \"-\" for standard output)\n");
|
printf(" [-o <name>/--outfile=<name>]\tOutput filename (may be \"-\" for standard output)\n");
|
||||||
printf(" [-O0/-O1]\t\t\t\tSet optimization level (-O1 is default)\n");
|
printf(" [-O0/-O1]\t\t\t\tSet optimization level (-O1 is default)\n");
|
||||||
#if 0
|
|
||||||
printf(" [--opt=<option>]\t\t\tSet optimization option\n");
|
printf(" [--opt=<option>]\t\t\tSet optimization option\n");
|
||||||
|
printf(" disable-loop-unroll\t\tDisable loop unrolling.\n");
|
||||||
|
printf(" fast-masked-vload\t\tFaster masked vector loads on SSE (may go past end of array)\n");
|
||||||
|
printf(" fast-math\t\t\tPerform non-IEEE-compliant optimizations of numeric expressions\n");
|
||||||
|
#if 0
|
||||||
printf(" disable-blended-masked-stores\t\tScalarize masked stores on SSE (vs. using vblendps)\n");
|
printf(" disable-blended-masked-stores\t\tScalarize masked stores on SSE (vs. using vblendps)\n");
|
||||||
printf(" disable-coherent-control-flow\t\tDisable coherent control flow optimizations\n");
|
printf(" disable-coherent-control-flow\t\tDisable coherent control flow optimizations\n");
|
||||||
printf(" disable-uniform-control-flow\t\tDisable uniform control flow optimizations\n");
|
printf(" disable-uniform-control-flow\t\tDisable uniform control flow optimizations\n");
|
||||||
@@ -198,10 +199,15 @@ int main(int Argc, char *Argv[]) {
|
|||||||
arch = argv[i] + 7;
|
arch = argv[i] + 7;
|
||||||
else if (!strncmp(argv[i], "--cpu=", 6))
|
else if (!strncmp(argv[i], "--cpu=", 6))
|
||||||
cpu = argv[i] + 6;
|
cpu = argv[i] + 6;
|
||||||
else if (!strcmp(argv[i], "--fast-math"))
|
else if (!strcmp(argv[i], "--fast-math")) {
|
||||||
g->opt.fastMath = true;
|
fprintf(stderr, "--fast-math option has been renamed to --opt=fast-math!\n");
|
||||||
else if (!strcmp(argv[i], "--fast-masked-vload"))
|
usage(1);
|
||||||
g->opt.fastMaskedVload = true;
|
}
|
||||||
|
else if (!strcmp(argv[i], "--fast-masked-vload")) {
|
||||||
|
fprintf(stderr, "--fast-masked-vload option has been renamed to "
|
||||||
|
"--opt=fast-masked-vload!\n");
|
||||||
|
usage(1);
|
||||||
|
}
|
||||||
else if (!strcmp(argv[i], "--debug"))
|
else if (!strcmp(argv[i], "--debug"))
|
||||||
g->debugPrint = true;
|
g->debugPrint = true;
|
||||||
else if (!strcmp(argv[i], "--instrument"))
|
else if (!strcmp(argv[i], "--instrument"))
|
||||||
@@ -238,7 +244,16 @@ int main(int Argc, char *Argv[]) {
|
|||||||
}
|
}
|
||||||
else if (!strncmp(argv[i], "--opt=", 6)) {
|
else if (!strncmp(argv[i], "--opt=", 6)) {
|
||||||
const char *opt = argv[i] + 6;
|
const char *opt = argv[i] + 6;
|
||||||
if (!strcmp(opt, "disable-blended-masked-stores"))
|
if (!strcmp(opt, "fast-math"))
|
||||||
|
g->opt.fastMath = true;
|
||||||
|
else if (!strcmp(opt, "fast-masked-vload"))
|
||||||
|
g->opt.fastMaskedVload = true;
|
||||||
|
else if (!strcmp(opt, "disable-loop-unroll"))
|
||||||
|
g->opt.unrollLoops = false;
|
||||||
|
|
||||||
|
// These are only used for performance tests of specific
|
||||||
|
// optimizations
|
||||||
|
else if (!strcmp(opt, "disable-blended-masked-stores"))
|
||||||
g->opt.disableBlendedMaskedStores = true;
|
g->opt.disableBlendedMaskedStores = true;
|
||||||
else if (!strcmp(opt, "disable-coherent-control-flow"))
|
else if (!strcmp(opt, "disable-coherent-control-flow"))
|
||||||
g->opt.disableCoherentControlFlow = true;
|
g->opt.disableCoherentControlFlow = true;
|
||||||
|
|||||||
6
opt.cpp
6
opt.cpp
@@ -294,7 +294,7 @@ Optimize(llvm::Module *module, int optLevel) {
|
|||||||
llvm::createStandardModulePasses(&optPM, 3,
|
llvm::createStandardModulePasses(&optPM, 3,
|
||||||
false /* opt size */,
|
false /* opt size */,
|
||||||
true /* unit at a time */,
|
true /* unit at a time */,
|
||||||
false /* unroll loops */,
|
g->opt.unrollLoops,
|
||||||
true /* simplify lib calls */,
|
true /* simplify lib calls */,
|
||||||
false /* may have exceptions */,
|
false /* may have exceptions */,
|
||||||
llvm::createFunctionInliningPass());
|
llvm::createFunctionInliningPass());
|
||||||
@@ -309,7 +309,7 @@ Optimize(llvm::Module *module, int optLevel) {
|
|||||||
llvm::createStandardModulePasses(&optPM, 3,
|
llvm::createStandardModulePasses(&optPM, 3,
|
||||||
false /* opt size */,
|
false /* opt size */,
|
||||||
true /* unit at a time */,
|
true /* unit at a time */,
|
||||||
false /* unroll loops */,
|
g->opt.unrollLoops,
|
||||||
true /* simplify lib calls */,
|
true /* simplify lib calls */,
|
||||||
false /* may have exceptions */,
|
false /* may have exceptions */,
|
||||||
llvm::createFunctionInliningPass());
|
llvm::createFunctionInliningPass());
|
||||||
@@ -318,6 +318,8 @@ Optimize(llvm::Module *module, int optLevel) {
|
|||||||
llvm::PassManagerBuilder builder;
|
llvm::PassManagerBuilder builder;
|
||||||
builder.OptLevel = 3;
|
builder.OptLevel = 3;
|
||||||
builder.Inliner = llvm::createFunctionInliningPass();
|
builder.Inliner = llvm::createFunctionInliningPass();
|
||||||
|
if (g->opt.unrollLoops == false)
|
||||||
|
builder.DisableUnrollLoops = true;
|
||||||
builder.populateFunctionPassManager(funcPM);
|
builder.populateFunctionPassManager(funcPM);
|
||||||
builder.populateModulePassManager(optPM);
|
builder.populateModulePassManager(optPM);
|
||||||
optPM.add(CreateIsCompileTimeConstantPass(true));
|
optPM.add(CreateIsCompileTimeConstantPass(true));
|
||||||
|
|||||||
Reference in New Issue
Block a user