Generate FMA instructions with AVX2 (when possible).

Issue #320.
This commit is contained in:
Matt Pharr
2012-08-03 10:43:41 -07:00
parent e6aec96e05
commit 19d8f2e258
3 changed files with 22 additions and 2 deletions

View File

@@ -366,7 +366,11 @@ Target::GetTarget(const char *arch, const char *cpu, const char *isa,
t->isa = Target::AVX2; t->isa = Target::AVX2;
t->nativeVectorWidth = 8; t->nativeVectorWidth = 8;
t->vectorWidth = 8; t->vectorWidth = 8;
t->attributes = "+avx2,+popcnt,+cmov,+f16c,+rdrand"; t->attributes = "+avx2,+popcnt,+cmov,+f16c,+rdrand"
#ifndef LLVM_3_1
",+fma"
#endif // !LLVM_3_1
;
t->maskingIsFree = false; t->maskingIsFree = false;
t->maskBitCount = 32; t->maskBitCount = 32;
t->hasHalf = true; t->hasHalf = true;
@@ -380,7 +384,11 @@ Target::GetTarget(const char *arch, const char *cpu, const char *isa,
t->isa = Target::AVX2; t->isa = Target::AVX2;
t->nativeVectorWidth = 16; t->nativeVectorWidth = 16;
t->vectorWidth = 16; t->vectorWidth = 16;
t->attributes = "+avx2,+popcnt,+cmov,+f16c,+rdrand"; t->attributes = "+avx2,+popcnt,+cmov,+f16c,+rdrand"
#ifndef LLVM_3_1
",+fma"
#endif // !LLVM_3_1
;
t->maskingIsFree = false; t->maskingIsFree = false;
t->maskBitCount = 32; t->maskBitCount = 32;
t->hasHalf = true; t->hasHalf = true;
@@ -477,6 +485,10 @@ Target::GetTargetMachine() const {
#else #else
std::string featuresString = attributes; std::string featuresString = attributes;
llvm::TargetOptions options; llvm::TargetOptions options;
#if !defined(LLVM_3_1)
if (g->opt.disableFMA == false)
options.AllowFPOpFusion = llvm::FPOpFusion::Fast;
#endif // !LLVM_3_1
llvm::TargetMachine *targetMachine = llvm::TargetMachine *targetMachine =
target->createTargetMachine(triple, cpu, featuresString, options, target->createTargetMachine(triple, cpu, featuresString, options,
relocModel); relocModel);
@@ -627,6 +639,7 @@ Opt::Opt() {
force32BitAddressing = true; force32BitAddressing = true;
unrollLoops = true; unrollLoops = true;
disableAsserts = false; disableAsserts = false;
disableFMA = false;
disableMaskAllOnOptimizations = false; disableMaskAllOnOptimizations = false;
disableHandlePseudoMemoryOps = false; disableHandlePseudoMemoryOps = false;
disableBlendedMaskedStores = false; disableBlendedMaskedStores = false;

4
ispc.h
View File

@@ -304,6 +304,10 @@ struct Opt {
performance in the generated code). */ performance in the generated code). */
bool disableAsserts; bool disableAsserts;
/** Indicates whether generation of fused multiply-add (FMA) instructions
should be disabled (on targets that support them). */
bool disableFMA;
/** If enabled, disables the various optimizations that kick in when /** If enabled, disables the various optimizations that kick in when
the execution mask can be determined to be "all on" at compile the execution mask can be determined to be "all on" at compile
time. */ time. */

View File

@@ -115,6 +115,7 @@ usage(int ret) {
printf(" [-O0/-O1]\t\t\t\tSet optimization level (-O1 is default)\n"); printf(" [-O0/-O1]\t\t\t\tSet optimization level (-O1 is default)\n");
printf(" [--opt=<option>]\t\t\tSet optimization option\n"); printf(" [--opt=<option>]\t\t\tSet optimization option\n");
printf(" disable-assertions\t\tRemove assertion statements from final code.\n"); printf(" disable-assertions\t\tRemove assertion statements from final code.\n");
printf(" disable-fma\t\t\tDisable 'fused multiply-add' instructions (on targets that support them)\n");
printf(" disable-loop-unroll\t\tDisable loop unrolling.\n"); printf(" disable-loop-unroll\t\tDisable loop unrolling.\n");
printf(" fast-masked-vload\t\tFaster masked vector loads on SSE (may go past end of array)\n"); printf(" fast-masked-vload\t\tFaster masked vector loads on SSE (may go past end of array)\n");
printf(" fast-math\t\t\tPerform non-IEEE-compliant optimizations of numeric expressions\n"); printf(" fast-math\t\t\tPerform non-IEEE-compliant optimizations of numeric expressions\n");
@@ -333,6 +334,8 @@ int main(int Argc, char *Argv[]) {
g->opt.disableAsserts = true; g->opt.disableAsserts = true;
else if (!strcmp(opt, "disable-loop-unroll")) else if (!strcmp(opt, "disable-loop-unroll"))
g->opt.unrollLoops = false; g->opt.unrollLoops = false;
else if (!strcmp(opt, "disable-fma"))
g->opt.disableFMA = true;
// These are only used for performance tests of specific // These are only used for performance tests of specific
// optimizations // optimizations