Generate FMA instructions with AVX2 (when possible).

Issue #320.
This commit is contained in:
Matt Pharr
2012-08-03 10:43:41 -07:00
parent e6aec96e05
commit 19d8f2e258
3 changed files with 22 additions and 2 deletions

View File

@@ -366,7 +366,11 @@ Target::GetTarget(const char *arch, const char *cpu, const char *isa,
t->isa = Target::AVX2;
t->nativeVectorWidth = 8;
t->vectorWidth = 8;
t->attributes = "+avx2,+popcnt,+cmov,+f16c,+rdrand";
t->attributes = "+avx2,+popcnt,+cmov,+f16c,+rdrand"
#ifndef LLVM_3_1
",+fma"
#endif // !LLVM_3_1
;
t->maskingIsFree = false;
t->maskBitCount = 32;
t->hasHalf = true;
@@ -380,7 +384,11 @@ Target::GetTarget(const char *arch, const char *cpu, const char *isa,
t->isa = Target::AVX2;
t->nativeVectorWidth = 16;
t->vectorWidth = 16;
t->attributes = "+avx2,+popcnt,+cmov,+f16c,+rdrand";
t->attributes = "+avx2,+popcnt,+cmov,+f16c,+rdrand"
#ifndef LLVM_3_1
",+fma"
#endif // !LLVM_3_1
;
t->maskingIsFree = false;
t->maskBitCount = 32;
t->hasHalf = true;
@@ -477,6 +485,10 @@ Target::GetTargetMachine() const {
#else
std::string featuresString = attributes;
llvm::TargetOptions options;
#if !defined(LLVM_3_1)
if (g->opt.disableFMA == false)
options.AllowFPOpFusion = llvm::FPOpFusion::Fast;
#endif // !LLVM_3_1
llvm::TargetMachine *targetMachine =
target->createTargetMachine(triple, cpu, featuresString, options,
relocModel);
@@ -627,6 +639,7 @@ Opt::Opt() {
force32BitAddressing = true;
unrollLoops = true;
disableAsserts = false;
disableFMA = false;
disableMaskAllOnOptimizations = false;
disableHandlePseudoMemoryOps = false;
disableBlendedMaskedStores = false;

4
ispc.h
View File

@@ -304,6 +304,10 @@ struct Opt {
performance in the generated code). */
bool disableAsserts;
/** Indicates whether FMA instructions should be disabled (on targets
that support them). */
bool disableFMA;
/** If enabled, disables the various optimizations that kick in when
the execution mask can be determined to be "all on" at compile
time. */

View File

@@ -115,6 +115,7 @@ usage(int ret) {
printf(" [-O0/-O1]\t\t\t\tSet optimization level (-O1 is default)\n");
printf(" [--opt=<option>]\t\t\tSet optimization option\n");
printf(" disable-assertions\t\tRemove assertion statements from final code.\n");
printf(" disable-fma\t\t\tDisable 'fused multiply-add' instructions (on targets that support them)\n");
printf(" disable-loop-unroll\t\tDisable loop unrolling.\n");
printf(" fast-masked-vload\t\tFaster masked vector loads on SSE (may go past end of array)\n");
printf(" fast-math\t\t\tPerform non-IEEE-compliant optimizations of numeric expressions\n");
@@ -333,6 +334,8 @@ int main(int Argc, char *Argv[]) {
g->opt.disableAsserts = true;
else if (!strcmp(opt, "disable-loop-unroll"))
g->opt.unrollLoops = false;
else if (!strcmp(opt, "disable-fma"))
g->opt.disableFMA = true;
// These are only used for performance tests of specific
// optimizations