From 19d8f2e2583df85986a51c93a2a7ae82bd12db25 Mon Sep 17 00:00:00 2001 From: Matt Pharr Date: Fri, 3 Aug 2012 10:43:41 -0700 Subject: [PATCH] Generate FMA instructions with AVX2 (when possible). Issue #320. --- ispc.cpp | 17 +++++++++++++++-- ispc.h | 4 ++++ main.cpp | 3 +++ 3 files changed, 22 insertions(+), 2 deletions(-) diff --git a/ispc.cpp b/ispc.cpp index 389cb1c7..29801359 100644 --- a/ispc.cpp +++ b/ispc.cpp @@ -366,7 +366,11 @@ Target::GetTarget(const char *arch, const char *cpu, const char *isa, t->isa = Target::AVX2; t->nativeVectorWidth = 8; t->vectorWidth = 8; - t->attributes = "+avx2,+popcnt,+cmov,+f16c,+rdrand"; + t->attributes = "+avx2,+popcnt,+cmov,+f16c,+rdrand" +#ifndef LLVM_3_1 + ",+fma" +#endif // !LLVM_3_1 + ; t->maskingIsFree = false; t->maskBitCount = 32; t->hasHalf = true; @@ -380,7 +384,11 @@ Target::GetTarget(const char *arch, const char *cpu, const char *isa, t->isa = Target::AVX2; t->nativeVectorWidth = 16; t->vectorWidth = 16; - t->attributes = "+avx2,+popcnt,+cmov,+f16c,+rdrand"; + t->attributes = "+avx2,+popcnt,+cmov,+f16c,+rdrand" +#ifndef LLVM_3_1 + ",+fma" +#endif // !LLVM_3_1 + ; t->maskingIsFree = false; t->maskBitCount = 32; t->hasHalf = true; @@ -477,6 +485,10 @@ Target::GetTargetMachine() const { #else std::string featuresString = attributes; llvm::TargetOptions options; +#if !defined(LLVM_3_1) + if (g->opt.disableFMA == false) + options.AllowFPOpFusion = llvm::FPOpFusion::Fast; +#endif // !LLVM_3_1 llvm::TargetMachine *targetMachine = target->createTargetMachine(triple, cpu, featuresString, options, relocModel); @@ -627,6 +639,7 @@ Opt::Opt() { force32BitAddressing = true; unrollLoops = true; disableAsserts = false; + disableFMA = false; disableMaskAllOnOptimizations = false; disableHandlePseudoMemoryOps = false; disableBlendedMaskedStores = false; diff --git a/ispc.h b/ispc.h index 66191844..a023cdfc 100644 --- a/ispc.h +++ b/ispc.h @@ -304,6 +304,10 @@ struct Opt { performance in the generated code). */ bool disableAsserts; + /** Indicates whether FMA instructions should be disabled (on targets + that support them). */ + bool disableFMA; + /** If enabled, disables the various optimizations that kick in when the execution mask can be determined to be "all on" at compile time. */ diff --git a/main.cpp b/main.cpp index ca51d6cb..63c4d572 100644 --- a/main.cpp +++ b/main.cpp @@ -115,6 +115,7 @@ usage(int ret) { printf(" [-O0/-O1]\t\t\t\tSet optimization level (-O1 is default)\n"); printf(" [--opt=