17
ispc.cpp
17
ispc.cpp
@@ -366,7 +366,11 @@ Target::GetTarget(const char *arch, const char *cpu, const char *isa,
|
||||
t->isa = Target::AVX2;
|
||||
t->nativeVectorWidth = 8;
|
||||
t->vectorWidth = 8;
|
||||
t->attributes = "+avx2,+popcnt,+cmov,+f16c,+rdrand";
|
||||
t->attributes = "+avx2,+popcnt,+cmov,+f16c,+rdrand"
|
||||
#ifndef LLVM_3_1
|
||||
",+fma"
|
||||
#endif // !LLVM_3_1
|
||||
;
|
||||
t->maskingIsFree = false;
|
||||
t->maskBitCount = 32;
|
||||
t->hasHalf = true;
|
||||
@@ -380,7 +384,11 @@ Target::GetTarget(const char *arch, const char *cpu, const char *isa,
|
||||
t->isa = Target::AVX2;
|
||||
t->nativeVectorWidth = 16;
|
||||
t->vectorWidth = 16;
|
||||
t->attributes = "+avx2,+popcnt,+cmov,+f16c,+rdrand";
|
||||
t->attributes = "+avx2,+popcnt,+cmov,+f16c,+rdrand"
|
||||
#ifndef LLVM_3_1
|
||||
",+fma"
|
||||
#endif // !LLVM_3_1
|
||||
;
|
||||
t->maskingIsFree = false;
|
||||
t->maskBitCount = 32;
|
||||
t->hasHalf = true;
|
||||
@@ -477,6 +485,10 @@ Target::GetTargetMachine() const {
|
||||
#else
|
||||
std::string featuresString = attributes;
|
||||
llvm::TargetOptions options;
|
||||
#if !defined(LLVM_3_1)
|
||||
if (g->opt.disableFMA == false)
|
||||
options.AllowFPOpFusion = llvm::FPOpFusion::Fast;
|
||||
#endif // !LLVM_3_1
|
||||
llvm::TargetMachine *targetMachine =
|
||||
target->createTargetMachine(triple, cpu, featuresString, options,
|
||||
relocModel);
|
||||
@@ -627,6 +639,7 @@ Opt::Opt() {
|
||||
force32BitAddressing = true;
|
||||
unrollLoops = true;
|
||||
disableAsserts = false;
|
||||
disableFMA = false;
|
||||
disableMaskAllOnOptimizations = false;
|
||||
disableHandlePseudoMemoryOps = false;
|
||||
disableBlendedMaskedStores = false;
|
||||
|
||||
4
ispc.h
4
ispc.h
@@ -304,6 +304,10 @@ struct Opt {
|
||||
performance in the generated code). */
|
||||
bool disableAsserts;
|
||||
|
||||
/** Indicates whether FMA instructions should be disabled (on targets
|
||||
that support them). */
|
||||
bool disableFMA;
|
||||
|
||||
/** If enabled, disables the various optimizations that kick in when
|
||||
the execution mask can be determined to be "all on" at compile
|
||||
time. */
|
||||
|
||||
3
main.cpp
3
main.cpp
@@ -115,6 +115,7 @@ usage(int ret) {
|
||||
printf(" [-O0/-O1]\t\t\t\tSet optimization level (-O1 is default)\n");
|
||||
printf(" [--opt=<option>]\t\t\tSet optimization option\n");
|
||||
printf(" disable-assertions\t\tRemove assertion statements from final code.\n");
|
||||
printf(" disable-fma\t\t\tDisable 'fused multiply-add' instructions (on targets that support them)\n");
|
||||
printf(" disable-loop-unroll\t\tDisable loop unrolling.\n");
|
||||
printf(" fast-masked-vload\t\tFaster masked vector loads on SSE (may go past end of array)\n");
|
||||
printf(" fast-math\t\t\tPerform non-IEEE-compliant optimizations of numeric expressions\n");
|
||||
@@ -333,6 +334,8 @@ int main(int Argc, char *Argv[]) {
|
||||
g->opt.disableAsserts = true;
|
||||
else if (!strcmp(opt, "disable-loop-unroll"))
|
||||
g->opt.unrollLoops = false;
|
||||
else if (!strcmp(opt, "disable-fma"))
|
||||
g->opt.disableFMA = true;
|
||||
|
||||
// These are only used for performance tests of specific
|
||||
// optimizations
|
||||
|
||||
Reference in New Issue
Block a user