17
ispc.cpp
17
ispc.cpp
@@ -366,7 +366,11 @@ Target::GetTarget(const char *arch, const char *cpu, const char *isa,
|
|||||||
t->isa = Target::AVX2;
|
t->isa = Target::AVX2;
|
||||||
t->nativeVectorWidth = 8;
|
t->nativeVectorWidth = 8;
|
||||||
t->vectorWidth = 8;
|
t->vectorWidth = 8;
|
||||||
t->attributes = "+avx2,+popcnt,+cmov,+f16c,+rdrand";
|
t->attributes = "+avx2,+popcnt,+cmov,+f16c,+rdrand"
|
||||||
|
#ifndef LLVM_3_1
|
||||||
|
",+fma"
|
||||||
|
#endif // !LLVM_3_1
|
||||||
|
;
|
||||||
t->maskingIsFree = false;
|
t->maskingIsFree = false;
|
||||||
t->maskBitCount = 32;
|
t->maskBitCount = 32;
|
||||||
t->hasHalf = true;
|
t->hasHalf = true;
|
||||||
@@ -380,7 +384,11 @@ Target::GetTarget(const char *arch, const char *cpu, const char *isa,
|
|||||||
t->isa = Target::AVX2;
|
t->isa = Target::AVX2;
|
||||||
t->nativeVectorWidth = 16;
|
t->nativeVectorWidth = 16;
|
||||||
t->vectorWidth = 16;
|
t->vectorWidth = 16;
|
||||||
t->attributes = "+avx2,+popcnt,+cmov,+f16c,+rdrand";
|
t->attributes = "+avx2,+popcnt,+cmov,+f16c,+rdrand"
|
||||||
|
#ifndef LLVM_3_1
|
||||||
|
",+fma"
|
||||||
|
#endif // !LLVM_3_1
|
||||||
|
;
|
||||||
t->maskingIsFree = false;
|
t->maskingIsFree = false;
|
||||||
t->maskBitCount = 32;
|
t->maskBitCount = 32;
|
||||||
t->hasHalf = true;
|
t->hasHalf = true;
|
||||||
@@ -477,6 +485,10 @@ Target::GetTargetMachine() const {
|
|||||||
#else
|
#else
|
||||||
std::string featuresString = attributes;
|
std::string featuresString = attributes;
|
||||||
llvm::TargetOptions options;
|
llvm::TargetOptions options;
|
||||||
|
#if !defined(LLVM_3_1)
|
||||||
|
if (g->opt.disableFMA == false)
|
||||||
|
options.AllowFPOpFusion = llvm::FPOpFusion::Fast;
|
||||||
|
#endif // !LLVM_3_1
|
||||||
llvm::TargetMachine *targetMachine =
|
llvm::TargetMachine *targetMachine =
|
||||||
target->createTargetMachine(triple, cpu, featuresString, options,
|
target->createTargetMachine(triple, cpu, featuresString, options,
|
||||||
relocModel);
|
relocModel);
|
||||||
@@ -627,6 +639,7 @@ Opt::Opt() {
|
|||||||
force32BitAddressing = true;
|
force32BitAddressing = true;
|
||||||
unrollLoops = true;
|
unrollLoops = true;
|
||||||
disableAsserts = false;
|
disableAsserts = false;
|
||||||
|
disableFMA = false;
|
||||||
disableMaskAllOnOptimizations = false;
|
disableMaskAllOnOptimizations = false;
|
||||||
disableHandlePseudoMemoryOps = false;
|
disableHandlePseudoMemoryOps = false;
|
||||||
disableBlendedMaskedStores = false;
|
disableBlendedMaskedStores = false;
|
||||||
|
|||||||
4
ispc.h
4
ispc.h
@@ -304,6 +304,10 @@ struct Opt {
|
|||||||
performance in the generated code). */
|
performance in the generated code). */
|
||||||
bool disableAsserts;
|
bool disableAsserts;
|
||||||
|
|
||||||
|
/** Indicates whether FMA instructions should be disabled (on targets
|
||||||
|
that support them). */
|
||||||
|
bool disableFMA;
|
||||||
|
|
||||||
/** If enabled, disables the various optimizations that kick in when
|
/** If enabled, disables the various optimizations that kick in when
|
||||||
the execution mask can be determined to be "all on" at compile
|
the execution mask can be determined to be "all on" at compile
|
||||||
time. */
|
time. */
|
||||||
|
|||||||
3
main.cpp
3
main.cpp
@@ -115,6 +115,7 @@ usage(int ret) {
|
|||||||
printf(" [-O0/-O1]\t\t\t\tSet optimization level (-O1 is default)\n");
|
printf(" [-O0/-O1]\t\t\t\tSet optimization level (-O1 is default)\n");
|
||||||
printf(" [--opt=<option>]\t\t\tSet optimization option\n");
|
printf(" [--opt=<option>]\t\t\tSet optimization option\n");
|
||||||
printf(" disable-assertions\t\tRemove assertion statements from final code.\n");
|
printf(" disable-assertions\t\tRemove assertion statements from final code.\n");
|
||||||
|
printf(" disable-fma\t\t\tDisable 'fused multiply-add' instructions (on targets that support them)\n");
|
||||||
printf(" disable-loop-unroll\t\tDisable loop unrolling.\n");
|
printf(" disable-loop-unroll\t\tDisable loop unrolling.\n");
|
||||||
printf(" fast-masked-vload\t\tFaster masked vector loads on SSE (may go past end of array)\n");
|
printf(" fast-masked-vload\t\tFaster masked vector loads on SSE (may go past end of array)\n");
|
||||||
printf(" fast-math\t\t\tPerform non-IEEE-compliant optimizations of numeric expressions\n");
|
printf(" fast-math\t\t\tPerform non-IEEE-compliant optimizations of numeric expressions\n");
|
||||||
@@ -333,6 +334,8 @@ int main(int Argc, char *Argv[]) {
|
|||||||
g->opt.disableAsserts = true;
|
g->opt.disableAsserts = true;
|
||||||
else if (!strcmp(opt, "disable-loop-unroll"))
|
else if (!strcmp(opt, "disable-loop-unroll"))
|
||||||
g->opt.unrollLoops = false;
|
g->opt.unrollLoops = false;
|
||||||
|
else if (!strcmp(opt, "disable-fma"))
|
||||||
|
g->opt.disableFMA = true;
|
||||||
|
|
||||||
// These are only used for performance tests of specific
|
// These are only used for performance tests of specific
|
||||||
// optimizations
|
// optimizations
|
||||||
|
|||||||
Reference in New Issue
Block a user