# Patch summary (free text ahead of the first "diff --git" header; not part of any hunk).
#
# builtins/target-avx512-common.ll
#   * Removes the bare declaration "declare void @__fastmath() nounwind" and adds a
#     definition of @__fastmath (nounwind alwaysinline) in its place.
#   * The new body allocates an i32 stack slot, passes it (bitcast to i8 *) to
#     @llvm.x86.sse.stmxcsr, loads the stored value, ORs in 32832, stores the result
#     back and passes the same slot to @llvm.x86.sse.ldmxcsr.
#   * 32832 = 64 + 32768; the inline comment labels these as the DAZ and FTZ bits.
#   * Renames the section banner from ";; math" to ";; fast math mode" and drops the
#     ";; rounding floats" banner.
#   * Removes the trailing transcendetals_decl() and trigonometry_decl() lines while
#     keeping rsqrtd_decl() and rcpd_decl().
#
# ispc.cpp
#   * In the Target::Target hunk that ends with "CPUfromISA = CPU_KNL;", changes
#     m_hasTranscendentals, m_hasTrigonometry, m_hasRsqrtd and m_hasRcpd from true to false.
#   * NOTE(review): the unchanged context comment "For MIC it is set to true due to
#     performance reasons. The option should be tested." now sits next to flags set to
#     false -- presumably it refers to a different target; confirm the wording is still intended.
#
# NOTE(review): the line breaks of this patch appear to have been collapsed into spaces;
# the per-line hunk structure must be restored before it can be applied.
diff --git a/builtins/target-avx512-common.ll b/builtins/target-avx512-common.ll index 877827a2..c26f54de 100644 --- a/builtins/target-avx512-common.ll +++ b/builtins/target-avx512-common.ll @@ -105,13 +105,25 @@ define i16 @__float_to_half_uniform(float %v) nounwind readnone { } ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;; math +;; fast math mode + +declare void @llvm.x86.sse.stmxcsr(i8 *) nounwind +declare void @llvm.x86.sse.ldmxcsr(i8 *) nounwind + +define void @__fastmath() nounwind alwaysinline { + %ptr = alloca i32 + %ptr8 = bitcast i32 * %ptr to i8 * + call void @llvm.x86.sse.stmxcsr(i8 * %ptr8) + %oldval = load PTR_OP_ARGS(`i32 ') %ptr + + ; turn on DAZ (64)/FTZ (32768) -> 32832 + %update = or i32 %oldval, 32832 + store i32 %update, i32 *%ptr + call void @llvm.x86.sse.ldmxcsr(i8 * %ptr8) + ret void +} ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;; rounding floats - -declare void @__fastmath() nounwind - ;; round/floor/ceil declare <4 x float> @llvm.x86.sse41.round.ss(<4 x float>, <4 x float>, i32) nounwind readnone @@ -815,5 +827,3 @@ declare_nvptx() rsqrtd_decl() rcpd_decl() -transcendetals_decl() -trigonometry_decl() diff --git a/ispc.cpp b/ispc.cpp index 05affd32..d4252eb0 100644 --- a/ispc.cpp +++ b/ispc.cpp @@ -893,10 +893,10 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic, boo this->m_hasHalf = true; this->m_hasRand = true; this->m_hasGather = this->m_hasScatter = true; - this->m_hasTranscendentals = true; + this->m_hasTranscendentals = false; // For MIC it is set to true due to performance reasons. The option should be tested. - this->m_hasTrigonometry = true; - this->m_hasRsqrtd = this->m_hasRcpd = true; + this->m_hasTrigonometry = false; + this->m_hasRsqrtd = this->m_hasRcpd = false; this->m_hasVecPrefetch = true; CPUfromISA = CPU_KNL; }