diff --git a/builtins/target-avx512-common.ll b/builtins/target-avx512-common.ll
index 877827a2..c26f54de 100644
--- a/builtins/target-avx512-common.ll
+++ b/builtins/target-avx512-common.ll
@@ -105,13 +105,25 @@ define i16 @__float_to_half_uniform(float %v) nounwind readnone {
 }
 
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-;; math
+;; fast math mode
+
+declare void @llvm.x86.sse.stmxcsr(i8 *) nounwind
+declare void @llvm.x86.sse.ldmxcsr(i8 *) nounwind
+;; __fastmath: read MXCSR, set the DAZ and FTZ bits (leaving all other bits as they were), write it back
+define void @__fastmath() nounwind alwaysinline {
+  %ptr = alloca i32                              ; stack slot used to read and write MXCSR
+  %ptr8 = bitcast i32 * %ptr to i8 *             ; the stmxcsr/ldmxcsr intrinsics are declared with an i8 pointer
+  call void @llvm.x86.sse.stmxcsr(i8 * %ptr8)    ; store the current MXCSR value into the slot
+  %oldval = load PTR_OP_ARGS(`i32 ') %ptr
+
+  ; turn on DAZ (bit 6 = 64) and FTZ (bit 15 = 32768): 64 + 32768 = 32832
+  %update = or i32 %oldval, 32832
+  store i32 %update, i32 *%ptr
+  call void @llvm.x86.sse.ldmxcsr(i8 * %ptr8)    ; load the updated value back into MXCSR
+  ret void
+}
 
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-;; rounding floats
-
-declare void @__fastmath() nounwind 
-
 ;; round/floor/ceil
 
 declare <4 x float> @llvm.x86.sse41.round.ss(<4 x float>, <4 x float>, i32) nounwind readnone
@@ -815,5 +827,3 @@ declare_nvptx()
 rsqrtd_decl()
 rcpd_decl()
 
-transcendetals_decl()
-trigonometry_decl()
diff --git a/ispc.cpp b/ispc.cpp
index 05affd32..d4252eb0 100644
--- a/ispc.cpp
+++ b/ispc.cpp
@@ -893,10 +893,10 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic, boo
         this->m_hasHalf = true;
         this->m_hasRand = true;
         this->m_hasGather = this->m_hasScatter = true;
-        this->m_hasTranscendentals = true;
+        this->m_hasTranscendentals = false;
         // For MIC it is set to true due to performance reasons. The option should be tested.
-        this->m_hasTrigonometry = true;
-        this->m_hasRsqrtd = this->m_hasRcpd = true;
+        this->m_hasTrigonometry = false;
+        this->m_hasRsqrtd = this->m_hasRcpd = false;
         this->m_hasVecPrefetch = true;
         CPUfromISA = CPU_KNL;
     }