Add double-precision rcp/rsqrt builtins.

- builtins.cpp: add __rcp_{uniform,varying}_double and
  __rsqrt_{uniform,varying}_double to the list of builtin name strings in
  lSetInternalFunctions().
- builtins/util.m4: define the rsqrt_double() and rcp_double() macros. Each
  emits a uniform and a varying function that truncates its input to float,
  calls the corresponding *_float builtin and extends the result back to
  double, so results carry single-precision accuracy and range.
- builtins/target-*.ll: instantiate both macros (target-generic-common.ll
  only declares the four functions instead).
- stdlib.ispc: add rsqrt() overloads for varying and uniform double.

Reviewer notes: leading indentation and the positions of blank context lines
were reconstructed from the hunk line counts. If a hunk's context does not
match the tree, apply with a whitespace-tolerant option such as
"git apply --ignore-whitespace".

diff --git a/builtins.cpp b/builtins.cpp
index d6eefde5..da66ed5f 100644
--- a/builtins.cpp
+++ b/builtins.cpp
@@ -497,6 +497,8 @@ lSetInternalFunctions(llvm::Module *module) {
         "__prefetch_read_uniform_nt",
         "__rcp_uniform_float",
         "__rcp_varying_float",
+        "__rcp_uniform_double",
+        "__rcp_varying_double",
         "__rdrand_i16",
         "__rdrand_i32",
         "__rdrand_i64",
@@ -534,6 +536,8 @@ lSetInternalFunctions(llvm::Module *module) {
         "__round_varying_float",
         "__rsqrt_uniform_float",
         "__rsqrt_varying_float",
+        "__rsqrt_uniform_double",
+        "__rsqrt_varying_double",
         "__set_system_isa",
         "__sext_uniform_bool",
         "__sext_varying_bool",
diff --git a/builtins/target-avx-x2.ll b/builtins/target-avx-x2.ll
index f8fd5cd5..802f726b 100644
--- a/builtins/target-avx-x2.ll
+++ b/builtins/target-avx-x2.ll
@@ -687,3 +687,10 @@ define <16 x double> @__max_varying_double(<16 x double>, <16 x double>) nounwin
   binary4to16(ret, double, @llvm.x86.avx.max.pd.256, %0, %1)
   ret <16 x double> %ret
 }
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;; reciprocals in double precision, if supported
+
+rsqrt_double()
+rcp_double()
+
diff --git a/builtins/target-avx.ll b/builtins/target-avx.ll
index e98a3843..2abed8fe 100644
--- a/builtins/target-avx.ll
+++ b/builtins/target-avx.ll
@@ -559,3 +559,9 @@ gen_scatter(float)
 gen_scatter(i64)
 gen_scatter(double)
 
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;; reciprocals in double precision, if supported
+
+rsqrt_double()
+rcp_double()
+
diff --git a/builtins/target-generic-1.ll b/builtins/target-generic-1.ll
index c43a12a7..40d7ae8c 100644
--- a/builtins/target-generic-1.ll
+++ b/builtins/target-generic-1.ll
@@ -992,3 +992,9 @@ declare <WIDTH x i16> @__float_to_half_varying(<WIDTH x float> %v) nounwind read
 
 define_avgs()
 
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;; reciprocals in double precision, if supported
+
+rsqrt_double()
+rcp_double()
+
diff --git a/builtins/target-generic-common.ll b/builtins/target-generic-common.ll
index 2b2b21c9..4f87ad26 100644
--- a/builtins/target-generic-common.ll
+++ b/builtins/target-generic-common.ll
@@ -191,9 +191,13 @@ declare <WIDTH x double> @__max_varying_double(<WIDTH x double>,
 
 declare float @__rsqrt_uniform_float(float) nounwind readnone
 declare float @__rcp_uniform_float(float) nounwind readnone
+declare double @__rsqrt_uniform_double(double) nounwind readnone
+declare double @__rcp_uniform_double(double) nounwind readnone
 declare float @__sqrt_uniform_float(float) nounwind readnone
 declare <WIDTH x float> @__rcp_varying_float(<WIDTH x float>) nounwind readnone
 declare <WIDTH x float> @__rsqrt_varying_float(<WIDTH x float>) nounwind readnone
+declare <WIDTH x double> @__rcp_varying_double(<WIDTH x double>) nounwind readnone
+declare <WIDTH x double> @__rsqrt_varying_double(<WIDTH x double>) nounwind readnone
 declare <WIDTH x float> @__sqrt_varying_float(<WIDTH x float>) nounwind readnone
 
 declare double @__sqrt_uniform_double(double) nounwind readnone
diff --git a/builtins/target-neon-16.ll b/builtins/target-neon-16.ll
index a0575927..0f144b66 100644
--- a/builtins/target-neon-16.ll
+++ b/builtins/target-neon-16.ll
@@ -515,3 +515,9 @@ define <8 x i16> @__avg_down_int16(<8 x i16>, <8 x i16>) nounwind readnone {
   %r = call <8 x i16> @llvm.arm.neon.vhadds.v8i16(<8 x i16> %0, <8 x i16> %1)
   ret <8 x i16> %r
 }
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;; reciprocals in double precision, if supported
+
+rsqrt_double()
+rcp_double()
diff --git a/builtins/target-neon-32.ll b/builtins/target-neon-32.ll
index 30b062c9..d30efff2 100644
--- a/builtins/target-neon-32.ll
+++ b/builtins/target-neon-32.ll
@@ -485,3 +485,9 @@ define <4 x i16> @__avg_down_int16(<4 x i16>, <4 x i16>) nounwind readnone {
   %r = call <4 x i16> @llvm.arm.neon.vhadds.v4i16(<4 x i16> %0, <4 x i16> %1)
   ret <4 x i16> %r
 }
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;; reciprocals in double precision, if supported
+
+rsqrt_double()
+rcp_double()
diff --git a/builtins/target-neon-8.ll b/builtins/target-neon-8.ll
index 2accfe53..4b6dfa3a 100644
--- a/builtins/target-neon-8.ll
+++ b/builtins/target-neon-8.ll
@@ -581,3 +581,9 @@ define <16 x i16> @__avg_down_int16(<16 x i16>, <16 x i16>) nounwind readnone {
   v8tov16(i16, %r0, %r1, %r)
   ret <16 x i16> %r
 }
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;; reciprocals in double precision, if supported
+
+rsqrt_double()
+rcp_double()
diff --git a/builtins/target-sse2-x2.ll b/builtins/target-sse2-x2.ll
index 77bf1a9d..41b4be09 100644
--- a/builtins/target-sse2-x2.ll
+++ b/builtins/target-sse2-x2.ll
@@ -652,3 +652,9 @@ define <8 x double> @__max_varying_double(<8 x double>, <8 x double>) nounwind r
   binary2to8(ret, double, @llvm.x86.sse2.max.pd, %0, %1)
   ret <8 x double> %ret
 }
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;; reciprocals in double precision, if supported
+
+rsqrt_double()
+rcp_double()
diff --git a/builtins/target-sse2.ll b/builtins/target-sse2.ll
index e42d4990..ac091cdb 100644
--- a/builtins/target-sse2.ll
+++ b/builtins/target-sse2.ll
@@ -587,3 +587,9 @@ gen_scatter(i32)
 gen_scatter(float)
 gen_scatter(i64)
 gen_scatter(double)
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;; reciprocals in double precision, if supported
+
+rsqrt_double()
+rcp_double()
diff --git a/builtins/target-sse4-16.ll b/builtins/target-sse4-16.ll
index 72b81ff0..55c94ce6 100644
--- a/builtins/target-sse4-16.ll
+++ b/builtins/target-sse4-16.ll
@@ -488,3 +488,10 @@ define <8 x i16> @__avg_up_uint16(<8 x i16>, <8 x i16>) {
 define_avg_up_int8()
 define_avg_up_int16()
 define_down_avgs()
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;; reciprocals in double precision, if supported
+
+rsqrt_double()
+rcp_double()
+
diff --git a/builtins/target-sse4-8.ll b/builtins/target-sse4-8.ll
index 69b355e3..4d81df92 100644
--- a/builtins/target-sse4-8.ll
+++ b/builtins/target-sse4-8.ll
@@ -490,3 +490,10 @@ define <16 x i16> @__avg_up_uint16(<16 x i16>, <16 x i16>) nounwind readnone {
 define_avg_up_int8()
 define_avg_up_int16()
 define_down_avgs()
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;; reciprocals in double precision, if supported
+
+rsqrt_double()
+rcp_double()
+
diff --git a/builtins/target-sse4-x2.ll b/builtins/target-sse4-x2.ll
index 842db53f..09088dc3 100644
--- a/builtins/target-sse4-x2.ll
+++ b/builtins/target-sse4-x2.ll
@@ -592,3 +592,9 @@ define <8 x double> @__max_varying_double(<8 x double>, <8 x double>) nounwind r
 
 define_avgs()
 
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;; reciprocals in double precision, if supported
+
+rsqrt_double()
+rcp_double()
+
diff --git a/builtins/target-sse4.ll b/builtins/target-sse4.ll
index 16177b47..747a6a58 100644
--- a/builtins/target-sse4.ll
+++ b/builtins/target-sse4.ll
@@ -515,3 +515,9 @@ gen_scatter(double)
 
 define_avgs()
 
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;; reciprocals in double precision, if supported
+
+rsqrt_double()
+rcp_double()
+
diff --git a/builtins/util.m4 b/builtins/util.m4
index f9ae7cd1..a8340adb 100644
--- a/builtins/util.m4
+++ b/builtins/util.m4
@@ -4531,3 +4531,37 @@ define(`define_avgs', `
 define_up_avgs()
 define_down_avgs()
 ')
+
+define(`rsqrt_double', `
+define double @__rsqrt_uniform_double(double) nounwind alwaysinline readnone
+{
+  %flt = fptrunc double %0 to float
+  %res = call float @__rsqrt_uniform_float(float %flt)
+  %dres = fpext float %res to double
+  ret double %dres
+}
+define <WIDTH x double> @__rsqrt_varying_double(<WIDTH x double>) nounwind alwaysinline readnone
+{
+  %flt = fptrunc <WIDTH x double> %0 to <WIDTH x float>
+  %res = call <WIDTH x float> @__rsqrt_varying_float(<WIDTH x float> %flt)
+  %dres = fpext <WIDTH x float> %res to <WIDTH x double>
+  ret <WIDTH x double> %dres
+}
+')
+
+define(`rcp_double', `
+define double @__rcp_uniform_double(double) nounwind alwaysinline readnone
+{
+  %flt = fptrunc double %0 to float
+  %res = call float @__rcp_uniform_float(float %flt)
+  %dres = fpext float %res to double
+  ret double %dres
+}
+define <WIDTH x double> @__rcp_varying_double(<WIDTH x double>) nounwind alwaysinline readnone
+{
+  %flt = fptrunc <WIDTH x double> %0 to <WIDTH x float>
+  %res = call <WIDTH x float> @__rcp_varying_float(<WIDTH x float> %flt)
+  %dres = fpext <WIDTH x float> %res to <WIDTH x double>
+  ret <WIDTH x double> %dres
+}
+')
diff --git a/stdlib.ispc b/stdlib.ispc
index 3b17283d..de2221b2 100644
--- a/stdlib.ispc
+++ b/stdlib.ispc
@@ -3517,6 +3517,15 @@ static inline uniform double sqrt(uniform double v) {
     return __sqrt_uniform_double(v);
 }
 
+__declspec(safe)
+static inline double rsqrt(double v) {
+    return __rsqrt_varying_double(v);
+}
+
+__declspec(safe)
+static inline uniform double rsqrt(uniform double v) {
+    return __rsqrt_uniform_double(v);
+}
 __declspec(safe)
 static inline double ldexp(double x, int n) {
     unsigned int64 ex = 0x7ff0000000000000;