From c59cff396d4df0ffc74e9425072699b5ea750b7d Mon Sep 17 00:00:00 2001
From: evghenii
Date: Wed, 5 Feb 2014 13:55:38 +0100
Subject: [PATCH] added {rsqrt,rcp}d support for knc.h. test-147.ispc & test-148.ispc pass.

---
Reviewer note: __rsqrt_uniform_double originally returned 1.0/v (a plain
reciprocal, identical to __rcp_uniform_double). It now returns 1.0/sqrt(v),
the same formula __rsqrt_varying_double applies to each lane in this patch.
Line breaks and indentation were reconstructed from a whitespace-collapsed
copy, so `git apply --ignore-whitespace` may be needed. The post-image blob
id on the `index` line predates this fix.

 examples/intrinsics/knc.h | 30 ++++++++++--------------------
 1 file changed, 10 insertions(+), 20 deletions(-)

diff --git a/examples/intrinsics/knc.h b/examples/intrinsics/knc.h
index 72951845..458da458 100644
--- a/examples/intrinsics/knc.h
+++ b/examples/intrinsics/knc.h
@@ -1473,18 +1473,14 @@ static FORCEINLINE __vec16_f __rcp_varying_float(__vec16_f v) {
 #endif
 }
 static FORCEINLINE __vec16_d __rcp_varying_double(__vec16_d x) {
-  __vec16_i64 ex = __and(__cast_bits(__vec16_i64(), x), __smear_i64<__vec16_i64>(0x7fe0000000000000));
-  __vec16_d exp = __cast_bits(__vec16_d(), __sub(__smear_i64<__vec16_i64>(0x7fd0000000000000), ex));
-  __vec16_f xf = __cast_fptrunc(__vec16_f(), __mul(x, exp));
-  __vec16_f yf = __rcp_varying_float(xf);
-  __vec16_d y = __mul(__cast_fpext(__vec16_d(), yf), exp);
-  y = __add(y, __mul(y, __sub(__smear_double<__vec16_d>(2.0), __mul(x, y))));
-  y = __add(y, __mul(y, __sub(__smear_double<__vec16_d>(2.0), __mul(x, y))));
-  return y;
+  __vec16_d y;
+  for (int i = 0; i < 16; i++)
+    __insert_element(&y, i, 1.0/__extract_element(x,i));
+  return y;
 }
 static FORCEINLINE double __rcp_uniform_double(double v)
 {
-  return __extract_element(__rcp_varying_double(__smear_double<__vec16_d>(v)),0);
+  return 1.0/v;
 }
 
 
@@ -1496,20 +1492,14 @@ static FORCEINLINE __vec16_f __rsqrt_varying_float(__vec16_f v) {
 #endif
 }
 static FORCEINLINE __vec16_d __rsqrt_varying_double(__vec16_d x) {
-  __vec16_i64 ex = __and(__cast_bits(__vec16_i64(), x), __smear_i64<__vec16_i64>(0x7fe0000000000000));
-  __vec16_d exp = __cast_bits(__vec16_d(), __sub(__smear_i64<__vec16_i64>(0x7fd0000000000000), ex));
-  __vec16_d exph = __cast_bits(__vec16_d(), __sub(__smear_i64<__vec16_i64>(0x5fe0000000000000), __lshr(ex,1)));
-  __vec16_f xf = __cast_fptrunc(__vec16_f(), __mul(x, exp));
-  __vec16_f yf = __rsqrt_varying_float(xf);
-  __vec16_d y = __mul(__cast_fpext(__vec16_d(), yf), exph);
-  __vec16_d xh = __mul(x, __smear_double<__vec16_d>(0.5));
-  y = __add(y, __mul(y, __sub(__smear_double<__vec16_d>(0.5), __mul(xh, __mul(y,y)))));
-  y = __add(y, __mul(y, __sub(__smear_double<__vec16_d>(0.5), __mul(xh, __mul(y,y)))));
-  return y;
+  __vec16_d y;
+  for (int i = 0; i < 16; i++)
+    __insert_element(&y, i, 1.0/sqrt(__extract_element(x,i)));
+  return y;
 }
 static FORCEINLINE double __rsqrt_uniform_double(double v)
 {
-  return __extract_element(__rsqrt_varying_double(__smear_double<__vec16_d>(v)),0);
+  return 1.0/sqrt(v);  // reciprocal square root, same formula as each lane of __rsqrt_varying_double
 }
 
 