From a66fab4cea156e5a0eb58d05461f6561736a91bc Mon Sep 17 00:00:00 2001 From: Andrey Guskov Date: Wed, 25 Mar 2015 16:11:50 +0300 Subject: [PATCH 1/3] Fix for LLVM trunk (rL232885) --- ispc.cpp | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/ispc.cpp b/ispc.cpp index 8741c8c7..393f639d 100644 --- a/ispc.cpp +++ b/ispc.cpp @@ -930,12 +930,10 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) : // 1. Get default data layout first std::string dl_string; -#if defined(LLVM_3_2) || defined(LLVM_3_3) || defined(LLVM_3_4) || defined(LLVM_3_5) - dl_string = m_targetMachine->getDataLayout()->getStringRepresentation(); -#elif defined(LLVM_3_6) +#if defined(LLVM_3_6) dl_string = m_targetMachine->getSubtargetImpl()->getDataLayout()->getStringRepresentation(); -#else // LLVM 3.7+ - dl_string = m_targetMachine->getSubtargetImpl()->getTargetLowering()->getDataLayout()->getStringRepresentation(); +#else // LLVM 3.5- and LLVM 3.7+ + dl_string = m_targetMachine->getDataLayout()->getStringRepresentation(); #endif // 2. Adjust for generic if (m_isa == Target::GENERIC) { From 50f716f3d8c6b3d823cafd15bd36da319e20f433 Mon Sep 17 00:00:00 2001 From: Anton Mitrokhin Date: Wed, 25 Mar 2015 16:57:30 +0300 Subject: [PATCH 2/3] fix for mishandled __ISPC_NO_EXTERN_C var being defined as zero --- module.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/module.cpp b/module.cpp index 0c945dba..1fa3c9a0 100644 --- a/module.cpp +++ b/module.cpp @@ -1743,7 +1743,7 @@ static void lPrintFunctionDeclarations(FILE *file, const std::vector &funcs, bool useExternC=1, bool rewriteForDispatch=false) { if (useExternC) - fprintf(file, "#if defined(__cplusplus) && !defined(__ISPC_NO_EXTERN_C)\nextern \"C\" {\n#endif // __cplusplus\n"); + fprintf(file, "#if defined(__cplusplus) && (! defined(__ISPC_NO_EXTERN_C) || !__ISPC_NO_EXTERN_C )\nextern \"C\" {\n#endif // __cplusplus\n"); // fprintf(file, "#ifdef __cplusplus\nextern \"C\" {\n#endif // __cplusplus\n"); for (unsigned int i = 0; i < funcs.size(); ++i) { const FunctionType *ftype = CastType(funcs[i]->type); @@ -1759,7 +1759,7 @@ lPrintFunctionDeclarations(FILE *file, const std::vector &funcs, } if (useExternC) - fprintf(file, "#if defined(__cplusplus) && !defined(__ISPC_NO_EXTERN_C)\n} /* end extern C */\n#endif // __cplusplus\n"); + fprintf(file, "#if defined(__cplusplus) && (! defined(__ISPC_NO_EXTERN_C) || !__ISPC_NO_EXTERN_C )\n} /* end extern C */\n#endif // __cplusplus\n"); // fprintf(file, "#ifdef __cplusplus\n} /* end extern C */\n#endif // __cplusplus\n"); } From cd6f8249bf4b9fc5e495cf8cda70194ab8a2f88d Mon Sep 17 00:00:00 2001 From: Vsevolod Livinskiy Date: Thu, 26 Mar 2015 12:11:28 +0300 Subject: [PATCH 3/3] cast_fptosi/ui for double to i32 --- examples/intrinsics/knl.h | 17 +++++++++++------ examples/intrinsics/known_fails.txt | 13 +++++++++++++ 2 files changed, 24 insertions(+), 6 deletions(-) create mode 100644 examples/intrinsics/known_fails.txt diff --git a/examples/intrinsics/knl.h b/examples/intrinsics/knl.h index 2814674a..cfbbf00a 100644 --- a/examples/intrinsics/knl.h +++ b/examples/intrinsics/knl.h @@ -1985,9 +1985,11 @@ static FORCEINLINE __vec16_i64 __cast_fptosi(__vec16_i64, __vec16_f val) { } static FORCEINLINE __vec16_i32 __cast_fptosi(__vec16_i32, __vec16_d val) { - __vec16_i32 tmp = _mm512_cvtfxpnt_roundpd_epi32lo(val.v_hi, _MM_ROUND_MODE_TOWARD_ZERO); - __vec16_i32 ret_hi8 = _mm512_permute4f128_epi32(tmp, _MM_PERM_BADC); - __vec16_i32 ret_lo8 = _mm512_cvtfxpnt_roundpd_epi32lo(val.v_lo, _MM_ROUND_MODE_TOWARD_ZERO); + __m256i tmp = _mm512_cvtpd_epi32(val.v_hi); + __vec16_i32 tmp1 = _mm512_castsi256_si512 (tmp); + __vec16_i32 ret_hi8 = _mm512_permute4f128_epi32(tmp1, _MM_PERM_BADC); + __m256i tmp2 = _mm512_cvtpd_epi32(val.v_lo); + __vec16_i32 ret_lo8 = _mm512_castsi256_si512 (tmp2); return _mm512_xor_epi32(ret_lo8, ret_hi8); } @@ -2049,9 +2051,11 @@ static FORCEINLINE __vec16_i64 __cast_fptoui(__vec16_i64, __vec16_f val) { } static FORCEINLINE __vec16_i32 __cast_fptoui(__vec16_i32, __vec16_d val) { - __vec16_i32 tmp = _mm512_cvtfxpnt_roundpd_epu32lo(val.v_hi, _MM_ROUND_MODE_TOWARD_ZERO); - __vec16_i32 ret_hi8 = _mm512_permute4f128_epi32(tmp, _MM_PERM_BADC); - __vec16_i32 ret_lo8 = _mm512_cvtfxpnt_roundpd_epu32lo(val.v_lo, _MM_ROUND_MODE_TOWARD_ZERO); + __m256i tmp = _mm512_cvtpd_epu32(val.v_hi); + __vec16_i32 tmp1 = _mm512_castsi256_si512 (tmp); + __vec16_i32 ret_hi8 = _mm512_permute4f128_epi32(tmp1, _MM_PERM_BADC); + __m256i tmp2 = _mm512_cvtpd_epu32(val.v_lo); + __vec16_i32 ret_lo8 = _mm512_castsi256_si512 (tmp2); return _mm512_xor_epi32(ret_lo8, ret_hi8); } @@ -2434,6 +2438,7 @@ static FORCEINLINE double __ceil_uniform_double(double v) { } static FORCEINLINE __vec16_f __round_varying_float(__vec16_f v) { + return _mm512_round_ps(v, _MM_ROUND_MODE_NEAREST, _MM_EXPADJ_NONE); } diff --git a/examples/intrinsics/known_fails.txt b/examples/intrinsics/known_fails.txt new file mode 100644 index 00000000..cda10901 --- /dev/null +++ b/examples/intrinsics/known_fails.txt @@ -0,0 +1,13 @@ +=============================================================================== +__and_not2 : _mm512_kandnr -> _mm512_kandn + ./tests/cfor-c-cif-nested-continue.ispc + ./tests/cfor-c-test-134.ispc + ./tests/cfor-c-test-135.ispc + ./tests/cfor-c-test-136.ispc + ./tests/cfor-c-test-64.ispc + ./tests/cfor-c-test-70.ispc + ./tests/cfor-c-test-71.ispc + ./tests/recursion-forward-func-decl.ispc + ./tests/recursion.ispc +=============================================================================== +