From 788098b55f14f27e203ec7c4f00b2b10a9ae9669 Mon Sep 17 00:00:00 2001 From: Dmitry Babokin Date: Thu, 20 Feb 2014 19:15:09 +0400 Subject: [PATCH 01/28] Use icpc for KNC, not icc --- docs/ispc.rst | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/docs/ispc.rst b/docs/ispc.rst index 2c41301c..b1596d56 100644 --- a/docs/ispc.rst +++ b/docs/ispc.rst @@ -655,14 +655,14 @@ To compile for Xeon Phi™, first generate intermediate C++ code: The ``ispc`` distribution now includes a header file, ``examples/intrinsics/knc.h``, which maps from the generic C++ output to the corresponding intrinsic operations supported by Intel Xeon Phi™. -Thus, to generate an object file, use the Intel C Compiler (``icc``) compile +Thus, to generate an object file, use the Intel C++ Compiler (``icpc``) compile the C++ code generated by ``ispc``, setting the ``#include`` search path so that it can find the ``examples/intrinsics/knc.h`` header file in the ``ispc`` distribution. :: - icc -mmic -Iexamples/intrinsics/ foo.cpp -o foo.o + icpc -mmic -Iexamples/intrinsics/ foo.cpp -o foo.o With the current beta implementation, complex ``ispc`` programs are able to run on Xeon Phi™, though there are a number of known limitations: @@ -683,14 +683,14 @@ run on Xeon Phi™, though there are a number of known limitations: where the memory address is actually aligned. This may unnecessarily impact performance. -* When requesting that ICC generate code with strict floating point - precision compliance (using ICC option ``-fp-model strict``) or - accurate reporting of floating point exceptions (using ICC option +* When requesting that ICPC generate code with strict floating point + precision compliance (using ICPC option ``-fp-model strict``) or + accurate reporting of floating point exceptions (using ICPC option ``-fp-model except``) the compiler will generate code that uses the x87 unit rather than Xeon Phi™'s vector unit. 
For similar reasons, the options ``–ansi`` and ``–fmath-errno`` may result in calls to math functions that are implemented in x87 rather than using vector instructions. - This will have a significant performance impact. See the ICC manual for + This will have a significant performance impact. See the ICPC manual for details on these compiler options. All of these issues are currently actively being addressed and will be @@ -3427,7 +3427,7 @@ for this argument. * ``fast``: more efficient but lower accuracy versions of the default ``ispc`` implementations. * ``svml``: use Intel "Short Vector Math Library". Use - ``icc`` to link your final executable so that the appropriate libraries + ``icpc`` to link your final executable so that the appropriate libraries are linked. * ``system``: use the system's math library. On many systems, these functions are more accurate than both of ``ispc``'s implementations. From f0a7baf340bab7c869fb99b9abddb757eca2f860 Mon Sep 17 00:00:00 2001 From: Dmitry Babokin Date: Sat, 22 Feb 2014 01:10:55 +0400 Subject: [PATCH 02/28] Remove conflicting __extract_element(__vec16_i64 ..., ...) 
--- examples/intrinsics/knc.h | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/examples/intrinsics/knc.h b/examples/intrinsics/knc.h index 458da458..36bc26b8 100644 --- a/examples/intrinsics/knc.h +++ b/examples/intrinsics/knc.h @@ -598,8 +598,10 @@ template <> static FORCEINLINE void __store<64>(__vec16_i32 *p, __vec16_i32 v) { // int64 /////////////////////////////////////////////////////////////////////////// -static FORCEINLINE int64_t __extract_element(__vec16_i64 v, uint32_t index) { - return (uint64_t(((int32_t *)&v.v_hi)[index])<<32) | (uint64_t(((int32_t *)&v.v_lo)[index])); +static FORCEINLINE int64_t __extract_element(const __vec16_i64 &v, uint32_t index) +{ + uint *src = (uint *)&v; + return src[index+16] | (int64_t(src[index]) << 32); } static FORCEINLINE void __insert_element(__vec16_i64 *v, uint32_t index, int64_t val) { @@ -750,12 +752,6 @@ static FORCEINLINE __vec16_i64 __select(__vec16_i1 mask, return ret; } -static FORCEINLINE int64_t __extract_element(const __vec16_i64 &v, uint32_t index) -{ - uint *src = (uint *)&v; - return src[index+16] | (int64_t(src[index]) << 32); -} - template RetVecType __smear_i64(const int64_t &l); template <> FORCEINLINE __vec16_i64 __smear_i64<__vec16_i64>(const int64_t &l) { const int *i = (const int*)&l; From af836cda27f38010333454b023f838de37c610fd Mon Sep 17 00:00:00 2001 From: Vsevolod Livinskij Date: Sun, 23 Feb 2014 19:48:03 +0400 Subject: [PATCH 03/28] Saturating multiplication for int64 was added. 
--- stdlib.ispc | 36 ++++++++++++++++++++++++++++++++++++ tests/pmuls_i64.ispc | 28 ++++++++++++++++++++++++++++ tests/pmuls_vi64.ispc | 28 ++++++++++++++++++++++++++++ tests/pmulus_i64.ispc | 28 ++++++++++++++++++++++++++++ tests/pmulus_vi64.ispc | 28 ++++++++++++++++++++++++++++ 5 files changed, 148 insertions(+) create mode 100644 tests/pmuls_i64.ispc create mode 100644 tests/pmuls_vi64.ispc create mode 100644 tests/pmulus_i64.ispc create mode 100644 tests/pmulus_vi64.ispc diff --git a/stdlib.ispc b/stdlib.ispc index 1582a04a..28dd8970 100644 --- a/stdlib.ispc +++ b/stdlib.ispc @@ -4885,6 +4885,28 @@ static inline varying int32 saturating_mul(varying int32 a, varying int32 b) { return result; } +static inline uniform int64 saturating_mul(uniform int64 a, uniform int64 b) { + uniform int64 a_abs = (a > 0) ? a : -a; + uniform int64 b_abs = (b > 0) ? b : -b; + if (a_abs > (INT64_MAX / b_abs)) + if (((a > 0) && (b > 0)) || ((a < 0) && (b < 0))) + return INT64_MAX; + else + return INT64_MIN; + return a * b; +} + +static inline varying int64 saturating_mul(varying int64 a, varying int64 b) { + varying int64 a_abs = (a > 0) ? a : -a; + varying int64 b_abs = (b > 0) ? b : -b; + if (a_abs > (INT64_MAX / b_abs)) + if (((a > 0) && (b > 0)) || ((a < 0) && (b < 0))) + return INT64_MAX; + else + return INT64_MIN; + return a * b; +} + static inline uniform unsigned int8 saturating_mul(uniform unsigned int8 a, uniform unsigned int8 b) { uniform unsigned int16 result = (uniform unsigned int16) a * @@ -4938,6 +4960,20 @@ static inline varying unsigned int32 saturating_mul(varying unsigned int32 a, varying unsigned int32 lo = result; return lo | - (varying int32) !! 
hi; } + +static inline uniform unsigned int64 saturating_mul(uniform unsigned int64 a, + uniform unsigned int64 b) { + if (a > (UINT64_MAX / b)) + return UINT64_MAX; + return a * b; +} + +static inline varying unsigned int64 saturating_mul(varying unsigned int64 a, + varying unsigned int64 b) { + if (a > (UINT64_MAX / b)) + return UINT64_MAX; + return a * b; +} /////////////////////////////////////////////////////////////////////////// // rdrand diff --git a/tests/pmuls_i64.ispc b/tests/pmuls_i64.ispc new file mode 100644 index 00000000..a04ca698 --- /dev/null +++ b/tests/pmuls_i64.ispc @@ -0,0 +1,28 @@ + +export uniform int width() { return programCount; } + +export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) { + uniform int64 a_max = 0x7FFFFFFFFFFFFFFF, a_min = -0x8000000000000000; // max and min signed int64 + if (programIndex % 3 == 0) { + RET[programIndex] = saturating_mul(a_max, (uniform int64) b); + } + else if (programIndex % 3 == 1) { + RET[programIndex] = saturating_mul(a_min, (uniform int64) b); + } + else { + RET[programIndex] = saturating_mul((uniform int64) b, + (uniform int64) b); + } +} + +export void result(uniform float RET[]) { + if (programIndex % 3 == 0) { + RET[programIndex] = (uniform int64) 0x7FFFFFFFFFFFFFFF; // max signed int64 + } + else if (programIndex % 3 == 1) { + RET[programIndex] = (uniform int64) -0x8000000000000000; // min signed int64 + } + else { + RET[programIndex] = (uniform int64) 25; + } +} diff --git a/tests/pmuls_vi64.ispc b/tests/pmuls_vi64.ispc new file mode 100644 index 00000000..32df2fac --- /dev/null +++ b/tests/pmuls_vi64.ispc @@ -0,0 +1,28 @@ + +export uniform int width() { return programCount; } + +export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) { + varying int64 a_max = 0x7FFFFFFFFFFFFFFF, a_min = -0x8000000000000000; // max and min signed int64 + if (programIndex % 3 == 0) { + RET[programIndex] = saturating_mul(a_max, (varying int64) b); + } + else if 
(programIndex % 3 == 1) { + RET[programIndex] = saturating_mul(a_min, (varying int64) b); + } + else { + RET[programIndex] = saturating_mul((varying int64) b, + (varying int64) b); + } +} + +export void result(uniform float RET[]) { + if (programIndex % 3 == 0) { + RET[programIndex] = (varying int64) 0x7FFFFFFFFFFFFFFF; // max signed int64 + } + else if (programIndex % 3 == 1) { + RET[programIndex] = (varying int64) -0x8000000000000000; // min signed int64 + } + else { + RET[programIndex] = (varying int64) 25; + } +} diff --git a/tests/pmulus_i64.ispc b/tests/pmulus_i64.ispc new file mode 100644 index 00000000..179902a3 --- /dev/null +++ b/tests/pmulus_i64.ispc @@ -0,0 +1,28 @@ + +export uniform int width() { return programCount; } + +export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) { + uniform unsigned int64 a_max = 0xFFFFFFFFFFFFFFFF, a_min = 0; // max and min unsigned int64 + if (programIndex % 3 == 0) { + RET[programIndex] = saturating_mul(a_max, (uniform unsigned int64) b); + } + else if (programIndex % 3 == 1) { + RET[programIndex] = saturating_mul(a_min, (uniform unsigned int64) -b); + } + else { + RET[programIndex] = saturating_mul((uniform unsigned int64) b, + (uniform unsigned int64) b); + } +} + +export void result(uniform float RET[]) { + if (programIndex % 3 == 0) { + RET[programIndex] = (uniform unsigned int64) 0xFFFFFFFFFFFFFFFF; // max unsigned int64 + } + else if (programIndex % 3 == 1) { + RET[programIndex] = (uniform unsigned int64) 0; // min unsigned int64 + } + else { + RET[programIndex] = (uniform unsigned int64) 25; + } +} diff --git a/tests/pmulus_vi64.ispc b/tests/pmulus_vi64.ispc new file mode 100644 index 00000000..43ae9aac --- /dev/null +++ b/tests/pmulus_vi64.ispc @@ -0,0 +1,28 @@ + +export uniform int width() { return programCount; } + +export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) { + varying unsigned int64 a_max = 0xFFFFFFFFFFFFFFFF, a_min = 0; // max and min unsigned int64 + 
if (programIndex % 3 == 0) { + RET[programIndex] = saturating_mul(a_max, (varying unsigned int64) b); + } + else if (programIndex % 3 == 1) { + RET[programIndex] = saturating_mul(a_min, (varying unsigned int64) -b); + } + else { + RET[programIndex] = saturating_mul((varying unsigned int64) b, + (varying unsigned int64) b); + } +} + +export void result(uniform float RET[]) { + if (programIndex % 3 == 0) { + RET[programIndex] = (varying unsigned int64) 0xFFFFFFFFFFFFFFFF; // max unsigned int64 + } + else if (programIndex % 3 == 1) { + RET[programIndex] = (varying unsigned int64) 0; // min unsigned int64 + } + else { + RET[programIndex] = (varying unsigned int64) 25; + } +} From 77e4564020b1455376e08b1834575f833a2eb9dc Mon Sep 17 00:00:00 2001 From: Ilia Filippov Date: Tue, 25 Feb 2014 14:25:22 +0400 Subject: [PATCH 04/28] supporting LLVM trunk after r202052 revision --- module.cpp | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/module.cpp b/module.cpp index 94682dc0..c78a9882 100644 --- a/module.cpp +++ b/module.cpp @@ -1081,9 +1081,13 @@ Module::writeObjectFileOrAssembly(llvm::TargetMachine *targetMachine, bool binary = (fileType == llvm::TargetMachine::CGFT_ObjectFile); #if defined(LLVM_3_1) || defined(LLVM_3_2) || defined(LLVM_3_3) unsigned int flags = binary ? llvm::raw_fd_ostream::F_Binary : 0; -#else +#elif defined(LLVM_3_4) llvm::sys::fs::OpenFlags flags = binary ? llvm::sys::fs::F_Binary : llvm::sys::fs::F_None; +#else + llvm::sys::fs::OpenFlags flags = binary ? 
llvm::sys::fs::F_None : + llvm::sys::fs::F_Text; + #endif std::string error; From 06c06456c4bd2b2b76e40b13d02776536b97dbc4 Mon Sep 17 00:00:00 2001 From: Ilia Filippov Date: Wed, 26 Feb 2014 17:06:58 +0400 Subject: [PATCH 05/28] support LLVM trunk after r202168 r202190 revisions --- builtins.cpp | 12 ++++++++++++ module.cpp | 4 ++-- opt.cpp | 12 ++++++------ 3 files changed, 20 insertions(+), 8 deletions(-) diff --git a/builtins.cpp b/builtins.cpp index 8248ce53..8048bb5c 100644 --- a/builtins.cpp +++ b/builtins.cpp @@ -758,6 +758,17 @@ AddBitcodeToModule(const unsigned char *bitcode, int length, // architecture and investigate what happened. // Generally we allow library DataLayout to be subset of module // DataLayout or library DataLayout to be empty. +#if defined(LLVM_3_5) + if (!VerifyDataLayoutCompatibility(module->getDataLayoutStr(), + bcModule->getDataLayoutStr())) { + Warning(SourcePos(), "Module DataLayout is incompatible with " + "library DataLayout:\n" + "Module DL: %s\n" + "Library DL: %s\n", + module->getDataLayoutStr().c_str(), + bcModule->getDataLayoutStr().c_str()); + } +#else if (!VerifyDataLayoutCompatibility(module->getDataLayout(), bcModule->getDataLayout())) { Warning(SourcePos(), "Module DataLayout is incompatible with " @@ -767,6 +778,7 @@ AddBitcodeToModule(const unsigned char *bitcode, int length, module->getDataLayout().c_str(), bcModule->getDataLayout().c_str()); } +#endif } bcModule->setTargetTriple(mTriple.str()); diff --git a/module.cpp b/module.cpp index c78a9882..f43096be 100644 --- a/module.cpp +++ b/module.cpp @@ -1098,8 +1098,8 @@ Module::writeObjectFileOrAssembly(llvm::TargetMachine *targetMachine, } llvm::PassManager pm; -#if defined(LLVM_3_1) - pm.add(new llvm::TargetData(*g->target->getDataLayout())); +#if defined(LLVM_3_5) + pm.add(new llvm::DataLayoutPass(*g->target->getDataLayout())); #else pm.add(new llvm::DataLayout(*g->target->getDataLayout())); #endif diff --git a/opt.cpp b/opt.cpp index 0ec7dd67..e7e98ad7 100644 --- 
a/opt.cpp +++ b/opt.cpp @@ -473,19 +473,19 @@ Optimize(llvm::Module *module, int optLevel) { new llvm::TargetLibraryInfo(llvm::Triple(module->getTargetTriple())); optPM.add(targetLibraryInfo); - -#if defined(LLVM_3_1) - optPM.add(new llvm::TargetData(*g->target->getDataLayout())); +#if defined(LLVM_3_5) + optPM.add(new llvm::DataLayoutPass(*g->target->getDataLayout())); #else optPM.add(new llvm::DataLayout(*g->target->getDataLayout())); +#endif llvm::TargetMachine *targetMachine = g->target->GetTargetMachine(); - #ifdef LLVM_3_2 + +#ifdef LLVM_3_2 optPM.add(new llvm::TargetTransformInfo(targetMachine->getScalarTargetTransformInfo(), targetMachine->getVectorTargetTransformInfo())); - #else // LLVM 3.3+ +#else // LLVM 3.3+ targetMachine->addAnalysisPasses(optPM.getPM()); - #endif #endif optPM.add(llvm::createIndVarSimplifyPass()); From 91621d7b179ae7a8e30dd279d18ad4272bb011e3 Mon Sep 17 00:00:00 2001 From: jbrodman Date: Thu, 27 Feb 2014 02:38:57 -0800 Subject: [PATCH 06/28] Update docs to warn of the difference between sizeof(float) and sizeof(uniform float) --- docs/ispc.rst | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/docs/ispc.rst b/docs/ispc.rst index 2c41301c..8333fdf5 100644 --- a/docs/ispc.rst +++ b/docs/ispc.rst @@ -4038,7 +4038,9 @@ overlap. void memmove(void * varying dst, void * varying src, int32 count) Note that there are variants of these functions that take both ``uniform`` -and ``varying`` pointers. +and ``varying`` pointers. Also note that ``sizeof(float)`` and +``sizeof(uniform float)`` return different values, so programmers should +take care when calculating ``count``. To initialize values in memory, the ``memset`` routine can be used. (It also behaves like the function of the same name in the C Standard Library.) 
From 9ef9f0bf327f205ef93a45f20d6bc91c20151fd3 Mon Sep 17 00:00:00 2001 From: Dmitry Babokin Date: Fri, 28 Feb 2014 20:01:34 +0400 Subject: [PATCH 07/28] Migrating to VS solution files to VS2012 --- examples/aobench/aobench.vcxproj | 14 +++++++++++++- .../aobench_instrumented.vcxproj | 6 +++++- examples/deferred/deferred_shading.vcxproj | 18 +++++++++++++++--- examples/examples.sln | 4 ++-- examples/mandelbrot/mandelbrot.vcxproj | 14 +++++++++++++- .../mandelbrot_tasks/mandelbrot_tasks.vcxproj | 18 +++++++++++++++--- examples/noise/noise.vcxproj | 18 +++++++++++++++--- examples/options/options.vcxproj | 18 +++++++++++++++--- examples/perfbench/perfbench.vcxproj | 8 ++++++-- examples/rt/rt.vcxproj | 18 +++++++++++++++--- examples/simple/simple.vcxproj | 6 +++++- examples/sort/sort.vcxproj | 16 ++++++++++++++-- examples/stencil/stencil.vcxproj | 16 ++++++++++++++-- examples/volume_rendering/volume.vcxproj | 18 +++++++++++++++--- ispc.sln | 4 ++-- 15 files changed, 164 insertions(+), 32 deletions(-) diff --git a/examples/aobench/aobench.vcxproj b/examples/aobench/aobench.vcxproj index 298be2cb..66918e2a 100644 --- a/examples/aobench/aobench.vcxproj +++ b/examples/aobench/aobench.vcxproj @@ -25,10 +25,22 @@ ao sse2,sse4,avx1-i32x8 + + v110 + + + v110 + + + v110 + + + v110 + - + \ No newline at end of file diff --git a/examples/aobench_instrumented/aobench_instrumented.vcxproj b/examples/aobench_instrumented/aobench_instrumented.vcxproj index 5247762c..c1abf354 100644 --- a/examples/aobench_instrumented/aobench_instrumented.vcxproj +++ b/examples/aobench_instrumented/aobench_instrumented.vcxproj @@ -51,23 +51,27 @@ Application true Unicode + v110 Application true Unicode + v110 Application false true Unicode + v110 Application false true Unicode + v110 @@ -172,4 +176,4 @@ - + \ No newline at end of file diff --git a/examples/deferred/deferred_shading.vcxproj b/examples/deferred/deferred_shading.vcxproj index 974e870b..51c217ee 100755 --- 
a/examples/deferred/deferred_shading.vcxproj +++ b/examples/deferred/deferred_shading.vcxproj @@ -1,4 +1,4 @@ - + @@ -17,7 +17,7 @@ Release x64 - + {87f53c53-957e-4e91-878a-bc27828fb9eb} Win32Proj @@ -25,6 +25,18 @@ kernels sse2,sse4-x2,avx1-x2 + + v110 + + + v110 + + + v110 + + + v110 + @@ -33,4 +45,4 @@ - + \ No newline at end of file diff --git a/examples/examples.sln b/examples/examples.sln index 2285f6a6..5de51b74 100755 --- a/examples/examples.sln +++ b/examples/examples.sln @@ -1,6 +1,6 @@  -Microsoft Visual Studio Solution File, Format Version 11.00 -# Visual Studio 2010 +Microsoft Visual Studio Solution File, Format Version 12.00 +# Visual Studio 2012 Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "simple", "simple\simple.vcxproj", "{947C5311-8B78-4D05-BEE4-BCF342D4B367}" EndProject Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "rt", "rt\rt.vcxproj", "{E787BC3F-2D2E-425E-A64D-4721E2FF3DC9}" diff --git a/examples/mandelbrot/mandelbrot.vcxproj b/examples/mandelbrot/mandelbrot.vcxproj index 7a5f6e03..406986fa 100644 --- a/examples/mandelbrot/mandelbrot.vcxproj +++ b/examples/mandelbrot/mandelbrot.vcxproj @@ -25,9 +25,21 @@ mandelbrot sse2,sse4-x2,avx1-x2 + + v110 + + + v110 + + + v110 + + + v110 + - + \ No newline at end of file diff --git a/examples/mandelbrot_tasks/mandelbrot_tasks.vcxproj b/examples/mandelbrot_tasks/mandelbrot_tasks.vcxproj index 113fc4e8..57f741f4 100644 --- a/examples/mandelbrot_tasks/mandelbrot_tasks.vcxproj +++ b/examples/mandelbrot_tasks/mandelbrot_tasks.vcxproj @@ -1,4 +1,4 @@ - + @@ -17,7 +17,7 @@ Release x64 - + {E80DA7D4-AB22-4648-A068-327307156BE6} Win32Proj @@ -25,10 +25,22 @@ mandelbrot_tasks sse2,sse4-x2,avx1-x2 + + v110 + + + v110 + + + v110 + + + v110 + - + \ No newline at end of file diff --git a/examples/noise/noise.vcxproj b/examples/noise/noise.vcxproj index ff3953ae..b7f87354 100644 --- a/examples/noise/noise.vcxproj +++ b/examples/noise/noise.vcxproj @@ -1,4 +1,4 @@ - + @@ -17,7 +17,7 @@ Release x64 - + 
{0E0886D8-8B5E-4EAF-9A21-91E63DAF81FD} Win32Proj @@ -25,9 +25,21 @@ noise sse2,sse4,avx1-x2 + + v110 + + + v110 + + + v110 + + + v110 + - + \ No newline at end of file diff --git a/examples/options/options.vcxproj b/examples/options/options.vcxproj index d48ac8bc..7d21afd8 100644 --- a/examples/options/options.vcxproj +++ b/examples/options/options.vcxproj @@ -1,4 +1,4 @@ - + @@ -17,7 +17,7 @@ Release x64 - + {8C7B5D29-1E76-44E6-BBB8-09830E5DEEAE} Win32Proj @@ -25,6 +25,18 @@ options sse2,sse4-x2,avx1-x2 + + v110 + + + v110 + + + v110 + + + v110 + @@ -32,4 +44,4 @@ - + \ No newline at end of file diff --git a/examples/perfbench/perfbench.vcxproj b/examples/perfbench/perfbench.vcxproj index d94b753c..b8a64b33 100644 --- a/examples/perfbench/perfbench.vcxproj +++ b/examples/perfbench/perfbench.vcxproj @@ -1,4 +1,4 @@ - + @@ -29,23 +29,27 @@ Application true Unicode + v110 Application true Unicode + v110 Application false true Unicode + v110 Application false true Unicode + v110 @@ -173,4 +177,4 @@ - + \ No newline at end of file diff --git a/examples/rt/rt.vcxproj b/examples/rt/rt.vcxproj index 00b6dd3a..9d77fe82 100644 --- a/examples/rt/rt.vcxproj +++ b/examples/rt/rt.vcxproj @@ -1,4 +1,4 @@ - + @@ -17,7 +17,7 @@ Release x64 - + {E787BC3F-2D2E-425E-A64D-4721E2FF3DC9} Win32Proj @@ -25,10 +25,22 @@ rt sse2,sse4-x2,avx1-i32x8 + + v110 + + + v110 + + + v110 + + + v110 + - + \ No newline at end of file diff --git a/examples/simple/simple.vcxproj b/examples/simple/simple.vcxproj index 34908223..3d80467f 100644 --- a/examples/simple/simple.vcxproj +++ b/examples/simple/simple.vcxproj @@ -53,23 +53,27 @@ $(ISPC_compiler) -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(Target Application true Unicode + v110 Application true Unicode + v110 Application false true Unicode + v110 Application false true Unicode + v110 @@ -170,4 +174,4 @@ $(ISPC_compiler) -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(Target - + \ No newline at end of file diff --git 
a/examples/sort/sort.vcxproj b/examples/sort/sort.vcxproj index b0bdc63d..98126b28 100644 --- a/examples/sort/sort.vcxproj +++ b/examples/sort/sort.vcxproj @@ -17,7 +17,7 @@ Release x64 - + {6D3EF8C5-AE26-407B-9ECE-C27CB988D9C2} Win32Proj @@ -25,10 +25,22 @@ sort sse2,sse4-x2,avx1-x2 + + v110 + + + v110 + + + v110 + + + v110 + - + \ No newline at end of file diff --git a/examples/stencil/stencil.vcxproj b/examples/stencil/stencil.vcxproj index fd8564aa..9e339efd 100644 --- a/examples/stencil/stencil.vcxproj +++ b/examples/stencil/stencil.vcxproj @@ -1,4 +1,4 @@ - + @@ -25,10 +25,22 @@ stencil sse2,sse4-x2,avx1-i32x8 + + v110 + + + v110 + + + v110 + + + v110 + - + \ No newline at end of file diff --git a/examples/volume_rendering/volume.vcxproj b/examples/volume_rendering/volume.vcxproj index a1fea5f1..3e2882ae 100644 --- a/examples/volume_rendering/volume.vcxproj +++ b/examples/volume_rendering/volume.vcxproj @@ -1,4 +1,4 @@ - + @@ -17,7 +17,7 @@ Release x64 - + {dee5733a-e93e-449d-9114-9bffcaeb4df9} Win32Proj @@ -25,10 +25,22 @@ volume sse2,sse4-x2,avx1-i32x8 + + v110 + + + v110 + + + v110 + + + v110 + - + \ No newline at end of file diff --git a/ispc.sln b/ispc.sln index 8febee18..a9dbb793 100755 --- a/ispc.sln +++ b/ispc.sln @@ -1,6 +1,6 @@  -Microsoft Visual Studio Solution File, Format Version 11.00 -# Visual Studio 2010 +Microsoft Visual Studio Solution File, Format Version 12.00 +# Visual Studio 2012 Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "ispc", "ispc.vcxproj", "{9861F490-F516-480C-B63C-D62A77AFA9D5}" EndProject Global From c9642aae86d1b73cbaea2c6f885e7240a24e07db Mon Sep 17 00:00:00 2001 From: Dmitry Babokin Date: Fri, 28 Feb 2014 20:05:45 +0400 Subject: [PATCH 08/28] Update of fail DB with result on Windows with VS2012 (3.4 and trunk). 
Excluding 3.3 result (too many fails) --- fail_db.txt | 469 +--------------------------------------------------- 1 file changed, 3 insertions(+), 466 deletions(-) diff --git a/fail_db.txt b/fail_db.txt index 02432603..1b255cbc 100644 --- a/fail_db.txt +++ b/fail_db.txt @@ -266,469 +266,6 @@ ./tests/ptr-int-1.ispc runfail x86 avx2-i32x8 Linux LLVM 3.5 clang++3.4 -O2 * ./tests/ptr-int-1.ispc runfail x86 avx2-i32x16 Linux LLVM 3.5 clang++3.4 -O2 * ./tests/ptr-int-1.ispc runfail x86 avx2-i64x4 Linux LLVM 3.5 clang++3.4 -O2 * -.\tests\exclusive-scan-add-9.ispc runfail x86 sse2-i32x4 Windows LLVM 3.3 cl -O2 * -.\tests\reduce-equal-10.ispc runfail x86 sse2-i32x4 Windows LLVM 3.3 cl -O2 * -.\tests\reduce-max-uint64.ispc runfail x86 sse2-i32x4 Windows LLVM 3.3 cl -O2 * -.\tests\uint64-max-1.ispc runfail x86 sse2-i32x4 Windows LLVM 3.3 cl -O2 * -.\tests\uint64-max.ispc runfail x86 sse2-i32x4 Windows LLVM 3.3 cl -O2 * -.\tests\uint64-min-1.ispc runfail x86 sse2-i32x4 Windows LLVM 3.3 cl -O2 * -.\tests\uint64-min.ispc runfail x86 sse2-i32x4 Windows LLVM 3.3 cl -O2 * -.\tests\exclusive-scan-add-9.ispc runfail x86 sse2-i32x4 Windows LLVM 3.3 cl -O0 * -.\tests\reduce-equal-10.ispc runfail x86 sse2-i32x4 Windows LLVM 3.3 cl -O0 * -.\tests\reduce-max-uint64.ispc runfail x86 sse2-i32x4 Windows LLVM 3.3 cl -O0 * -.\tests\uint64-max-1.ispc runfail x86 sse2-i32x4 Windows LLVM 3.3 cl -O0 * -.\tests\uint64-max.ispc runfail x86 sse2-i32x4 Windows LLVM 3.3 cl -O0 * -.\tests\uint64-min-1.ispc runfail x86 sse2-i32x4 Windows LLVM 3.3 cl -O0 * -.\tests\uint64-min.ispc runfail x86 sse2-i32x4 Windows LLVM 3.3 cl -O0 * -.\tests\exclusive-scan-add-10.ispc runfail x86 sse2-i32x8 Windows LLVM 3.3 cl -O2 * -.\tests\exclusive-scan-add-9.ispc runfail x86 sse2-i32x8 Windows LLVM 3.3 cl -O2 * -.\tests\reduce-equal-10.ispc runfail x86 sse2-i32x8 Windows LLVM 3.3 cl -O2 * -.\tests\reduce-max-uint64.ispc runfail x86 sse2-i32x8 Windows LLVM 3.3 cl -O2 * -.\tests\uint64-max-1.ispc runfail x86 sse2-i32x8 
Windows LLVM 3.3 cl -O2 * -.\tests\uint64-max.ispc runfail x86 sse2-i32x8 Windows LLVM 3.3 cl -O2 * -.\tests\uint64-min-1.ispc runfail x86 sse2-i32x8 Windows LLVM 3.3 cl -O2 * -.\tests\uint64-min.ispc runfail x86 sse2-i32x8 Windows LLVM 3.3 cl -O2 * -.\tests\exclusive-scan-add-10.ispc runfail x86 sse2-i32x8 Windows LLVM 3.3 cl -O0 * -.\tests\exclusive-scan-add-9.ispc runfail x86 sse2-i32x8 Windows LLVM 3.3 cl -O0 * -.\tests\reduce-equal-10.ispc runfail x86 sse2-i32x8 Windows LLVM 3.3 cl -O0 * -.\tests\reduce-max-uint64.ispc runfail x86 sse2-i32x8 Windows LLVM 3.3 cl -O0 * -.\tests\uint64-max-1.ispc runfail x86 sse2-i32x8 Windows LLVM 3.3 cl -O0 * -.\tests\uint64-max.ispc runfail x86 sse2-i32x8 Windows LLVM 3.3 cl -O0 * -.\tests\uint64-min-1.ispc runfail x86 sse2-i32x8 Windows LLVM 3.3 cl -O0 * -.\tests\uint64-min.ispc runfail x86 sse2-i32x8 Windows LLVM 3.3 cl -O0 * -.\tests\exclusive-scan-add-10.ispc runfail x86 sse4-i32x4 Windows LLVM 3.3 cl -O2 * -.\tests\exclusive-scan-add-9.ispc runfail x86 sse4-i32x4 Windows LLVM 3.3 cl -O2 * -.\tests\max-uint-1.ispc runfail x86 sse4-i32x4 Windows LLVM 3.3 cl -O2 * -.\tests\max-uint.ispc runfail x86 sse4-i32x4 Windows LLVM 3.3 cl -O2 * -.\tests\min-uint-2.ispc runfail x86 sse4-i32x4 Windows LLVM 3.3 cl -O2 * -.\tests\packed-load-1.ispc runfail x86 sse4-i32x4 Windows LLVM 3.3 cl -O2 * -.\tests\packed-store.ispc runfail x86 sse4-i32x4 Windows LLVM 3.3 cl -O2 * -.\tests\packed-store2.ispc runfail x86 sse4-i32x4 Windows LLVM 3.3 cl -O2 * -.\tests\reduce-add-uint-1.ispc runfail x86 sse4-i32x4 Windows LLVM 3.3 cl -O2 * -.\tests\reduce-add-uint.ispc runfail x86 sse4-i32x4 Windows LLVM 3.3 cl -O2 * -.\tests\reduce-add-uint64-1.ispc runfail x86 sse4-i32x4 Windows LLVM 3.3 cl -O2 * -.\tests\reduce-add-uint64.ispc runfail x86 sse4-i32x4 Windows LLVM 3.3 cl -O2 * -.\tests\reduce-max-uint.ispc runfail x86 sse4-i32x4 Windows LLVM 3.3 cl -O2 * -.\tests\atomics-10.ispc runfail x86 sse4-i32x4 Windows LLVM 3.3 cl -O0 * 
-.\tests\atomics-varyingptr-1.ispc runfail x86 sse4-i32x4 Windows LLVM 3.3 cl -O0 * -.\tests\local-atomics-10.ispc runfail x86 sse4-i32x4 Windows LLVM 3.3 cl -O0 * -.\tests\local-atomics-varyingptr-1.ispc runfail x86 sse4-i32x4 Windows LLVM 3.3 cl -O0 * -.\tests\max-uint-1.ispc runfail x86 sse4-i32x4 Windows LLVM 3.3 cl -O0 * -.\tests\max-uint.ispc runfail x86 sse4-i32x4 Windows LLVM 3.3 cl -O0 * -.\tests\min-uint-2.ispc runfail x86 sse4-i32x4 Windows LLVM 3.3 cl -O0 * -.\tests\packed-load-1.ispc runfail x86 sse4-i32x4 Windows LLVM 3.3 cl -O0 * -.\tests\packed-store.ispc runfail x86 sse4-i32x4 Windows LLVM 3.3 cl -O0 * -.\tests\packed-store2.ispc runfail x86 sse4-i32x4 Windows LLVM 3.3 cl -O0 * -.\tests\reduce-add-uint-1.ispc runfail x86 sse4-i32x4 Windows LLVM 3.3 cl -O0 * -.\tests\reduce-add-uint.ispc runfail x86 sse4-i32x4 Windows LLVM 3.3 cl -O0 * -.\tests\reduce-add-uint64-1.ispc runfail x86 sse4-i32x4 Windows LLVM 3.3 cl -O0 * -.\tests\reduce-add-uint64.ispc runfail x86 sse4-i32x4 Windows LLVM 3.3 cl -O0 * -.\tests\reduce-max-uint.ispc runfail x86 sse4-i32x4 Windows LLVM 3.3 cl -O0 * -.\tests\exclusive-scan-add-10.ispc runfail x86 sse4-i32x8 Windows LLVM 3.3 cl -O2 * -.\tests\max-uint-1.ispc runfail x86 sse4-i32x8 Windows LLVM 3.3 cl -O2 * -.\tests\max-uint.ispc runfail x86 sse4-i32x8 Windows LLVM 3.3 cl -O2 * -.\tests\min-uint-2.ispc runfail x86 sse4-i32x8 Windows LLVM 3.3 cl -O2 * -.\tests\packed-load-1.ispc runfail x86 sse4-i32x8 Windows LLVM 3.3 cl -O2 * -.\tests\packed-store.ispc runfail x86 sse4-i32x8 Windows LLVM 3.3 cl -O2 * -.\tests\packed-store2.ispc runfail x86 sse4-i32x8 Windows LLVM 3.3 cl -O2 * -.\tests\reduce-add-uint-1.ispc runfail x86 sse4-i32x8 Windows LLVM 3.3 cl -O2 * -.\tests\reduce-add-uint.ispc runfail x86 sse4-i32x8 Windows LLVM 3.3 cl -O2 * -.\tests\reduce-add-uint64-1.ispc runfail x86 sse4-i32x8 Windows LLVM 3.3 cl -O2 * -.\tests\reduce-add-uint64.ispc runfail x86 sse4-i32x8 Windows LLVM 3.3 cl -O2 * -.\tests\reduce-max-uint.ispc 
runfail x86 sse4-i32x8 Windows LLVM 3.3 cl -O2 * -.\tests\atomics-10.ispc runfail x86 sse4-i32x8 Windows LLVM 3.3 cl -O0 * -.\tests\atomics-varyingptr-1.ispc runfail x86 sse4-i32x8 Windows LLVM 3.3 cl -O0 * -.\tests\local-atomics-10.ispc runfail x86 sse4-i32x8 Windows LLVM 3.3 cl -O0 * -.\tests\local-atomics-varyingptr-1.ispc runfail x86 sse4-i32x8 Windows LLVM 3.3 cl -O0 * -.\tests\max-uint-1.ispc runfail x86 sse4-i32x8 Windows LLVM 3.3 cl -O0 * -.\tests\max-uint.ispc runfail x86 sse4-i32x8 Windows LLVM 3.3 cl -O0 * -.\tests\min-uint-2.ispc runfail x86 sse4-i32x8 Windows LLVM 3.3 cl -O0 * -.\tests\packed-load-1.ispc runfail x86 sse4-i32x8 Windows LLVM 3.3 cl -O0 * -.\tests\packed-store.ispc runfail x86 sse4-i32x8 Windows LLVM 3.3 cl -O0 * -.\tests\packed-store2.ispc runfail x86 sse4-i32x8 Windows LLVM 3.3 cl -O0 * -.\tests\reduce-add-uint-1.ispc runfail x86 sse4-i32x8 Windows LLVM 3.3 cl -O0 * -.\tests\reduce-add-uint.ispc runfail x86 sse4-i32x8 Windows LLVM 3.3 cl -O0 * -.\tests\reduce-add-uint64-1.ispc runfail x86 sse4-i32x8 Windows LLVM 3.3 cl -O0 * -.\tests\reduce-add-uint64.ispc runfail x86 sse4-i32x8 Windows LLVM 3.3 cl -O0 * -.\tests\reduce-max-uint.ispc runfail x86 sse4-i32x8 Windows LLVM 3.3 cl -O0 * -.\tests\exclusive-scan-add-10.ispc runfail x86 sse4-i16x8 Windows LLVM 3.3 cl -O2 * -.\tests\max-uint-1.ispc runfail x86 sse4-i16x8 Windows LLVM 3.3 cl -O2 * -.\tests\max-uint.ispc runfail x86 sse4-i16x8 Windows LLVM 3.3 cl -O2 * -.\tests\min-uint-2.ispc runfail x86 sse4-i16x8 Windows LLVM 3.3 cl -O2 * -.\tests\packed-load-1.ispc runfail x86 sse4-i16x8 Windows LLVM 3.3 cl -O2 * -.\tests\packed-store.ispc runfail x86 sse4-i16x8 Windows LLVM 3.3 cl -O2 * -.\tests\packed-store2.ispc runfail x86 sse4-i16x8 Windows LLVM 3.3 cl -O2 * -.\tests\reduce-add-uint-1.ispc runfail x86 sse4-i16x8 Windows LLVM 3.3 cl -O2 * -.\tests\reduce-add-uint.ispc runfail x86 sse4-i16x8 Windows LLVM 3.3 cl -O2 * -.\tests\reduce-add-uint64-1.ispc runfail x86 sse4-i16x8 Windows LLVM 3.3 
cl -O2 * -.\tests\reduce-add-uint64.ispc runfail x86 sse4-i16x8 Windows LLVM 3.3 cl -O2 * -.\tests\reduce-max-uint.ispc runfail x86 sse4-i16x8 Windows LLVM 3.3 cl -O2 * -.\tests\atomics-13.ispc compfail x86 sse4-i16x8 Windows LLVM 3.3 cl -O2 * -.\tests\atomics-10.ispc runfail x86 sse4-i16x8 Windows LLVM 3.3 cl -O0 * -.\tests\atomics-varyingptr-1.ispc runfail x86 sse4-i16x8 Windows LLVM 3.3 cl -O0 * -.\tests\local-atomics-10.ispc runfail x86 sse4-i16x8 Windows LLVM 3.3 cl -O0 * -.\tests\local-atomics-varyingptr-1.ispc runfail x86 sse4-i16x8 Windows LLVM 3.3 cl -O0 * -.\tests\max-uint-1.ispc runfail x86 sse4-i16x8 Windows LLVM 3.3 cl -O0 * -.\tests\max-uint.ispc runfail x86 sse4-i16x8 Windows LLVM 3.3 cl -O0 * -.\tests\min-uint-2.ispc runfail x86 sse4-i16x8 Windows LLVM 3.3 cl -O0 * -.\tests\packed-load-1.ispc runfail x86 sse4-i16x8 Windows LLVM 3.3 cl -O0 * -.\tests\packed-store.ispc runfail x86 sse4-i16x8 Windows LLVM 3.3 cl -O0 * -.\tests\packed-store2.ispc runfail x86 sse4-i16x8 Windows LLVM 3.3 cl -O0 * -.\tests\reduce-add-uint-1.ispc runfail x86 sse4-i16x8 Windows LLVM 3.3 cl -O0 * -.\tests\reduce-add-uint.ispc runfail x86 sse4-i16x8 Windows LLVM 3.3 cl -O0 * -.\tests\reduce-add-uint64-1.ispc runfail x86 sse4-i16x8 Windows LLVM 3.3 cl -O0 * -.\tests\reduce-add-uint64.ispc runfail x86 sse4-i16x8 Windows LLVM 3.3 cl -O0 * -.\tests\reduce-max-uint.ispc runfail x86 sse4-i16x8 Windows LLVM 3.3 cl -O0 * -.\tests\exclusive-scan-add-10.ispc runfail x86 sse4-i8x16 Windows LLVM 3.3 cl -O2 * -.\tests\exclusive-scan-add-9.ispc runfail x86 sse4-i8x16 Windows LLVM 3.3 cl -O2 * -.\tests\max-uint-1.ispc runfail x86 sse4-i8x16 Windows LLVM 3.3 cl -O2 * -.\tests\max-uint.ispc runfail x86 sse4-i8x16 Windows LLVM 3.3 cl -O2 * -.\tests\min-uint-2.ispc runfail x86 sse4-i8x16 Windows LLVM 3.3 cl -O2 * -.\tests\packed-load-1.ispc runfail x86 sse4-i8x16 Windows LLVM 3.3 cl -O2 * -.\tests\packed-store.ispc runfail x86 sse4-i8x16 Windows LLVM 3.3 cl -O2 * -.\tests\packed-store2.ispc 
runfail x86 sse4-i8x16 Windows LLVM 3.3 cl -O2 * -.\tests\reduce-add-uint-1.ispc runfail x86 sse4-i8x16 Windows LLVM 3.3 cl -O2 * -.\tests\reduce-add-uint.ispc runfail x86 sse4-i8x16 Windows LLVM 3.3 cl -O2 * -.\tests\reduce-add-uint64-1.ispc runfail x86 sse4-i8x16 Windows LLVM 3.3 cl -O2 * -.\tests\reduce-add-uint64.ispc runfail x86 sse4-i8x16 Windows LLVM 3.3 cl -O2 * -.\tests\reduce-equal-10.ispc runfail x86 sse4-i8x16 Windows LLVM 3.3 cl -O2 * -.\tests\reduce-max-uint.ispc runfail x86 sse4-i8x16 Windows LLVM 3.3 cl -O2 * -.\tests\reduce-min-uint64.ispc runfail x86 sse4-i8x16 Windows LLVM 3.3 cl -O2 * -.\tests\atomics-13.ispc compfail x86 sse4-i8x16 Windows LLVM 3.3 cl -O2 * -.\tests\atomics-10.ispc runfail x86 sse4-i8x16 Windows LLVM 3.3 cl -O0 * -.\tests\atomics-varyingptr-1.ispc runfail x86 sse4-i8x16 Windows LLVM 3.3 cl -O0 * -.\tests\local-atomics-10.ispc runfail x86 sse4-i8x16 Windows LLVM 3.3 cl -O0 * -.\tests\local-atomics-varyingptr-1.ispc runfail x86 sse4-i8x16 Windows LLVM 3.3 cl -O0 * -.\tests\max-uint-1.ispc runfail x86 sse4-i8x16 Windows LLVM 3.3 cl -O0 * -.\tests\max-uint.ispc runfail x86 sse4-i8x16 Windows LLVM 3.3 cl -O0 * -.\tests\min-uint-2.ispc runfail x86 sse4-i8x16 Windows LLVM 3.3 cl -O0 * -.\tests\packed-load-1.ispc runfail x86 sse4-i8x16 Windows LLVM 3.3 cl -O0 * -.\tests\packed-store.ispc runfail x86 sse4-i8x16 Windows LLVM 3.3 cl -O0 * -.\tests\packed-store2.ispc runfail x86 sse4-i8x16 Windows LLVM 3.3 cl -O0 * -.\tests\reduce-add-uint-1.ispc runfail x86 sse4-i8x16 Windows LLVM 3.3 cl -O0 * -.\tests\reduce-add-uint.ispc runfail x86 sse4-i8x16 Windows LLVM 3.3 cl -O0 * -.\tests\reduce-add-uint64-1.ispc runfail x86 sse4-i8x16 Windows LLVM 3.3 cl -O0 * -.\tests\reduce-add-uint64.ispc runfail x86 sse4-i8x16 Windows LLVM 3.3 cl -O0 * -.\tests\reduce-max-uint.ispc runfail x86 sse4-i8x16 Windows LLVM 3.3 cl -O0 * -.\tests\exclusive-scan-add-10.ispc runfail x86 avx1-i32x4 Windows LLVM 3.3 cl -O2 * -.\tests\exclusive-scan-add-9.ispc runfail x86 
avx1-i32x4 Windows LLVM 3.3 cl -O2 * -.\tests\max-uint-1.ispc runfail x86 avx1-i32x4 Windows LLVM 3.3 cl -O2 * -.\tests\max-uint.ispc runfail x86 avx1-i32x4 Windows LLVM 3.3 cl -O2 * -.\tests\min-uint-2.ispc runfail x86 avx1-i32x4 Windows LLVM 3.3 cl -O2 * -.\tests\packed-load-1.ispc runfail x86 avx1-i32x4 Windows LLVM 3.3 cl -O2 * -.\tests\packed-store.ispc runfail x86 avx1-i32x4 Windows LLVM 3.3 cl -O2 * -.\tests\packed-store2.ispc runfail x86 avx1-i32x4 Windows LLVM 3.3 cl -O2 * -.\tests\reduce-add-uint-1.ispc runfail x86 avx1-i32x4 Windows LLVM 3.3 cl -O2 * -.\tests\reduce-add-uint.ispc runfail x86 avx1-i32x4 Windows LLVM 3.3 cl -O2 * -.\tests\reduce-add-uint64-1.ispc runfail x86 avx1-i32x4 Windows LLVM 3.3 cl -O2 * -.\tests\reduce-add-uint64.ispc runfail x86 avx1-i32x4 Windows LLVM 3.3 cl -O2 * -.\tests\reduce-max-uint.ispc runfail x86 avx1-i32x4 Windows LLVM 3.3 cl -O2 * -.\tests\uint64-max-1.ispc runfail x86 avx1-i32x4 Windows LLVM 3.3 cl -O2 * -.\tests\uint64-max.ispc runfail x86 avx1-i32x4 Windows LLVM 3.3 cl -O2 * -.\tests\uint64-min-1.ispc runfail x86 avx1-i32x4 Windows LLVM 3.3 cl -O2 * -.\tests\uint64-min.ispc runfail x86 avx1-i32x4 Windows LLVM 3.3 cl -O2 * -.\tests\atomics-10.ispc runfail x86 avx1-i32x4 Windows LLVM 3.3 cl -O0 * -.\tests\atomics-varyingptr-1.ispc runfail x86 avx1-i32x4 Windows LLVM 3.3 cl -O0 * -.\tests\local-atomics-10.ispc runfail x86 avx1-i32x4 Windows LLVM 3.3 cl -O0 * -.\tests\local-atomics-varyingptr-1.ispc runfail x86 avx1-i32x4 Windows LLVM 3.3 cl -O0 * -.\tests\max-uint-1.ispc runfail x86 avx1-i32x4 Windows LLVM 3.3 cl -O0 * -.\tests\max-uint.ispc runfail x86 avx1-i32x4 Windows LLVM 3.3 cl -O0 * -.\tests\min-uint-2.ispc runfail x86 avx1-i32x4 Windows LLVM 3.3 cl -O0 * -.\tests\packed-load-1.ispc runfail x86 avx1-i32x4 Windows LLVM 3.3 cl -O0 * -.\tests\packed-store.ispc runfail x86 avx1-i32x4 Windows LLVM 3.3 cl -O0 * -.\tests\packed-store2.ispc runfail x86 avx1-i32x4 Windows LLVM 3.3 cl -O0 * -.\tests\reduce-add-uint-1.ispc 
runfail x86 avx1-i32x4 Windows LLVM 3.3 cl -O0 * -.\tests\reduce-add-uint.ispc runfail x86 avx1-i32x4 Windows LLVM 3.3 cl -O0 * -.\tests\reduce-add-uint64-1.ispc runfail x86 avx1-i32x4 Windows LLVM 3.3 cl -O0 * -.\tests\reduce-add-uint64.ispc runfail x86 avx1-i32x4 Windows LLVM 3.3 cl -O0 * -.\tests\reduce-max-uint.ispc runfail x86 avx1-i32x4 Windows LLVM 3.3 cl -O0 * -.\tests\exclusive-scan-add-10.ispc runfail x86 avx1-i32x8 Windows LLVM 3.3 cl -O2 * -.\tests\max-uint-1.ispc runfail x86 avx1-i32x8 Windows LLVM 3.3 cl -O2 * -.\tests\max-uint.ispc runfail x86 avx1-i32x8 Windows LLVM 3.3 cl -O2 * -.\tests\min-uint-2.ispc runfail x86 avx1-i32x8 Windows LLVM 3.3 cl -O2 * -.\tests\packed-load-1.ispc runfail x86 avx1-i32x8 Windows LLVM 3.3 cl -O2 * -.\tests\packed-store.ispc runfail x86 avx1-i32x8 Windows LLVM 3.3 cl -O2 * -.\tests\packed-store2.ispc runfail x86 avx1-i32x8 Windows LLVM 3.3 cl -O2 * -.\tests\reduce-add-uint-1.ispc runfail x86 avx1-i32x8 Windows LLVM 3.3 cl -O2 * -.\tests\reduce-add-uint.ispc runfail x86 avx1-i32x8 Windows LLVM 3.3 cl -O2 * -.\tests\reduce-add-uint64-1.ispc runfail x86 avx1-i32x8 Windows LLVM 3.3 cl -O2 * -.\tests\reduce-add-uint64.ispc runfail x86 avx1-i32x8 Windows LLVM 3.3 cl -O2 * -.\tests\reduce-max-uint.ispc runfail x86 avx1-i32x8 Windows LLVM 3.3 cl -O2 * -.\tests\reduce-max-uint64.ispc runfail x86 avx1-i32x8 Windows LLVM 3.3 cl -O2 * -.\tests\uint64-max-1.ispc runfail x86 avx1-i32x8 Windows LLVM 3.3 cl -O2 * -.\tests\uint64-max.ispc runfail x86 avx1-i32x8 Windows LLVM 3.3 cl -O2 * -.\tests\uint64-min-1.ispc runfail x86 avx1-i32x8 Windows LLVM 3.3 cl -O2 * -.\tests\uint64-min.ispc runfail x86 avx1-i32x8 Windows LLVM 3.3 cl -O2 * -.\tests\atomics-10.ispc runfail x86 avx1-i32x8 Windows LLVM 3.3 cl -O0 * -.\tests\atomics-varyingptr-1.ispc runfail x86 avx1-i32x8 Windows LLVM 3.3 cl -O0 * -.\tests\local-atomics-10.ispc runfail x86 avx1-i32x8 Windows LLVM 3.3 cl -O0 * -.\tests\local-atomics-varyingptr-1.ispc runfail x86 avx1-i32x8 Windows 
LLVM 3.3 cl -O0 * -.\tests\max-uint-1.ispc runfail x86 avx1-i32x8 Windows LLVM 3.3 cl -O0 * -.\tests\max-uint.ispc runfail x86 avx1-i32x8 Windows LLVM 3.3 cl -O0 * -.\tests\min-uint-2.ispc runfail x86 avx1-i32x8 Windows LLVM 3.3 cl -O0 * -.\tests\packed-load-1.ispc runfail x86 avx1-i32x8 Windows LLVM 3.3 cl -O0 * -.\tests\packed-store.ispc runfail x86 avx1-i32x8 Windows LLVM 3.3 cl -O0 * -.\tests\packed-store2.ispc runfail x86 avx1-i32x8 Windows LLVM 3.3 cl -O0 * -.\tests\reduce-add-uint-1.ispc runfail x86 avx1-i32x8 Windows LLVM 3.3 cl -O0 * -.\tests\reduce-add-uint.ispc runfail x86 avx1-i32x8 Windows LLVM 3.3 cl -O0 * -.\tests\reduce-add-uint64-1.ispc runfail x86 avx1-i32x8 Windows LLVM 3.3 cl -O0 * -.\tests\reduce-add-uint64.ispc runfail x86 avx1-i32x8 Windows LLVM 3.3 cl -O0 * -.\tests\reduce-max-uint.ispc runfail x86 avx1-i32x8 Windows LLVM 3.3 cl -O0 * -.\tests\exclusive-scan-add-10.ispc runfail x86 avx1-i32x16 Windows LLVM 3.3 cl -O2 * -.\tests\max-uint-1.ispc runfail x86 avx1-i32x16 Windows LLVM 3.3 cl -O2 * -.\tests\max-uint.ispc runfail x86 avx1-i32x16 Windows LLVM 3.3 cl -O2 * -.\tests\min-uint-2.ispc runfail x86 avx1-i32x16 Windows LLVM 3.3 cl -O2 * -.\tests\packed-load-1.ispc runfail x86 avx1-i32x16 Windows LLVM 3.3 cl -O2 * -.\tests\packed-store.ispc runfail x86 avx1-i32x16 Windows LLVM 3.3 cl -O2 * -.\tests\packed-store2.ispc runfail x86 avx1-i32x16 Windows LLVM 3.3 cl -O2 * -.\tests\reduce-add-uint-1.ispc runfail x86 avx1-i32x16 Windows LLVM 3.3 cl -O2 * -.\tests\reduce-add-uint.ispc runfail x86 avx1-i32x16 Windows LLVM 3.3 cl -O2 * -.\tests\reduce-add-uint64-1.ispc runfail x86 avx1-i32x16 Windows LLVM 3.3 cl -O2 * -.\tests\reduce-add-uint64.ispc runfail x86 avx1-i32x16 Windows LLVM 3.3 cl -O2 * -.\tests\reduce-max-uint.ispc runfail x86 avx1-i32x16 Windows LLVM 3.3 cl -O2 * -.\tests\reduce-max-uint64.ispc runfail x86 avx1-i32x16 Windows LLVM 3.3 cl -O2 * -.\tests\reduce-min-uint64.ispc runfail x86 avx1-i32x16 Windows LLVM 3.3 cl -O2 * 
-.\tests\uint64-max-1.ispc runfail x86 avx1-i32x16 Windows LLVM 3.3 cl -O2 * -.\tests\uint64-max.ispc runfail x86 avx1-i32x16 Windows LLVM 3.3 cl -O2 * -.\tests\uint64-min-1.ispc runfail x86 avx1-i32x16 Windows LLVM 3.3 cl -O2 * -.\tests\uint64-min.ispc runfail x86 avx1-i32x16 Windows LLVM 3.3 cl -O2 * -.\tests\atomics-10.ispc runfail x86 avx1-i32x16 Windows LLVM 3.3 cl -O0 * -.\tests\atomics-varyingptr-1.ispc runfail x86 avx1-i32x16 Windows LLVM 3.3 cl -O0 * -.\tests\local-atomics-10.ispc runfail x86 avx1-i32x16 Windows LLVM 3.3 cl -O0 * -.\tests\local-atomics-varyingptr-1.ispc runfail x86 avx1-i32x16 Windows LLVM 3.3 cl -O0 * -.\tests\max-uint-1.ispc runfail x86 avx1-i32x16 Windows LLVM 3.3 cl -O0 * -.\tests\max-uint.ispc runfail x86 avx1-i32x16 Windows LLVM 3.3 cl -O0 * -.\tests\min-uint-2.ispc runfail x86 avx1-i32x16 Windows LLVM 3.3 cl -O0 * -.\tests\packed-load-1.ispc runfail x86 avx1-i32x16 Windows LLVM 3.3 cl -O0 * -.\tests\packed-store.ispc runfail x86 avx1-i32x16 Windows LLVM 3.3 cl -O0 * -.\tests\packed-store2.ispc runfail x86 avx1-i32x16 Windows LLVM 3.3 cl -O0 * -.\tests\reduce-add-uint-1.ispc runfail x86 avx1-i32x16 Windows LLVM 3.3 cl -O0 * -.\tests\reduce-add-uint.ispc runfail x86 avx1-i32x16 Windows LLVM 3.3 cl -O0 * -.\tests\reduce-add-uint64-1.ispc runfail x86 avx1-i32x16 Windows LLVM 3.3 cl -O0 * -.\tests\reduce-add-uint64.ispc runfail x86 avx1-i32x16 Windows LLVM 3.3 cl -O0 * -.\tests\reduce-max-uint.ispc runfail x86 avx1-i32x16 Windows LLVM 3.3 cl -O0 * -.\tests\exclusive-scan-add-10.ispc runfail x86 avx1-i64x4 Windows LLVM 3.3 cl -O2 * -.\tests\exclusive-scan-add-9.ispc runfail x86 avx1-i64x4 Windows LLVM 3.3 cl -O2 * -.\tests\max-uint-1.ispc runfail x86 avx1-i64x4 Windows LLVM 3.3 cl -O2 * -.\tests\max-uint.ispc runfail x86 avx1-i64x4 Windows LLVM 3.3 cl -O2 * -.\tests\min-uint-2.ispc runfail x86 avx1-i64x4 Windows LLVM 3.3 cl -O2 * -.\tests\packed-load-1.ispc runfail x86 avx1-i64x4 Windows LLVM 3.3 cl -O2 * -.\tests\packed-store.ispc 
runfail x86 avx1-i64x4 Windows LLVM 3.3 cl -O2 * -.\tests\packed-store2.ispc runfail x86 avx1-i64x4 Windows LLVM 3.3 cl -O2 * -.\tests\reduce-add-uint-1.ispc runfail x86 avx1-i64x4 Windows LLVM 3.3 cl -O2 * -.\tests\reduce-add-uint.ispc runfail x86 avx1-i64x4 Windows LLVM 3.3 cl -O2 * -.\tests\reduce-add-uint64-1.ispc runfail x86 avx1-i64x4 Windows LLVM 3.3 cl -O2 * -.\tests\reduce-add-uint64.ispc runfail x86 avx1-i64x4 Windows LLVM 3.3 cl -O2 * -.\tests\reduce-max-uint.ispc runfail x86 avx1-i64x4 Windows LLVM 3.3 cl -O2 * -.\tests\uint64-max-1.ispc runfail x86 avx1-i64x4 Windows LLVM 3.3 cl -O2 * -.\tests\uint64-max.ispc runfail x86 avx1-i64x4 Windows LLVM 3.3 cl -O2 * -.\tests\uint64-min-1.ispc runfail x86 avx1-i64x4 Windows LLVM 3.3 cl -O2 * -.\tests\uint64-min.ispc runfail x86 avx1-i64x4 Windows LLVM 3.3 cl -O2 * -.\tests\atomics-10.ispc runfail x86 avx1-i64x4 Windows LLVM 3.3 cl -O0 * -.\tests\atomics-varyingptr-1.ispc runfail x86 avx1-i64x4 Windows LLVM 3.3 cl -O0 * -.\tests\local-atomics-10.ispc runfail x86 avx1-i64x4 Windows LLVM 3.3 cl -O0 * -.\tests\local-atomics-varyingptr-1.ispc runfail x86 avx1-i64x4 Windows LLVM 3.3 cl -O0 * -.\tests\max-uint-1.ispc runfail x86 avx1-i64x4 Windows LLVM 3.3 cl -O0 * -.\tests\max-uint.ispc runfail x86 avx1-i64x4 Windows LLVM 3.3 cl -O0 * -.\tests\min-uint-2.ispc runfail x86 avx1-i64x4 Windows LLVM 3.3 cl -O0 * -.\tests\packed-load-1.ispc runfail x86 avx1-i64x4 Windows LLVM 3.3 cl -O0 * -.\tests\packed-store.ispc runfail x86 avx1-i64x4 Windows LLVM 3.3 cl -O0 * -.\tests\packed-store2.ispc runfail x86 avx1-i64x4 Windows LLVM 3.3 cl -O0 * -.\tests\reduce-add-uint-1.ispc runfail x86 avx1-i64x4 Windows LLVM 3.3 cl -O0 * -.\tests\reduce-add-uint.ispc runfail x86 avx1-i64x4 Windows LLVM 3.3 cl -O0 * -.\tests\reduce-add-uint64-1.ispc runfail x86 avx1-i64x4 Windows LLVM 3.3 cl -O0 * -.\tests\reduce-add-uint64.ispc runfail x86 avx1-i64x4 Windows LLVM 3.3 cl -O0 * -.\tests\reduce-max-uint.ispc runfail x86 avx1-i64x4 Windows LLVM 
3.3 cl -O0 * -.\tests\exclusive-scan-add-10.ispc runfail x86 avx1.1-i32x8 Windows LLVM 3.3 cl -O2 * -.\tests\max-uint-1.ispc runfail x86 avx1.1-i32x8 Windows LLVM 3.3 cl -O2 * -.\tests\max-uint.ispc runfail x86 avx1.1-i32x8 Windows LLVM 3.3 cl -O2 * -.\tests\min-uint-2.ispc runfail x86 avx1.1-i32x8 Windows LLVM 3.3 cl -O2 * -.\tests\packed-load-1.ispc runfail x86 avx1.1-i32x8 Windows LLVM 3.3 cl -O2 * -.\tests\packed-store.ispc runfail x86 avx1.1-i32x8 Windows LLVM 3.3 cl -O2 * -.\tests\packed-store2.ispc runfail x86 avx1.1-i32x8 Windows LLVM 3.3 cl -O2 * -.\tests\reduce-add-uint-1.ispc runfail x86 avx1.1-i32x8 Windows LLVM 3.3 cl -O2 * -.\tests\reduce-add-uint.ispc runfail x86 avx1.1-i32x8 Windows LLVM 3.3 cl -O2 * -.\tests\reduce-add-uint64-1.ispc runfail x86 avx1.1-i32x8 Windows LLVM 3.3 cl -O2 * -.\tests\reduce-add-uint64.ispc runfail x86 avx1.1-i32x8 Windows LLVM 3.3 cl -O2 * -.\tests\reduce-max-uint.ispc runfail x86 avx1.1-i32x8 Windows LLVM 3.3 cl -O2 * -.\tests\reduce-max-uint64.ispc runfail x86 avx1.1-i32x8 Windows LLVM 3.3 cl -O2 * -.\tests\uint64-max-1.ispc runfail x86 avx1.1-i32x8 Windows LLVM 3.3 cl -O2 * -.\tests\uint64-max.ispc runfail x86 avx1.1-i32x8 Windows LLVM 3.3 cl -O2 * -.\tests\uint64-min-1.ispc runfail x86 avx1.1-i32x8 Windows LLVM 3.3 cl -O2 * -.\tests\uint64-min.ispc runfail x86 avx1.1-i32x8 Windows LLVM 3.3 cl -O2 * -.\tests\atomics-10.ispc runfail x86 avx1.1-i32x8 Windows LLVM 3.3 cl -O0 * -.\tests\atomics-varyingptr-1.ispc runfail x86 avx1.1-i32x8 Windows LLVM 3.3 cl -O0 * -.\tests\local-atomics-10.ispc runfail x86 avx1.1-i32x8 Windows LLVM 3.3 cl -O0 * -.\tests\local-atomics-varyingptr-1.ispc runfail x86 avx1.1-i32x8 Windows LLVM 3.3 cl -O0 * -.\tests\max-uint-1.ispc runfail x86 avx1.1-i32x8 Windows LLVM 3.3 cl -O0 * -.\tests\max-uint.ispc runfail x86 avx1.1-i32x8 Windows LLVM 3.3 cl -O0 * -.\tests\min-uint-2.ispc runfail x86 avx1.1-i32x8 Windows LLVM 3.3 cl -O0 * -.\tests\packed-load-1.ispc runfail x86 avx1.1-i32x8 Windows LLVM 3.3 
cl -O0 * -.\tests\packed-store.ispc runfail x86 avx1.1-i32x8 Windows LLVM 3.3 cl -O0 * -.\tests\packed-store2.ispc runfail x86 avx1.1-i32x8 Windows LLVM 3.3 cl -O0 * -.\tests\reduce-add-uint-1.ispc runfail x86 avx1.1-i32x8 Windows LLVM 3.3 cl -O0 * -.\tests\reduce-add-uint.ispc runfail x86 avx1.1-i32x8 Windows LLVM 3.3 cl -O0 * -.\tests\reduce-add-uint64-1.ispc runfail x86 avx1.1-i32x8 Windows LLVM 3.3 cl -O0 * -.\tests\reduce-add-uint64.ispc runfail x86 avx1.1-i32x8 Windows LLVM 3.3 cl -O0 * -.\tests\reduce-max-uint.ispc runfail x86 avx1.1-i32x8 Windows LLVM 3.3 cl -O0 * -.\tests\exclusive-scan-add-10.ispc runfail x86 avx1.1-i32x16 Windows LLVM 3.3 cl -O2 * -.\tests\max-uint-1.ispc runfail x86 avx1.1-i32x16 Windows LLVM 3.3 cl -O2 * -.\tests\max-uint.ispc runfail x86 avx1.1-i32x16 Windows LLVM 3.3 cl -O2 * -.\tests\min-uint-2.ispc runfail x86 avx1.1-i32x16 Windows LLVM 3.3 cl -O2 * -.\tests\packed-load-1.ispc runfail x86 avx1.1-i32x16 Windows LLVM 3.3 cl -O2 * -.\tests\packed-store.ispc runfail x86 avx1.1-i32x16 Windows LLVM 3.3 cl -O2 * -.\tests\packed-store2.ispc runfail x86 avx1.1-i32x16 Windows LLVM 3.3 cl -O2 * -.\tests\reduce-add-uint-1.ispc runfail x86 avx1.1-i32x16 Windows LLVM 3.3 cl -O2 * -.\tests\reduce-add-uint.ispc runfail x86 avx1.1-i32x16 Windows LLVM 3.3 cl -O2 * -.\tests\reduce-add-uint64-1.ispc runfail x86 avx1.1-i32x16 Windows LLVM 3.3 cl -O2 * -.\tests\reduce-add-uint64.ispc runfail x86 avx1.1-i32x16 Windows LLVM 3.3 cl -O2 * -.\tests\reduce-max-uint.ispc runfail x86 avx1.1-i32x16 Windows LLVM 3.3 cl -O2 * -.\tests\reduce-max-uint64.ispc runfail x86 avx1.1-i32x16 Windows LLVM 3.3 cl -O2 * -.\tests\reduce-min-uint64.ispc runfail x86 avx1.1-i32x16 Windows LLVM 3.3 cl -O2 * -.\tests\uint64-max-1.ispc runfail x86 avx1.1-i32x16 Windows LLVM 3.3 cl -O2 * -.\tests\uint64-max.ispc runfail x86 avx1.1-i32x16 Windows LLVM 3.3 cl -O2 * -.\tests\uint64-min-1.ispc runfail x86 avx1.1-i32x16 Windows LLVM 3.3 cl -O2 * -.\tests\uint64-min.ispc runfail x86 
avx1.1-i32x16 Windows LLVM 3.3 cl -O2 * -.\tests\atomics-10.ispc runfail x86 avx1.1-i32x16 Windows LLVM 3.3 cl -O0 * -.\tests\atomics-varyingptr-1.ispc runfail x86 avx1.1-i32x16 Windows LLVM 3.3 cl -O0 * -.\tests\local-atomics-10.ispc runfail x86 avx1.1-i32x16 Windows LLVM 3.3 cl -O0 * -.\tests\local-atomics-varyingptr-1.ispc runfail x86 avx1.1-i32x16 Windows LLVM 3.3 cl -O0 * -.\tests\max-uint-1.ispc runfail x86 avx1.1-i32x16 Windows LLVM 3.3 cl -O0 * -.\tests\max-uint.ispc runfail x86 avx1.1-i32x16 Windows LLVM 3.3 cl -O0 * -.\tests\min-uint-2.ispc runfail x86 avx1.1-i32x16 Windows LLVM 3.3 cl -O0 * -.\tests\packed-load-1.ispc runfail x86 avx1.1-i32x16 Windows LLVM 3.3 cl -O0 * -.\tests\packed-store.ispc runfail x86 avx1.1-i32x16 Windows LLVM 3.3 cl -O0 * -.\tests\packed-store2.ispc runfail x86 avx1.1-i32x16 Windows LLVM 3.3 cl -O0 * -.\tests\reduce-add-uint-1.ispc runfail x86 avx1.1-i32x16 Windows LLVM 3.3 cl -O0 * -.\tests\reduce-add-uint.ispc runfail x86 avx1.1-i32x16 Windows LLVM 3.3 cl -O0 * -.\tests\reduce-add-uint64-1.ispc runfail x86 avx1.1-i32x16 Windows LLVM 3.3 cl -O0 * -.\tests\reduce-add-uint64.ispc runfail x86 avx1.1-i32x16 Windows LLVM 3.3 cl -O0 * -.\tests\reduce-max-uint.ispc runfail x86 avx1.1-i32x16 Windows LLVM 3.3 cl -O0 * -.\tests\exclusive-scan-add-10.ispc runfail x86 avx1.1-i64x4 Windows LLVM 3.3 cl -O2 * -.\tests\exclusive-scan-add-9.ispc runfail x86 avx1.1-i64x4 Windows LLVM 3.3 cl -O2 * -.\tests\max-uint-1.ispc runfail x86 avx1.1-i64x4 Windows LLVM 3.3 cl -O2 * -.\tests\max-uint.ispc runfail x86 avx1.1-i64x4 Windows LLVM 3.3 cl -O2 * -.\tests\min-uint-2.ispc runfail x86 avx1.1-i64x4 Windows LLVM 3.3 cl -O2 * -.\tests\packed-load-1.ispc runfail x86 avx1.1-i64x4 Windows LLVM 3.3 cl -O2 * -.\tests\packed-store.ispc runfail x86 avx1.1-i64x4 Windows LLVM 3.3 cl -O2 * -.\tests\packed-store2.ispc runfail x86 avx1.1-i64x4 Windows LLVM 3.3 cl -O2 * -.\tests\reduce-add-uint-1.ispc runfail x86 avx1.1-i64x4 Windows LLVM 3.3 cl -O2 * 
-.\tests\reduce-add-uint.ispc runfail x86 avx1.1-i64x4 Windows LLVM 3.3 cl -O2 * -.\tests\reduce-add-uint64-1.ispc runfail x86 avx1.1-i64x4 Windows LLVM 3.3 cl -O2 * -.\tests\reduce-add-uint64.ispc runfail x86 avx1.1-i64x4 Windows LLVM 3.3 cl -O2 * -.\tests\reduce-max-uint.ispc runfail x86 avx1.1-i64x4 Windows LLVM 3.3 cl -O2 * -.\tests\uint64-max-1.ispc runfail x86 avx1.1-i64x4 Windows LLVM 3.3 cl -O2 * -.\tests\uint64-max.ispc runfail x86 avx1.1-i64x4 Windows LLVM 3.3 cl -O2 * -.\tests\uint64-min-1.ispc runfail x86 avx1.1-i64x4 Windows LLVM 3.3 cl -O2 * -.\tests\uint64-min.ispc runfail x86 avx1.1-i64x4 Windows LLVM 3.3 cl -O2 * -.\tests\atomics-10.ispc runfail x86 avx1.1-i64x4 Windows LLVM 3.3 cl -O0 * -.\tests\atomics-varyingptr-1.ispc runfail x86 avx1.1-i64x4 Windows LLVM 3.3 cl -O0 * -.\tests\local-atomics-10.ispc runfail x86 avx1.1-i64x4 Windows LLVM 3.3 cl -O0 * -.\tests\local-atomics-varyingptr-1.ispc runfail x86 avx1.1-i64x4 Windows LLVM 3.3 cl -O0 * -.\tests\max-uint-1.ispc runfail x86 avx1.1-i64x4 Windows LLVM 3.3 cl -O0 * -.\tests\max-uint.ispc runfail x86 avx1.1-i64x4 Windows LLVM 3.3 cl -O0 * -.\tests\min-uint-2.ispc runfail x86 avx1.1-i64x4 Windows LLVM 3.3 cl -O0 * -.\tests\packed-load-1.ispc runfail x86 avx1.1-i64x4 Windows LLVM 3.3 cl -O0 * -.\tests\packed-store.ispc runfail x86 avx1.1-i64x4 Windows LLVM 3.3 cl -O0 * -.\tests\packed-store2.ispc runfail x86 avx1.1-i64x4 Windows LLVM 3.3 cl -O0 * -.\tests\reduce-add-uint-1.ispc runfail x86 avx1.1-i64x4 Windows LLVM 3.3 cl -O0 * -.\tests\reduce-add-uint.ispc runfail x86 avx1.1-i64x4 Windows LLVM 3.3 cl -O0 * -.\tests\reduce-add-uint64-1.ispc runfail x86 avx1.1-i64x4 Windows LLVM 3.3 cl -O0 * -.\tests\reduce-add-uint64.ispc runfail x86 avx1.1-i64x4 Windows LLVM 3.3 cl -O0 * -.\tests\reduce-max-uint.ispc runfail x86 avx1.1-i64x4 Windows LLVM 3.3 cl -O0 * -.\tests\exclusive-scan-add-10.ispc runfail x86 avx2-i32x8 Windows LLVM 3.3 cl -O2 * -.\tests\max-uint-1.ispc runfail x86 avx2-i32x8 Windows LLVM 3.3 
cl -O2 * -.\tests\max-uint.ispc runfail x86 avx2-i32x8 Windows LLVM 3.3 cl -O2 * -.\tests\min-uint-2.ispc runfail x86 avx2-i32x8 Windows LLVM 3.3 cl -O2 * -.\tests\packed-load-1.ispc runfail x86 avx2-i32x8 Windows LLVM 3.3 cl -O2 * -.\tests\packed-store.ispc runfail x86 avx2-i32x8 Windows LLVM 3.3 cl -O2 * -.\tests\packed-store2.ispc runfail x86 avx2-i32x8 Windows LLVM 3.3 cl -O2 * -.\tests\reduce-add-uint-1.ispc runfail x86 avx2-i32x8 Windows LLVM 3.3 cl -O2 * -.\tests\reduce-add-uint.ispc runfail x86 avx2-i32x8 Windows LLVM 3.3 cl -O2 * -.\tests\reduce-add-uint64-1.ispc runfail x86 avx2-i32x8 Windows LLVM 3.3 cl -O2 * -.\tests\reduce-add-uint64.ispc runfail x86 avx2-i32x8 Windows LLVM 3.3 cl -O2 * -.\tests\reduce-max-uint.ispc runfail x86 avx2-i32x8 Windows LLVM 3.3 cl -O2 * -.\tests\reduce-max-uint64.ispc runfail x86 avx2-i32x8 Windows LLVM 3.3 cl -O2 * -.\tests\uint64-max-1.ispc runfail x86 avx2-i32x8 Windows LLVM 3.3 cl -O2 * -.\tests\atomics-10.ispc runfail x86 avx2-i32x8 Windows LLVM 3.3 cl -O0 * -.\tests\atomics-varyingptr-1.ispc runfail x86 avx2-i32x8 Windows LLVM 3.3 cl -O0 * -.\tests\local-atomics-10.ispc runfail x86 avx2-i32x8 Windows LLVM 3.3 cl -O0 * -.\tests\local-atomics-varyingptr-1.ispc runfail x86 avx2-i32x8 Windows LLVM 3.3 cl -O0 * -.\tests\max-uint-1.ispc runfail x86 avx2-i32x8 Windows LLVM 3.3 cl -O0 * -.\tests\max-uint.ispc runfail x86 avx2-i32x8 Windows LLVM 3.3 cl -O0 * -.\tests\min-uint-2.ispc runfail x86 avx2-i32x8 Windows LLVM 3.3 cl -O0 * -.\tests\packed-load-1.ispc runfail x86 avx2-i32x8 Windows LLVM 3.3 cl -O0 * -.\tests\packed-store.ispc runfail x86 avx2-i32x8 Windows LLVM 3.3 cl -O0 * -.\tests\packed-store2.ispc runfail x86 avx2-i32x8 Windows LLVM 3.3 cl -O0 * -.\tests\reduce-add-uint-1.ispc runfail x86 avx2-i32x8 Windows LLVM 3.3 cl -O0 * -.\tests\reduce-add-uint.ispc runfail x86 avx2-i32x8 Windows LLVM 3.3 cl -O0 * -.\tests\reduce-add-uint64-1.ispc runfail x86 avx2-i32x8 Windows LLVM 3.3 cl -O0 * -.\tests\reduce-add-uint64.ispc 
runfail x86 avx2-i32x8 Windows LLVM 3.3 cl -O0 * -.\tests\reduce-max-uint.ispc runfail x86 avx2-i32x8 Windows LLVM 3.3 cl -O0 * -.\tests\exclusive-scan-add-10.ispc runfail x86 avx2-i32x16 Windows LLVM 3.3 cl -O2 * -.\tests\max-uint-1.ispc runfail x86 avx2-i32x16 Windows LLVM 3.3 cl -O2 * -.\tests\max-uint.ispc runfail x86 avx2-i32x16 Windows LLVM 3.3 cl -O2 * -.\tests\min-uint-2.ispc runfail x86 avx2-i32x16 Windows LLVM 3.3 cl -O2 * -.\tests\packed-load-1.ispc runfail x86 avx2-i32x16 Windows LLVM 3.3 cl -O2 * -.\tests\packed-store.ispc runfail x86 avx2-i32x16 Windows LLVM 3.3 cl -O2 * -.\tests\packed-store2.ispc runfail x86 avx2-i32x16 Windows LLVM 3.3 cl -O2 * -.\tests\reduce-add-uint-1.ispc runfail x86 avx2-i32x16 Windows LLVM 3.3 cl -O2 * -.\tests\reduce-add-uint.ispc runfail x86 avx2-i32x16 Windows LLVM 3.3 cl -O2 * -.\tests\reduce-add-uint64-1.ispc runfail x86 avx2-i32x16 Windows LLVM 3.3 cl -O2 * -.\tests\reduce-add-uint64.ispc runfail x86 avx2-i32x16 Windows LLVM 3.3 cl -O2 * -.\tests\reduce-max-uint.ispc runfail x86 avx2-i32x16 Windows LLVM 3.3 cl -O2 * -.\tests\reduce-max-uint64.ispc runfail x86 avx2-i32x16 Windows LLVM 3.3 cl -O2 * -.\tests\reduce-min-uint64.ispc runfail x86 avx2-i32x16 Windows LLVM 3.3 cl -O2 * -.\tests\uint64-max-1.ispc runfail x86 avx2-i32x16 Windows LLVM 3.3 cl -O2 * -.\tests\atomics-10.ispc runfail x86 avx2-i32x16 Windows LLVM 3.3 cl -O0 * -.\tests\atomics-varyingptr-1.ispc runfail x86 avx2-i32x16 Windows LLVM 3.3 cl -O0 * -.\tests\local-atomics-10.ispc runfail x86 avx2-i32x16 Windows LLVM 3.3 cl -O0 * -.\tests\local-atomics-varyingptr-1.ispc runfail x86 avx2-i32x16 Windows LLVM 3.3 cl -O0 * -.\tests\max-uint-1.ispc runfail x86 avx2-i32x16 Windows LLVM 3.3 cl -O0 * -.\tests\max-uint.ispc runfail x86 avx2-i32x16 Windows LLVM 3.3 cl -O0 * -.\tests\min-uint-2.ispc runfail x86 avx2-i32x16 Windows LLVM 3.3 cl -O0 * -.\tests\packed-load-1.ispc runfail x86 avx2-i32x16 Windows LLVM 3.3 cl -O0 * -.\tests\packed-store.ispc runfail x86 
avx2-i32x16 Windows LLVM 3.3 cl -O0 * -.\tests\packed-store2.ispc runfail x86 avx2-i32x16 Windows LLVM 3.3 cl -O0 * -.\tests\reduce-add-uint-1.ispc runfail x86 avx2-i32x16 Windows LLVM 3.3 cl -O0 * -.\tests\reduce-add-uint.ispc runfail x86 avx2-i32x16 Windows LLVM 3.3 cl -O0 * -.\tests\reduce-add-uint64-1.ispc runfail x86 avx2-i32x16 Windows LLVM 3.3 cl -O0 * -.\tests\reduce-add-uint64.ispc runfail x86 avx2-i32x16 Windows LLVM 3.3 cl -O0 * -.\tests\reduce-max-uint.ispc runfail x86 avx2-i32x16 Windows LLVM 3.3 cl -O0 * -.\tests\exclusive-scan-add-10.ispc runfail x86 avx2-i64x4 Windows LLVM 3.3 cl -O2 * -.\tests\exclusive-scan-add-9.ispc runfail x86 avx2-i64x4 Windows LLVM 3.3 cl -O2 * -.\tests\max-uint-1.ispc runfail x86 avx2-i64x4 Windows LLVM 3.3 cl -O2 * -.\tests\max-uint.ispc runfail x86 avx2-i64x4 Windows LLVM 3.3 cl -O2 * -.\tests\min-uint-2.ispc runfail x86 avx2-i64x4 Windows LLVM 3.3 cl -O2 * -.\tests\packed-load-1.ispc runfail x86 avx2-i64x4 Windows LLVM 3.3 cl -O2 * -.\tests\packed-store.ispc runfail x86 avx2-i64x4 Windows LLVM 3.3 cl -O2 * -.\tests\packed-store2.ispc runfail x86 avx2-i64x4 Windows LLVM 3.3 cl -O2 * -.\tests\reduce-add-uint-1.ispc runfail x86 avx2-i64x4 Windows LLVM 3.3 cl -O2 * -.\tests\reduce-add-uint.ispc runfail x86 avx2-i64x4 Windows LLVM 3.3 cl -O2 * -.\tests\reduce-add-uint64-1.ispc runfail x86 avx2-i64x4 Windows LLVM 3.3 cl -O2 * -.\tests\reduce-add-uint64.ispc runfail x86 avx2-i64x4 Windows LLVM 3.3 cl -O2 * -.\tests\reduce-max-uint.ispc runfail x86 avx2-i64x4 Windows LLVM 3.3 cl -O2 * -.\tests\uint64-max-1.ispc runfail x86 avx2-i64x4 Windows LLVM 3.3 cl -O2 * -.\tests\atomics-10.ispc runfail x86 avx2-i64x4 Windows LLVM 3.3 cl -O0 * -.\tests\atomics-varyingptr-1.ispc runfail x86 avx2-i64x4 Windows LLVM 3.3 cl -O0 * -.\tests\local-atomics-10.ispc runfail x86 avx2-i64x4 Windows LLVM 3.3 cl -O0 * -.\tests\local-atomics-varyingptr-1.ispc runfail x86 avx2-i64x4 Windows LLVM 3.3 cl -O0 * -.\tests\max-uint-1.ispc runfail x86 avx2-i64x4 
Windows LLVM 3.3 cl -O0 * -.\tests\max-uint.ispc runfail x86 avx2-i64x4 Windows LLVM 3.3 cl -O0 * -.\tests\min-uint-2.ispc runfail x86 avx2-i64x4 Windows LLVM 3.3 cl -O0 * -.\tests\packed-load-1.ispc runfail x86 avx2-i64x4 Windows LLVM 3.3 cl -O0 * -.\tests\packed-store.ispc runfail x86 avx2-i64x4 Windows LLVM 3.3 cl -O0 * -.\tests\packed-store2.ispc runfail x86 avx2-i64x4 Windows LLVM 3.3 cl -O0 * -.\tests\reduce-add-uint-1.ispc runfail x86 avx2-i64x4 Windows LLVM 3.3 cl -O0 * -.\tests\reduce-add-uint.ispc runfail x86 avx2-i64x4 Windows LLVM 3.3 cl -O0 * -.\tests\reduce-add-uint64-1.ispc runfail x86 avx2-i64x4 Windows LLVM 3.3 cl -O0 * -.\tests\reduce-add-uint64.ispc runfail x86 avx2-i64x4 Windows LLVM 3.3 cl -O0 * -.\tests\reduce-max-uint.ispc runfail x86 avx2-i64x4 Windows LLVM 3.3 cl -O0 * -.\tests\atomics-13.ispc compfail x86-64 sse4-i16x8 Windows LLVM 3.3 cl -O2 * -.\tests\atomics-13.ispc compfail x86-64 sse4-i8x16 Windows LLVM 3.3 cl -O2 * -.\tests\ptr-int-1.ispc runfail x86 avx2-i32x8 Windows LLVM 3.4 cl -O2 * -.\tests\ptr-int-1.ispc runfail x86 avx2-i32x16 Windows LLVM 3.4 cl -O2 * -.\tests\ptr-int-1.ispc runfail x86 avx2-i64x4 Windows LLVM 3.4 cl -O2 * -.\tests\ptr-int-1.ispc runfail x86 avx2-i32x8 Windows LLVM 3.5 cl -O2 * -.\tests\ptr-int-1.ispc runfail x86 avx2-i32x16 Windows LLVM 3.5 cl -O2 * -.\tests\ptr-int-1.ispc runfail x86 avx2-i64x4 Windows LLVM 3.5 cl -O2 * +.\tests\foreach-double-1.ispc runfail x86 avx2-i32x8 Windows LLVM 3.5 cl -O2 * +.\tests\foreach-double-1.ispc runfail x86 avx2-i32x16 Windows LLVM 3.5 cl -O2 * +.\tests\foreach-double-1.ispc runfail x86 avx2-i64x4 Windows LLVM 3.5 cl -O2 * From c2e05e2231ebeed85ce2ec07ab8df989f3bbe996 Mon Sep 17 00:00:00 2001 From: Vsevolod Livinskij Date: Fri, 28 Feb 2014 20:06:46 +0400 Subject: [PATCH 09/28] Algorithm was modified and division was changed to bit operations. 
--- stdlib.ispc | 135 ++++++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 109 insertions(+), 26 deletions(-) diff --git a/stdlib.ispc b/stdlib.ispc index 28dd8970..d60219d0 100644 --- a/stdlib.ispc +++ b/stdlib.ispc @@ -4885,28 +4885,6 @@ static inline varying int32 saturating_mul(varying int32 a, varying int32 b) { return result; } -static inline uniform int64 saturating_mul(uniform int64 a, uniform int64 b) { - uniform int64 a_abs = (a > 0) ? a : -a; - uniform int64 b_abs = (b > 0) ? b : -b; - if (a_abs > (INT64_MAX / b_abs)) - if (((a > 0) && (b > 0)) || ((a < 0) && (b < 0))) - return INT64_MAX; - else - return INT64_MIN; - return a * b; -} - -static inline varying int64 saturating_mul(varying int64 a, varying int64 b) { - varying int64 a_abs = (a > 0) ? a : -a; - varying int64 b_abs = (b > 0) ? b : -b; - if (a_abs > (INT64_MAX / b_abs)) - if (((a > 0) && (b > 0)) || ((a < 0) && (b < 0))) - return INT64_MAX; - else - return INT64_MIN; - return a * b; -} - static inline uniform unsigned int8 saturating_mul(uniform unsigned int8 a, uniform unsigned int8 b) { uniform unsigned int16 result = (uniform unsigned int16) a * @@ -4961,18 +4939,123 @@ static inline varying unsigned int32 saturating_mul(varying unsigned int32 a, return lo | - (varying int32) !! hi; } +static inline uniform int64 saturating_mul(uniform int64 a, uniform int64 b) { + uniform unsigned int64 ret = 0; + + uniform int8 sign = (((a > 0) && (b > 0)) || ((a < 0) && (b < 0))) ? 1 : -1; + uniform unsigned int64 a_abs = (a > 0) ? a : -a; + uniform unsigned int64 b_abs = (b > 0) ? 
b : -b; + + uniform unsigned int32 a0 = a_abs & 0xFFFFFFFF; + uniform unsigned int32 b0 = b_abs & 0xFFFFFFFF; + uniform unsigned int32 a1 = a_abs >> 32; + uniform unsigned int32 b1 = b_abs >> 32; + + if ((a1 != 0) && (b1 != 0)) { + if (sign > 0) { + return INT64_MAX; + } + else { + return INT64_MIN; + } + } else if (a1 != 0) { + ret = saturating_add ((uniform unsigned int64) saturating_mul (b0, a1) << 32 , + (uniform unsigned int64) (a0) * b0); + } else if (b1 != 0) { + ret = saturating_add ((uniform unsigned int64) saturating_mul (a0, b1) << 32 , + (uniform unsigned int64) (a0) * b0); + } else { + ret = a_abs * b_abs; + } + + + if ((sign < 0) && (ret >= -INT64_MIN)) { + return INT64_MIN; + } else if ((sign > 0) && (ret >= INT64_MAX)) { + return INT64_MAX; + } else { + return ret * sign; + } +} + +static inline varying int64 saturating_mul(varying int64 a, varying int64 b) { + varying unsigned int64 ret = 0; + + varying int8 sign = (((a > 0) && (b > 0)) || ((a < 0) && (b < 0))) ? 1 : -1; + varying unsigned int64 a_abs = (a > 0) ? a : -a; + varying unsigned int64 b_abs = (b > 0) ? 
b : -b; + + varying unsigned int32 a0 = a_abs & 0xFFFFFFFF; + varying unsigned int32 b0 = b_abs & 0xFFFFFFFF; + varying unsigned int32 a1 = a_abs >> 32; + varying unsigned int32 b1 = b_abs >> 32; + + if ((a1 != 0) && (b1 != 0)) { + if (sign > 0) { + return INT64_MAX; + } + else { + return INT64_MIN; + } + } else if (a1 != 0) { + ret = saturating_add ((varying unsigned int64) saturating_mul (b0, a1) << 32 , + (varying unsigned int64) (a0) * b0); + } else if (b1 != 0) { + ret = saturating_add ((varying unsigned int64) saturating_mul (a0, b1) << 32 , + (varying unsigned int64) (a0) * b0); + } else { + ret = a_abs * b_abs; + } + + + if ((sign < 0) && (ret >= -INT64_MIN)) { + return INT64_MIN; + } else if ((sign > 0) && (ret >= INT64_MAX)) { + return INT64_MAX; + } else { + return ret * sign; + } +} + + static inline uniform unsigned int64 saturating_mul(uniform unsigned int64 a, uniform unsigned int64 b) { - if (a > (UINT64_MAX / b)) + uniform unsigned int32 a0 = a & 0xFFFFFFFF; + uniform unsigned int32 b0 = b & 0xFFFFFFFF; + uniform unsigned int32 a1 = a >> 32; + uniform unsigned int32 b1 = b >> 32; + + if ((a1 != 0) && (b1 != 0)) { return UINT64_MAX; - return a * b; + } else if (a1 != 0) { + return saturating_add ((uniform unsigned int64) saturating_mul (b0, a1) << 32 , + (uniform unsigned int64) (a0) * b0); + } else if (b1 != 0) { + return saturating_add ((uniform unsigned int64) saturating_mul (a0, b1) << 32 , + (uniform unsigned int64) (a0) * b0); + } else { + return a * b; + } } static inline varying unsigned int64 saturating_mul(varying unsigned int64 a, varying unsigned int64 b) { - if (a > (UINT64_MAX / b)) + varying unsigned int32 a0 = a & 0xFFFFFFFF; + varying unsigned int32 b0 = b & 0xFFFFFFFF; + varying unsigned int32 a1 = a >> 32; + varying unsigned int32 b1 = b >> 32; + + if ((a1 != 0) && (b1 != 0)) { return UINT64_MAX; - return a * b; + } else if (a1 != 0) { + return saturating_add ((varying unsigned int64) saturating_mul (b0, a1) << 32 , + (varying 
unsigned int64) (a0) * b0); + } else if (b1 != 0) { + return saturating_add ((varying unsigned int64) saturating_mul (a0, b1) << 32 , + (varying unsigned int64) (a0) * b0); + } else { + return a * b; + } } /////////////////////////////////////////////////////////////////////////// // rdrand From c4e35050b02fa10e808a8ddf005659e8cdbe1535 Mon Sep 17 00:00:00 2001 From: Ilia Filippov Date: Tue, 4 Mar 2014 16:01:18 +0400 Subject: [PATCH 10/28] support of building with C++11 --- Makefile | 6 +++--- cbackend.cpp | 14 +++++++++++++- expr.cpp | 4 ++-- util.cpp | 2 +- 4 files changed, 19 insertions(+), 7 deletions(-) diff --git a/Makefile b/Makefile index f6d7af38..0b272b52 100644 --- a/Makefile +++ b/Makefile @@ -119,9 +119,9 @@ CXXFLAGS=$(OPT) $(LLVM_CXXFLAGS) -I. -Iobjs/ -I$(CLANG_INCLUDE) \ $(LLVM_VERSION_DEF) \ -Wall \ -DBUILD_DATE="\"$(BUILD_DATE)\"" -DBUILD_VERSION="\"$(BUILD_VERSION)\"" \ - -Wno-sign-compare -Wno-unused-function -ifneq ($(LLVM_VERSION),LLVM_3_1) - CXXFLAGS+=-Werror + -Wno-sign-compare -Wno-unused-function -Werror +ifeq ($(LLVM_VERSION),LLVM_3_5) + CXXFLAGS+=-std=c++11 -Wno-c99-extensions -Wno-deprecated-register endif ifneq ($(ARM_ENABLED), 0) CXXFLAGS+=-DISPC_ARM_ENABLED diff --git a/cbackend.cpp b/cbackend.cpp index 6465d466..1fcbfc2a 100644 --- a/cbackend.cpp +++ b/cbackend.cpp @@ -1462,7 +1462,7 @@ void CWriter::printConstant(llvm::Constant *CPV, bool Static) { char Buffer[100]; uint64_t ll = llvm::DoubleToBits(V); - sprintf(Buffer, "0x%"PRIx64, ll); + sprintf(Buffer, "0x%" PRIx64, ll); std::string Num(&Buffer[0], &Buffer[6]); unsigned long Val = strtoul(Num.c_str(), 0, 16); @@ -3123,7 +3123,11 @@ void CWriter::visitSwitchInst(llvm::SwitchInst &SI) { Out << ":\n"; printPHICopiesForSuccessor (SI.getParent(), Succ, 2); printBranchToBlock(SI.getParent(), Succ, 2); +#if defined (LLVM_3_5) + if (llvm::Function::iterator(Succ) == std::next(llvm::Function::iterator(SI.getParent()))) +#else if (llvm::Function::iterator(Succ) == 
llvm::next(llvm::Function::iterator(SI.getParent()))) +#endif Out << " break;\n"; } @@ -3144,7 +3148,11 @@ bool CWriter::isGotoCodeNecessary(llvm::BasicBlock *From, llvm::BasicBlock *To) /// FIXME: This should be reenabled, but loop reordering safe!! return true; +#if defined (LLVM_3_5) + if (std::next(llvm::Function::iterator(From)) != llvm::Function::iterator(To)) +#else if (llvm::next(llvm::Function::iterator(From)) != llvm::Function::iterator(To)) +#endif return true; // Not the direct successor, we need a goto. //llvm::isa(From->getTerminator()) @@ -3752,7 +3760,11 @@ void CWriter::lowerIntrinsics(llvm::Function &F) { // All other intrinsic calls we must lower. llvm::Instruction *Before = 0; if (CI != &BB->front()) +#if defined(LLVM_3_5) + Before = std::prev(llvm::BasicBlock::iterator(CI)); +#else Before = prior(llvm::BasicBlock::iterator(CI)); +#endif IL->LowerIntrinsicCall(CI); if (Before) { // Move iterator to instruction after call diff --git a/expr.cpp b/expr.cpp index b5c876fd..020a3b82 100644 --- a/expr.cpp +++ b/expr.cpp @@ -6194,10 +6194,10 @@ ConstExpr::Print() const { printf("%f", floatVal[i]); break; case AtomicType::TYPE_INT64: - printf("%"PRId64, int64Val[i]); + printf("%" PRId64, int64Val[i]); break; case AtomicType::TYPE_UINT64: - printf("%"PRIu64, uint64Val[i]); + printf("%" PRIu64, uint64Val[i]); break; case AtomicType::TYPE_DOUBLE: printf("%f", doubleVal[i]); diff --git a/util.cpp b/util.cpp index 6b121988..70bf53bb 100644 --- a/util.cpp +++ b/util.cpp @@ -577,7 +577,7 @@ GetDirectoryAndFileName(const std::string ¤tDirectory, const char *basenameStart = strrchr(fp, '/'); Assert(basenameStart != NULL); ++basenameStart; - Assert(basenameStart != '\0'); + Assert(basenameStart[0] != '\0'); *filename = basenameStart; *directory = std::string(fp, basenameStart - fp); #endif // ISPC_IS_WINDOWS From 38ce3f368cd6d1e1137eb6cf852566ae652ed101 Mon Sep 17 00:00:00 2001 From: Ilia Filippov Date: Tue, 4 Mar 2014 16:02:01 +0400 Subject: [PATCH 11/28] 
support LLVM trunk after r202720 revision --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 0b272b52..7d5d41dd 100644 --- a/Makefile +++ b/Makefile @@ -56,7 +56,7 @@ endif ARCH_TYPE = $(shell arch) LLVM_CXXFLAGS=$(shell $(LLVM_CONFIG) --cppflags) -LLVM_VERSION=LLVM_$(shell $(LLVM_CONFIG) --version | sed -e s/\\./_/ -e s/svn//) +LLVM_VERSION=LLVM_$(shell $(LLVM_CONFIG) --version | sed -e s/\\./_/ -e s/svn// -e s/\.0//) LLVM_VERSION_DEF=-D$(LLVM_VERSION) LLVM_COMPONENTS = engine ipo bitreader bitwriter instrumentation linker From c017e468200fe0d8a5ef91567857a3cc771c0b00 Mon Sep 17 00:00:00 2001 From: Ilia Filippov Date: Tue, 4 Mar 2014 16:02:25 +0400 Subject: [PATCH 12/28] support LLVM trunk after r202736 revision --- ctx.cpp | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/ctx.cpp b/ctx.cpp index 6ff26c6a..43964af3 100644 --- a/ctx.cpp +++ b/ctx.cpp @@ -1546,7 +1546,14 @@ FunctionEmitContext::StartScope() { llvm::DILexicalBlock lexicalBlock = m->diBuilder->createLexicalBlock(parentScope, diFile, currentPos.first_line, +#if defined(LLVM_3_5) + // Revision 202736 in LLVM adds support of DWARF discriminator + // to the last argument and revision 202737 in clang adds 0 + // for the last argument by default. 
+ currentPos.first_column, 0); +#else currentPos.first_column); +#endif AssertPos(currentPos, lexicalBlock.Verify()); debugScopes.push_back(lexicalBlock); } From 4d05ec0e1e067152ad9f20f89b43711a7975f38a Mon Sep 17 00:00:00 2001 From: Ilia Filippov Date: Tue, 4 Mar 2014 18:19:25 +0400 Subject: [PATCH 13/28] supporting VS2012 for all examples --- examples/aobench/aobench.vcxproj | 14 +- examples/aobench_instrumented/Makefile | 2 +- .../{ao.ispc => ao_instrumented.ispc} | 0 .../aobench_instrumented.vcxproj | 164 ++---------------- examples/common.props | 12 +- examples/deferred/deferred_shading.vcxproj | 14 +- examples/mandelbrot/mandelbrot.vcxproj | 14 +- .../mandelbrot_tasks/mandelbrot_tasks.vcxproj | 14 +- examples/noise/noise.vcxproj | 14 +- examples/options/options.vcxproj | 14 +- examples/perfbench/perfbench.vcxproj | 155 +---------------- examples/rt/rt.vcxproj | 14 +- examples/simple/simple.vcxproj | 159 +---------------- examples/sort/sort.vcxproj | 14 +- examples/stencil/stencil.vcxproj | 14 +- examples/volume_rendering/volume.vcxproj | 14 +- 16 files changed, 40 insertions(+), 592 deletions(-) rename examples/aobench_instrumented/{ao.ispc => ao_instrumented.ispc} (100%) diff --git a/examples/aobench/aobench.vcxproj b/examples/aobench/aobench.vcxproj index 66918e2a..298be2cb 100644 --- a/examples/aobench/aobench.vcxproj +++ b/examples/aobench/aobench.vcxproj @@ -25,22 +25,10 @@ ao sse2,sse4,avx1-i32x8 - - v110 - - - v110 - - - v110 - - - v110 - - \ No newline at end of file + diff --git a/examples/aobench_instrumented/Makefile b/examples/aobench_instrumented/Makefile index d0b27cbf..d47a5c31 100644 --- a/examples/aobench_instrumented/Makefile +++ b/examples/aobench_instrumented/Makefile @@ -20,7 +20,7 @@ ao: objs/ao.o objs/instrument.o objs/ao_ispc.o ../tasksys.cpp objs/%.o: %.cpp dirs $(CXX) $< $(CXXFLAGS) -c -o $@ -objs/ao.o: objs/ao_ispc.h +objs/ao.o: objs/ao_instrumented_ispc.h objs/%_ispc.h objs/%_ispc.o: %.ispc dirs $(ISPC) $(ISPCFLAGS) $< -o 
objs/$*_ispc.o -h objs/$*_instrumented_ispc.h diff --git a/examples/aobench_instrumented/ao.ispc b/examples/aobench_instrumented/ao_instrumented.ispc similarity index 100% rename from examples/aobench_instrumented/ao.ispc rename to examples/aobench_instrumented/ao_instrumented.ispc diff --git a/examples/aobench_instrumented/aobench_instrumented.vcxproj b/examples/aobench_instrumented/aobench_instrumented.vcxproj index c1abf354..6eaf55d9 100644 --- a/examples/aobench_instrumented/aobench_instrumented.vcxproj +++ b/examples/aobench_instrumented/aobench_instrumented.vcxproj @@ -18,162 +18,18 @@ x64 + + {B3B4AE3D-6D5A-4CF9-AF5B-43CF2131B958} + Win32Proj + aobench_instrumented + ao_instrumented + sse2 + --instrument + + - - - Document - $(ISPC_compiler) -O2 %(Filename).ispc -o $(TargetDir)%(Filename)_instrumented.obj -h $(TargetDir)%(Filename)_instrumented_ispc.h --arch=x86 --instrument --target=sse2 - - $(ISPC_compiler) -O2 %(Filename).ispc -o $(TargetDir)%(Filename)_instrumented.obj -h $(TargetDir)%(Filename)_instrumented_ispc.h --instrument --target=sse2 - - $(TargetDir)%(Filename)_instrumented.obj;$(TargetDir)%(Filename)_instrumented_ispc.h - $(TargetDir)%(Filename)_instrumented.obj;$(TargetDir)%(Filename)_instrumented_ispc.h - $(ISPC_compiler) -O2 %(Filename).ispc -o $(TargetDir)%(Filename)_instrumented.obj -h $(TargetDir)%(Filename)_instrumented_ispc.h --arch=x86 --instrument --target=sse2 - - $(ISPC_compiler) -O2 %(Filename).ispc -o $(TargetDir)%(Filename)_instrumented.obj -h $(TargetDir)%(Filename)_instrumented_ispc.h --instrument --target=sse2 - - $(TargetDir)%(Filename)_instrumented.obj;$(TargetDir)%(Filename)_instrumented_ispc.h - $(TargetDir)%(Filename)_instrumented.obj;$(TargetDir)%(Filename)_instrumented_ispc.h - - - - {B3B4AE3D-6D5A-4CF9-AF5B-43CF2131B958} - Win32Proj - aobench_instrumented - ispc - - - - Application - true - Unicode - v110 - - - Application - true - Unicode - v110 - - - Application - false - true - Unicode - v110 - - - Application - 
false - true - Unicode - v110 - - - - - - - - - - - - - - - - - - - true - $(ProjectDir)..\..;$(ExecutablePath) - true - - - true - $(ProjectDir)..\..;$(ExecutablePath) - true - - - false - $(ProjectDir)..\..;$(ExecutablePath) - true - - - false - $(ProjectDir)..\..;$(ExecutablePath) - true - - - - - - Level3 - Disabled - WIN32;_DEBUG;_CONSOLE;_CRT_SECURE_NO_WARNINGS;%(PreprocessorDefinitions) - $(TargetDir) - - - Console - true - - - - - - - Level3 - Disabled - WIN32;_DEBUG;_CONSOLE;_CRT_SECURE_NO_WARNINGS;%(PreprocessorDefinitions) - $(TargetDir) - - - Console - true - - - - - Level3 - - - MaxSpeed - true - true - WIN32;NDEBUG;_CONSOLE;_CRT_SECURE_NO_WARNINGS;%(PreprocessorDefinitions) - $(TargetDir) - - - Console - true - true - true - - - - - Level3 - - - MaxSpeed - true - true - WIN32;NDEBUG;_CONSOLE;_CRT_SECURE_NO_WARNINGS;%(PreprocessorDefinitions) - $(TargetDir) - - - Console - true - true - true - - - - - - \ No newline at end of file + diff --git a/examples/common.props b/examples/common.props index 3769330b..5cfad4fc 100644 --- a/examples/common.props +++ b/examples/common.props @@ -23,23 +23,27 @@ Application true Unicode + v110 Application true Unicode + v110 Application false true Unicode + v110 Application false true Unicode + v110 @@ -156,12 +160,12 @@ Document - $(ISPC_compiler) -O2 %(Filename).ispc -o %(Filename).obj -h %(Filename)_ispc.h --arch=x86 --target=$(Target_str) - $(ISPC_compiler) -O2 %(Filename).ispc -o %(Filename).obj -h %(Filename)_ispc.h --target=$(Target_str) + $(ISPC_compiler) -O2 %(Filename).ispc -o %(Filename).obj -h %(Filename)_ispc.h --arch=x86 --target=$(Target_str) $(flags) + $(ISPC_compiler) -O2 %(Filename).ispc -o %(Filename).obj -h %(Filename)_ispc.h --target=$(Target_str) $(flags) $(Target_out) $(Target_out) - $(ISPC_compiler) -O2 %(Filename).ispc -o %(Filename).obj -h %(Filename)_ispc.h --arch=x86 --target=$(Target_str) - $(ISPC_compiler) -O2 %(Filename).ispc -o %(Filename).obj -h %(Filename)_ispc.h 
--target=$(Target_str) + $(ISPC_compiler) -O2 %(Filename).ispc -o %(Filename).obj -h %(Filename)_ispc.h --arch=x86 --target=$(Target_str) $(flags) + $(ISPC_compiler) -O2 %(Filename).ispc -o %(Filename).obj -h %(Filename)_ispc.h --target=$(Target_str) $(flags) $(Target_out) $(Target_out) diff --git a/examples/deferred/deferred_shading.vcxproj b/examples/deferred/deferred_shading.vcxproj index 51c217ee..3e6c4c12 100755 --- a/examples/deferred/deferred_shading.vcxproj +++ b/examples/deferred/deferred_shading.vcxproj @@ -25,18 +25,6 @@ kernels sse2,sse4-x2,avx1-x2 - - v110 - - - v110 - - - v110 - - - v110 - @@ -45,4 +33,4 @@ - \ No newline at end of file + diff --git a/examples/mandelbrot/mandelbrot.vcxproj b/examples/mandelbrot/mandelbrot.vcxproj index 406986fa..7a5f6e03 100644 --- a/examples/mandelbrot/mandelbrot.vcxproj +++ b/examples/mandelbrot/mandelbrot.vcxproj @@ -25,21 +25,9 @@ mandelbrot sse2,sse4-x2,avx1-x2 - - v110 - - - v110 - - - v110 - - - v110 - - \ No newline at end of file + diff --git a/examples/mandelbrot_tasks/mandelbrot_tasks.vcxproj b/examples/mandelbrot_tasks/mandelbrot_tasks.vcxproj index 57f741f4..a10cd0ae 100644 --- a/examples/mandelbrot_tasks/mandelbrot_tasks.vcxproj +++ b/examples/mandelbrot_tasks/mandelbrot_tasks.vcxproj @@ -25,22 +25,10 @@ mandelbrot_tasks sse2,sse4-x2,avx1-x2 - - v110 - - - v110 - - - v110 - - - v110 - - \ No newline at end of file + diff --git a/examples/noise/noise.vcxproj b/examples/noise/noise.vcxproj index b7f87354..f0e6e207 100644 --- a/examples/noise/noise.vcxproj +++ b/examples/noise/noise.vcxproj @@ -25,21 +25,9 @@ noise sse2,sse4,avx1-x2 - - v110 - - - v110 - - - v110 - - - v110 - - \ No newline at end of file + diff --git a/examples/options/options.vcxproj b/examples/options/options.vcxproj index 7d21afd8..526f8450 100644 --- a/examples/options/options.vcxproj +++ b/examples/options/options.vcxproj @@ -25,18 +25,6 @@ options sse2,sse4-x2,avx1-x2 - - v110 - - - v110 - - - v110 - - - v110 - @@ -44,4 +32,4 @@ - \ 
No newline at end of file + diff --git a/examples/perfbench/perfbench.vcxproj b/examples/perfbench/perfbench.vcxproj index b8a64b33..7bafb480 100644 --- a/examples/perfbench/perfbench.vcxproj +++ b/examples/perfbench/perfbench.vcxproj @@ -22,159 +22,12 @@ {d923bb7e-a7c8-4850-8fcf-0eb9ce35b4e8} Win32Proj perfbench - ispc + perfbench + sse2-i32x4,sse4-i32x4,avx1-i32x8 - - - Application - true - Unicode - v110 - - - Application - true - Unicode - v110 - - - Application - false - true - Unicode - v110 - - - Application - false - true - Unicode - v110 - - - - - - - - - - - - - - - - - - - true - $(ProjectDir)..\..;$(ExecutablePath) - - - true - $(ProjectDir)..\..;$(ExecutablePath) - - - false - $(ProjectDir)..\..;$(ExecutablePath) - - - false - $(ProjectDir)..\..;$(ExecutablePath) - - - - - - Level3 - Disabled - WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) - $(TargetDir) - true - Fast - - - Console - true - - - - - - - Level3 - Disabled - WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) - $(TargetDir) - true - Fast - - - Console - true - - - - - Level3 - - - MaxSpeed - true - true - WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) - $(TargetDir) - Fast - - - Console - true - true - true - - - - - Level3 - - - MaxSpeed - true - true - WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) - $(TargetDir) - Fast - - - Console - true - true - true - - + - - - Document - $(ISPC_compiler) -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --arch=x86 --target=sse2,sse4,avx - - $(ISPC_compiler) -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --target=sse2,sse4,avx - - $(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_avx.obj;$(TargetDir)%(Filename)_ispc.h - $(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_avx.obj;$(TargetDir)%(Filename)_ispc.h - $(ISPC_compiler) -O2 
%(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --arch=x86 --target=sse2,sse4,avx - - $(ISPC_compiler) -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --target=sse2,sse4,avx - - $(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_avx.obj;$(TargetDir)%(Filename)_ispc.h - $(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_avx.obj;$(TargetDir)%(Filename)_ispc.h - - - - - - \ No newline at end of file + diff --git a/examples/rt/rt.vcxproj b/examples/rt/rt.vcxproj index 9d77fe82..38b34879 100644 --- a/examples/rt/rt.vcxproj +++ b/examples/rt/rt.vcxproj @@ -25,22 +25,10 @@ rt sse2,sse4-x2,avx1-i32x8 - - v110 - - - v110 - - - v110 - - - v110 - - \ No newline at end of file + diff --git a/examples/simple/simple.vcxproj b/examples/simple/simple.vcxproj index 3d80467f..a540353c 100644 --- a/examples/simple/simple.vcxproj +++ b/examples/simple/simple.vcxproj @@ -18,160 +18,15 @@ x64 - - - - - - Document - -$(ISPC_compiler) -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --arch=x86 --target=sse2 - - -$(ISPC_compiler) -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --target=sse2 - - $(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_ispc.h - $(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_ispc.h - -$(ISPC_compiler) -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --arch=x86 --target=sse2 - - -$(ISPC_compiler) -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --target=sse2 - - $(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_ispc.h - $(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_ispc.h - - {947C5311-8B78-4D05-BEE4-BCF342D4B367} Win32Proj simple - ispc + simple + sse2 - - - Application - true - Unicode - v110 - - - 
Application - true - Unicode - v110 - - - Application - false - true - Unicode - v110 - - - Application - false - true - Unicode - v110 - - - - - - - - - - - - - - - - - - - true - $(ProjectDir)..\..;$(ExecutablePath) - - - true - $(ProjectDir)..\..;$(ExecutablePath) - - - false - $(ProjectDir)..\..;$(ExecutablePath) - - - false - $(ProjectDir)..\..;$(ExecutablePath) - - - - - - Level3 - Disabled - WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) - $(TargetDir) - - - Console - true - - - - - - - Level3 - Disabled - WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) - $(TargetDir) - - - Console - true - - - - - Level3 - - - MaxSpeed - true - true - WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) - $(TargetDir) - - - Console - true - true - true - - - - - Level3 - - - MaxSpeed - true - true - WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) - $(TargetDir) - - - Console - true - true - true - - - - - - \ No newline at end of file + + + + + diff --git a/examples/sort/sort.vcxproj b/examples/sort/sort.vcxproj index 98126b28..f50a16b4 100644 --- a/examples/sort/sort.vcxproj +++ b/examples/sort/sort.vcxproj @@ -25,22 +25,10 @@ sort sse2,sse4-x2,avx1-x2 - - v110 - - - v110 - - - v110 - - - v110 - - \ No newline at end of file + diff --git a/examples/stencil/stencil.vcxproj b/examples/stencil/stencil.vcxproj index 9e339efd..168039bc 100644 --- a/examples/stencil/stencil.vcxproj +++ b/examples/stencil/stencil.vcxproj @@ -25,22 +25,10 @@ stencil sse2,sse4-x2,avx1-i32x8 - - v110 - - - v110 - - - v110 - - - v110 - - \ No newline at end of file + diff --git a/examples/volume_rendering/volume.vcxproj b/examples/volume_rendering/volume.vcxproj index 3e2882ae..d0e3d8d0 100644 --- a/examples/volume_rendering/volume.vcxproj +++ b/examples/volume_rendering/volume.vcxproj @@ -25,22 +25,10 @@ volume sse2,sse4-x2,avx1-i32x8 - - v110 - - - v110 - - - v110 - - - v110 - - \ No newline at end of file + From 2e2fd394bfdba1adc62d54f622ab37d1de5191e5 Mon Sep 17 00:00:00 2001 From: Vsevolod 
Livinskij Date: Wed, 5 Mar 2014 01:30:16 +0400 Subject: [PATCH 14/28] Documents for saturating arithmetic was added. --- docs/ispc.rst | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/docs/ispc.rst b/docs/ispc.rst index 2c41301c..18663942 100644 --- a/docs/ispc.rst +++ b/docs/ispc.rst @@ -3615,6 +3615,41 @@ normalized exponent as a power of two in the ``pw2`` parameter. uniform int * uniform pw2) +Saturating Arithmetic +--------------------- +Saturating addition, subtraction, multiplication and division of all integer +types are provided by the ``ispc`` standard library. + +:: + + int8 saturating_add(uniform int8 a, uniform int8 b) + int8 saturating_add(varying int8 a, varying int8 b) + unsigned int8 saturating_add(uniform unsigned int8 a, uniform unsigned int8 b) + unsigned int8 saturating_add(varying unsigned int8 a, varying unsigned int8 b) + + int8 saturating_sub(uniform int8 a, uniform int8 b) + int8 saturating_sub(varying int8 a, varying int8 b) + unsigned int8 saturating_sub(uniform unsigned int8 a, uniform unsigned int8 b) + unsigned int8 saturating_sub(varying unsigned int8 a, varying unsigned int8 b) + + int8 saturating_mul(uniform int8 a, uniform int8 b) + int8 saturating_mul(varying int8 a, varying int8 b) + unsigned int8 saturating_mul(uniform unsigned int8 a, uniform unsigned int8 b) + unsigned int8 saturating_mul(varying unsigned int8 a, varying unsigned int8 b) + + int8 saturating_div(uniform int8 a, uniform int8 b) + int8 saturating_div(varying int8 a, varying int8 b) + unsigned int8 saturating_div(uniform unsigned int8 a, uniform unsigned int8 b) + unsigned int8 saturating_div(varying unsigned int8 a, varying unsigned int8 b) + + +In addition to the ``int8`` variants of saturating arithmetic functions listed +above, there are versions that support ``int16``, ``int32`` and ``int64`` +values as well. 
Functions that have the best performance are those +that support ``varying signed/unsigned int8/int16`` on Intel® SSE and Intel® AVX, +because they have a hardware implementation. + + Pseudo-Random Numbers --------------------- From 9ab8f4e10e8bd0670080ce1e78c40487ecb9dcd4 Mon Sep 17 00:00:00 2001 From: Ilia Filippov Date: Wed, 5 Mar 2014 10:12:30 +0400 Subject: [PATCH 15/28] support LLVM trunk after 202814-202842 revisions --- cbackend.cpp | 13 +++++++------ expr.cpp | 7 +++++-- func.cpp | 3 ++- module.cpp | 7 +++---- opt.cpp | 13 ++++--------- 5 files changed, 21 insertions(+), 22 deletions(-) diff --git a/cbackend.cpp b/cbackend.cpp index 1fcbfc2a..268d86b0 100644 --- a/cbackend.cpp +++ b/cbackend.cpp @@ -66,9 +66,15 @@ #if defined(LLVM_3_5) #include "llvm/IR/Verifier.h" #include + #include "llvm/IR/CallSite.h" + #include "llvm/IR/CFG.h" + #include "llvm/IR/GetElementPtrTypeIterator.h" #else #include "llvm/Analysis/Verifier.h" #include + #include "llvm/Support/CallSite.h" + #include "llvm/Support/CFG.h" + #include "llvm/Support/GetElementPtrTypeIterator.h" #endif #include "llvm/Analysis/ValueTracking.h" #include "llvm/CodeGen/Passes.h" @@ -82,18 +88,13 @@ #include "llvm/MC/MCRegisterInfo.h" #include "llvm/MC/MCSubtargetInfo.h" #include "llvm/MC/MCSymbol.h" -#if defined(LLVM_3_1) - #include "llvm/Target/TargetData.h" -#elif defined(LLVM_3_2) +#if defined(LLVM_3_2) #include "llvm/DataLayout.h" #else // LLVM 3.3+ #include "llvm/IR/DataLayout.h" #endif -#include "llvm/Support/CallSite.h" -#include "llvm/Support/CFG.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/FormattedStream.h" -#include "llvm/Support/GetElementPtrTypeIterator.h" #if defined(LLVM_3_1) || defined(LLVM_3_2) #include "llvm/Support/InstVisitor.h" #else // LLVM 3.3+ diff --git a/expr.cpp b/expr.cpp index 020a3b82..d448f891 100644 --- a/expr.cpp +++ b/expr.cpp @@ -74,8 +74,11 @@ #include #endif #include -#include - +#if defined(LLVM_3_5) + #include +#else + #include +#endif 
///////////////////////////////////////////////////////////////////////////////////// // Expr diff --git a/func.cpp b/func.cpp index 76ae43f5..9dbcbcfd 100644 --- a/func.cpp +++ b/func.cpp @@ -72,11 +72,12 @@ #if defined(LLVM_3_5) #include #include + #include #else #include #include + #include #endif -#include #include Function::Function(Symbol *s, Stmt *c) { diff --git a/module.cpp b/module.cpp index f43096be..014b7f5f 100644 --- a/module.cpp +++ b/module.cpp @@ -86,9 +86,7 @@ #include #include #include -#if defined(LLVM_3_1) - #include -#elif defined(LLVM_3_2) +#if defined(LLVM_3_2) #include #include #else // LLVM 3.3+ @@ -98,11 +96,12 @@ #if defined(LLVM_3_5) #include #include + #include #else #include #include + #include #endif -#include #include #include #include diff --git a/opt.cpp b/opt.cpp index e7e98ad7..5c27eb4f 100644 --- a/opt.cpp +++ b/opt.cpp @@ -71,9 +71,11 @@ #if defined(LLVM_3_5) #include #include + #include #else #include #include + #include #endif #include #include @@ -83,9 +85,7 @@ #include #include #include -#if defined(LLVM_3_1) - #include -#elif defined(LLVM_3_2) +#if defined(LLVM_3_2) #include #else // LLVM 3.3+ #include @@ -94,12 +94,7 @@ #include #include #include -#include -#if defined(LLVM_3_1) - #include -#else - #include -#endif +#include #include #ifdef ISPC_IS_LINUX #include From 6738af0a0c159e96cf766ab7aeda6c77e7056eb4 Mon Sep 17 00:00:00 2001 From: Ilia Filippov Date: Wed, 5 Mar 2014 20:18:36 +0400 Subject: [PATCH 16/28] changing uniform_min and uniform_max implementations for avx targets --- builtins/target-avx-common.ll | 41 ++++++++++++++++++----------------- tests/max-double-1.ispc | 19 ++++++++++++++++ tests/max-double-2.ispc | 18 +++++++++++++++ tests/max-float-1.ispc | 14 +++++++++--- tests/max-float-2.ispc | 12 +++++++--- tests/max-int-1.ispc | 11 +++++++--- tests/max-int.ispc | 10 ++++++--- tests/max-uint-1.ispc | 4 +++- tests/min-double-1.ispc | 19 ++++++++++++++++ tests/min-double-2.ispc | 18 +++++++++++++++ 
tests/min-float-1.ispc | 14 +++++++++--- tests/min-float-2.ispc | 18 +++++++++++++++ tests/min-float.ispc | 11 ---------- tests/min-int-1.ispc | 11 +++++++--- tests/min-int.ispc | 11 +++++++--- tests/min-uint-1.ispc | 8 ++++--- 16 files changed, 183 insertions(+), 56 deletions(-) create mode 100644 tests/max-double-1.ispc create mode 100644 tests/max-double-2.ispc create mode 100644 tests/min-double-1.ispc create mode 100644 tests/min-double-2.ispc create mode 100644 tests/min-float-2.ispc delete mode 100644 tests/min-float.ispc diff --git a/builtins/target-avx-common.ll b/builtins/target-avx-common.ll index 1c467476..54656d9f 100644 --- a/builtins/target-avx-common.ll +++ b/builtins/target-avx-common.ll @@ -203,49 +203,51 @@ define void @__fastmath() nounwind alwaysinline { ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; float min/max -declare <4 x float> @llvm.x86.sse.max.ss(<4 x float>, <4 x float>) nounwind readnone -declare <4 x float> @llvm.x86.sse.min.ss(<4 x float>, <4 x float>) nounwind readnone - define float @__max_uniform_float(float, float) nounwind readonly alwaysinline { - sse_binary_scalar(ret, 4, float, @llvm.x86.sse.max.ss, %0, %1) + %cmp = fcmp ogt float %1, %0 + %ret = select i1 %cmp, float %1, float %0 ret float %ret } define float @__min_uniform_float(float, float) nounwind readonly alwaysinline { - sse_binary_scalar(ret, 4, float, @llvm.x86.sse.min.ss, %0, %1) + %cmp = fcmp ogt float %1, %0 + %ret = select i1 %cmp, float %0, float %1 ret float %ret } ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; double precision min/max -declare <2 x double> @llvm.x86.sse2.max.sd(<2 x double>, <2 x double>) nounwind readnone -declare <2 x double> @llvm.x86.sse2.min.sd(<2 x double>, <2 x double>) nounwind readnone - define double @__min_uniform_double(double, double) nounwind readnone alwaysinline { - sse_binary_scalar(ret, 2, double, @llvm.x86.sse2.min.sd, %0, %1) + %cmp = fcmp ogt double %1, %0 
+ %ret = select i1 %cmp, double %0, double %1 ret double %ret } define double @__max_uniform_double(double, double) nounwind readnone alwaysinline { - sse_binary_scalar(ret, 2, double, @llvm.x86.sse2.max.sd, %0, %1) + %cmp = fcmp ogt double %1, %0 + %ret = select i1 %cmp, double %1, double %0 ret double %ret } +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +declare <4 x i32> @llvm.x86.sse41.pminsd(<4 x i32>, <4 x i32>) nounwind readnone +declare <4 x i32> @llvm.x86.sse41.pmaxsd(<4 x i32>, <4 x i32>) nounwind readnone +declare <4 x i32> @llvm.x86.sse41.pminud(<4 x i32>, <4 x i32>) nounwind readnone +declare <4 x i32> @llvm.x86.sse41.pmaxud(<4 x i32>, <4 x i32>) nounwind readnone ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; int min/max -declare <4 x i32> @llvm.x86.sse41.pminsd(<4 x i32>, <4 x i32>) nounwind readnone -declare <4 x i32> @llvm.x86.sse41.pmaxsd(<4 x i32>, <4 x i32>) nounwind readnone - define i32 @__min_uniform_int32(i32, i32) nounwind readonly alwaysinline { - sse_binary_scalar(ret, 4, i32, @llvm.x86.sse41.pminsd, %0, %1) + %cmp = icmp sgt i32 %1, %0 + %ret = select i1 %cmp, i32 %0, i32 %1 ret i32 %ret } define i32 @__max_uniform_int32(i32, i32) nounwind readonly alwaysinline { - sse_binary_scalar(ret, 4, i32, @llvm.x86.sse41.pmaxsd, %0, %1) + %cmp = icmp sgt i32 %1, %0 + %ret = select i1 %cmp, i32 %1, i32 %0 ret i32 %ret } @@ -253,16 +255,15 @@ define i32 @__max_uniform_int32(i32, i32) nounwind readonly alwaysinline { ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; unsigned int min/max -declare <4 x i32> @llvm.x86.sse41.pminud(<4 x i32>, <4 x i32>) nounwind readnone -declare <4 x i32> @llvm.x86.sse41.pmaxud(<4 x i32>, <4 x i32>) nounwind readnone - define i32 @__min_uniform_uint32(i32, i32) nounwind readonly alwaysinline { - sse_binary_scalar(ret, 4, i32, @llvm.x86.sse41.pminud, %0, %1) + %cmp = icmp ugt i32 %1, %0 + %ret = select i1 %cmp, i32 %0, i32 %1 ret 
i32 %ret } define i32 @__max_uniform_uint32(i32, i32) nounwind readonly alwaysinline { - sse_binary_scalar(ret, 4, i32, @llvm.x86.sse41.pmaxud, %0, %1) + %cmp = icmp ugt i32 %1, %0 + %ret = select i1 %cmp, i32 %1, i32 %0 ret i32 %ret } diff --git a/tests/max-double-1.ispc b/tests/max-double-1.ispc new file mode 100644 index 00000000..e9c4a6a3 --- /dev/null +++ b/tests/max-double-1.ispc @@ -0,0 +1,19 @@ + +export uniform int width() { return programCount; } + + + +export void f_du(uniform float RET[], uniform double aFOO[], uniform double b) { + double a = aFOO[programIndex]; + RET[programIndex] = max(3 * a, (double)10.f); + RET[width()-1] = max(b, (double)100); +} + + +export void result(uniform float RET[]) { + RET[programIndex] = 3 * (1+programIndex); + RET[0] = 10; + RET[1] = 10; + RET[2] = 10; + RET[programCount-1] = 100; +} diff --git a/tests/max-double-2.ispc b/tests/max-double-2.ispc new file mode 100644 index 00000000..5f4c854e --- /dev/null +++ b/tests/max-double-2.ispc @@ -0,0 +1,18 @@ + +export uniform int width() { return programCount; } + + + +export void f_du(uniform float RET[], uniform double aFOO[], uniform double b) { + double a = aFOO[programIndex]; + RET[programIndex] = max(-10 * (a-3), (double).1f); + RET[width() - 1] = max(-10 * b, (double)2); +} + +export void result(uniform float RET[]) { + RET[programIndex] = .1; + RET[0] = 20; + RET[1] = 10; + RET[programCount - 1] = 2; +} + diff --git a/tests/max-float-1.ispc b/tests/max-float-1.ispc index b77de7e3..24b9822d 100644 --- a/tests/max-float-1.ispc +++ b/tests/max-float-1.ispc @@ -3,9 +3,17 @@ export uniform int width() { return programCount; } -export void f_f(uniform float RET[], uniform float aFOO[]) { +export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) { float a = aFOO[programIndex]; - RET[programIndex] = max(10 * a, 10.f); + RET[programIndex] = max(3 * a, 10.f); + RET[width()-1] = max(b, 100); } -export void result(uniform float RET[]) { RET[programIndex] = 10 * 
(1+programIndex); } + +export void result(uniform float RET[]) { + RET[programIndex] = 3 * (1+programIndex); + RET[0] = 10; + RET[1] = 10; + RET[2] = 10; + RET[programCount-1] = 100; +} diff --git a/tests/max-float-2.ispc b/tests/max-float-2.ispc index ca025c2f..f990b102 100644 --- a/tests/max-float-2.ispc +++ b/tests/max-float-2.ispc @@ -3,10 +3,16 @@ export uniform int width() { return programCount; } -export void f_f(uniform float RET[], uniform float aFOO[]) { +export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) { float a = aFOO[programIndex]; - RET[programIndex] = max(-10 * a, 10.f); + RET[programIndex] = max(-10 * (a-3), .1f); + RET[width() - 1] = max(-10 * b, 2); } -export void result(uniform float RET[]) { RET[programIndex] = 10.; } +export void result(uniform float RET[]) { + RET[programIndex] = .1; + RET[0] = 20; + RET[1] = 10; + RET[programCount - 1] = 2; +} diff --git a/tests/max-int-1.ispc b/tests/max-int-1.ispc index f1492b8b..7a565d4c 100644 --- a/tests/max-int-1.ispc +++ b/tests/max-int-1.ispc @@ -3,11 +3,16 @@ export uniform int width() { return programCount; } -export void f_f(uniform float RET[], uniform float aFOO[]) { +export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) { float a = aFOO[programIndex]; int i = (int)a; - RET[programIndex] = max((int)200, i); + RET[programIndex] = max((int)2, i); + RET[width()-1] = max(10, (int)b); } -export void result(uniform float RET[]) { RET[programIndex] = 200.; } +export void result(uniform float RET[]) { + RET[programIndex] = programIndex + 1; + RET[0] = 2; + RET[programCount-1] = 10; +} diff --git a/tests/max-int.ispc b/tests/max-int.ispc index 3a4bb641..783a9274 100644 --- a/tests/max-int.ispc +++ b/tests/max-int.ispc @@ -3,11 +3,15 @@ export uniform int width() { return programCount; } -export void f_f(uniform float RET[], uniform float aFOO[]) { +export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) { float a = aFOO[programIndex]; 
int i = (int)a; - RET[programIndex] = max((int)-20, i); + RET[programIndex] = max((int)-2, -1 * i); + RET[width() - 1] = max(-2, -1 * (int)b); } -export void result(uniform float RET[]) { RET[programIndex] = 1+programIndex; } +export void result(uniform float RET[]) { + RET[programIndex] = -2; + RET[0] = -1; +} diff --git a/tests/max-uint-1.ispc b/tests/max-uint-1.ispc index d1143f5d..78a66625 100644 --- a/tests/max-uint-1.ispc +++ b/tests/max-uint-1.ispc @@ -1,14 +1,16 @@ export uniform int width() { return programCount; } -export void f_f(uniform float r[], uniform float a[]) { +export void f_fu(uniform float r[], uniform float a[], uniform float b) { unsigned int i = (unsigned int)a[programIndex]; r[programIndex] = max((unsigned int)2, i); + r[width() - 1] = max((unsigned int)10, (unsigned int)b); } export void result(uniform float r[]) { r[programIndex] = 1+programIndex; r[0] = 2; + r[programCount - 1] = 10; } diff --git a/tests/min-double-1.ispc b/tests/min-double-1.ispc new file mode 100644 index 00000000..813a99fe --- /dev/null +++ b/tests/min-double-1.ispc @@ -0,0 +1,19 @@ + +export uniform int width() { return programCount; } + + + +export void f_du(uniform float RET[], uniform double aFOO[], uniform double b) { + double a = aFOO[programIndex]; + RET[programIndex] = min(3 * a, (double)10.f); + RET[width()-1] = min(b, (double)100); +} + + +export void result(uniform float RET[]) { + RET[programIndex] = 10; + RET[0] = 3; + RET[1] = 6; + RET[2] = 9; + RET[programCount-1] = 5; +} diff --git a/tests/min-double-2.ispc b/tests/min-double-2.ispc new file mode 100644 index 00000000..26609b81 --- /dev/null +++ b/tests/min-double-2.ispc @@ -0,0 +1,18 @@ + +export uniform int width() { return programCount; } + + + +export void f_du(uniform float RET[], uniform double aFOO[], uniform double b) { + double a = aFOO[programIndex]; + RET[programIndex] = min(-10 * (a-3), (double).1f); + RET[width() - 1] = min(-10 * b, (double)2); +} + +export void result(uniform float 
RET[]) { + RET[programIndex] = -10 * (programIndex - 2); + RET[0] = .1; + RET[1] = .1; + RET[programCount - 1] = -50; +} + diff --git a/tests/min-float-1.ispc b/tests/min-float-1.ispc index 914ae994..5b62c5c5 100644 --- a/tests/min-float-1.ispc +++ b/tests/min-float-1.ispc @@ -3,9 +3,17 @@ export uniform int width() { return programCount; } -export void f_f(uniform float RET[], uniform float aFOO[]) { +export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) { float a = aFOO[programIndex]; - RET[programIndex] = min(10 * a, 10.f); + RET[programIndex] = min(3 * a, 10.f); + RET[width()-1] = min(b, 100); } -export void result(uniform float RET[]) { RET[programIndex] = 10.; } + +export void result(uniform float RET[]) { + RET[programIndex] = 10; + RET[0] = 3; + RET[1] = 6; + RET[2] = 9; + RET[programCount-1] = 5; +} diff --git a/tests/min-float-2.ispc b/tests/min-float-2.ispc new file mode 100644 index 00000000..85c226ca --- /dev/null +++ b/tests/min-float-2.ispc @@ -0,0 +1,18 @@ + +export uniform int width() { return programCount; } + + + +export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) { + float a = aFOO[programIndex]; + RET[programIndex] = min(-10 * (a-3), .1f); + RET[width() - 1] = min(-10 * b, 2); +} + +export void result(uniform float RET[]) { + RET[programIndex] = -10 * (programIndex - 2); + RET[0] = .1; + RET[1] = .1; + RET[programCount - 1] = -50; +} + diff --git a/tests/min-float.ispc b/tests/min-float.ispc deleted file mode 100644 index caedd962..00000000 --- a/tests/min-float.ispc +++ /dev/null @@ -1,11 +0,0 @@ - -export uniform int width() { return programCount; } - - - -export void f_f(uniform float RET[], uniform float aFOO[]) { - float a = aFOO[programIndex]; - RET[programIndex] = min(a, 200.f); -} - -export void result(uniform float RET[]) { RET[programIndex] = 1+programIndex; } diff --git a/tests/min-int-1.ispc b/tests/min-int-1.ispc index 1c81936f..86f0821d 100644 --- a/tests/min-int-1.ispc +++ 
b/tests/min-int-1.ispc @@ -3,11 +3,16 @@ export uniform int width() { return programCount; } -export void f_f(uniform float RET[], uniform float aFOO[]) { +export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) { float a = aFOO[programIndex]; int i = (int)a; - RET[programIndex] = min((int)-20, i); + RET[programIndex] = min((int)2, i); + RET[width()-1] = min(10, (int)b); } -export void result(uniform float RET[]) { RET[programIndex] = -20; } +export void result(uniform float RET[]) { + RET[programIndex] = 2; + RET[0] = 1; + RET[programCount-1] = 5; +} diff --git a/tests/min-int.ispc b/tests/min-int.ispc index 483b9b41..7f97e28c 100644 --- a/tests/min-int.ispc +++ b/tests/min-int.ispc @@ -3,11 +3,16 @@ export uniform int width() { return programCount; } -export void f_f(uniform float RET[], uniform float aFOO[]) { +export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) { float a = aFOO[programIndex]; int i = (int)a; - RET[programIndex] = min((int)200, i); + RET[programIndex] = min((int)-2, -1 * i); + RET[width() - 1] = min(-2, -1 * (int)b); } -export void result(uniform float RET[]) { RET[programIndex] = 1+programIndex; } +export void result(uniform float RET[]) { + RET[programIndex] = - programIndex - 1; + RET[0] = -2; + RET[programCount - 1] = -5; +} diff --git a/tests/min-uint-1.ispc b/tests/min-uint-1.ispc index d1cd4461..042382f0 100644 --- a/tests/min-uint-1.ispc +++ b/tests/min-uint-1.ispc @@ -1,14 +1,16 @@ export uniform int width() { return programCount; } -export void f_f(uniform float result[], uniform float aa[]) { - unsigned int i = (unsigned int)aa[programIndex]; - result[programIndex] = min((unsigned int)2, i); +export void f_fu(uniform float r[], uniform float a[], uniform float b) { + unsigned int i = (unsigned int)a[programIndex]; + r[programIndex] = min((unsigned int)2, i); + r[width() - 1] = min((unsigned int)10, (unsigned int)b); } export void result(uniform float r[]) { r[programIndex] = 2; r[0] = 1; + 
r[programCount - 1] = 5; } From 47f7900cd3ae66e7b62259898dd04d8590ea5d4b Mon Sep 17 00:00:00 2001 From: Ilia Filippov Date: Fri, 7 Mar 2014 16:28:56 +0400 Subject: [PATCH 17/28] support LLVM trunk --- builtins.cpp | 6 +++++- cbackend.cpp | 4 +++- ctx.h | 6 +++--- ispc.cpp | 6 +++--- module.h | 5 ++++- opt.cpp | 3 ++- type.cpp | 6 +++--- 7 files changed, 23 insertions(+), 13 deletions(-) diff --git a/builtins.cpp b/builtins.cpp index 8048bb5c..a30d10b3 100644 --- a/builtins.cpp +++ b/builtins.cpp @@ -66,7 +66,11 @@ #include #include #endif -#include +#if defined(LLVM_3_5) + #include +#else + #include +#endif #include #include #include diff --git a/cbackend.cpp b/cbackend.cpp index 268d86b0..8fc3b4ed 100644 --- a/cbackend.cpp +++ b/cbackend.cpp @@ -97,8 +97,10 @@ #include "llvm/Support/FormattedStream.h" #if defined(LLVM_3_1) || defined(LLVM_3_2) #include "llvm/Support/InstVisitor.h" -#else // LLVM 3.3+ +#elif defined (LLVM_3_3) || defined (LLVM_3_4) #include "llvm/InstVisitor.h" +#else // LLVM 3.5+ + #include "llvm/IR/InstVisitor.h" #endif #include "llvm/Support/MathExtras.h" #include "llvm/Support/TargetRegistry.h" diff --git a/ctx.h b/ctx.h index 4dd30053..f04b08dd 100644 --- a/ctx.h +++ b/ctx.h @@ -47,9 +47,9 @@ #include #include #endif -#if defined(LLVM_3_1) - #include - #include +#if defined(LLVM_3_5) + #include + #include #else #include #include diff --git a/ispc.cpp b/ispc.cpp index 0792291e..26c215b5 100644 --- a/ispc.cpp +++ b/ispc.cpp @@ -57,9 +57,9 @@ #include #include #endif -#if defined(LLVM_3_1) - #include - #include +#if defined(LLVM_3_5) + #include + #include #else #include #include diff --git a/module.h b/module.h index 3609260c..e117f933 100644 --- a/module.h +++ b/module.h @@ -41,9 +41,12 @@ #include "ispc.h" #include "ast.h" -#if !defined(LLVM_3_1) && !defined(LLVM_3_2) && !defined(LLVM_3_3) +#if defined(LLVM_3_4) #include #endif +#if defined(LLVM_3_5) + #include +#endif namespace llvm { diff --git a/opt.cpp b/opt.cpp index 5c27eb4f..cf44f485 
100644 --- a/opt.cpp +++ b/opt.cpp @@ -72,10 +72,12 @@ #include #include #include + #include #else #include #include #include + #include #endif #include #include @@ -94,7 +96,6 @@ #include #include #include -#include #include #ifdef ISPC_IS_LINUX #include diff --git a/type.cpp b/type.cpp index cf7ac85d..2e9d831e 100644 --- a/type.cpp +++ b/type.cpp @@ -50,9 +50,9 @@ #include #include #endif -#if defined(LLVM_3_1) - #include - #include +#if defined(LLVM_3_5) + #include + #include #else #include #include From dc00b4dd64389a2e82bf9e6bb3482e61264bc47b Mon Sep 17 00:00:00 2001 From: Vsevolod Livinskij Date: Thu, 6 Mar 2014 21:10:08 +0400 Subject: [PATCH 18/28] Undefined operation -INT64_MIN was fixed. --- docs/ispc.rst | 8 +++----- stdlib.ispc | 39 ++++++++++++++++++++++++++++++++------- 2 files changed, 35 insertions(+), 12 deletions(-) diff --git a/docs/ispc.rst b/docs/ispc.rst index 18663942..4819ee9c 100644 --- a/docs/ispc.rst +++ b/docs/ispc.rst @@ -3617,8 +3617,8 @@ normalized exponent as a power of two in the ``pw2`` parameter. Saturating Arithmetic --------------------- -A saturation addition, substraction, multiplication and division of all integer -types is provided by the ``ispc`` standard library. +A saturation (no overflow possible) addition, substraction, multiplication and +division of all integer types is provided by the ``ispc`` standard library. :: @@ -3645,9 +3645,7 @@ types is provided by the ``ispc`` standard library. In addition to the ``int8`` variants of saturating arithmetic functions listed above, there are versions that supports ``int16``, ``int32`` and ``int64`` -values as well. Functions that have best high-speed performance are functions -that support ``varying signed/unsined int8/int16`` on Intel® SSE and Intel® AVX, -because they have hardware implementation. +values as well. 
Pseudo-Random Numbers diff --git a/stdlib.ispc b/stdlib.ispc index d60219d0..a2ca02e7 100644 --- a/stdlib.ispc +++ b/stdlib.ispc @@ -4943,8 +4943,20 @@ static inline uniform int64 saturating_mul(uniform int64 a, uniform int64 b) { uniform unsigned int64 ret = 0; uniform int8 sign = (((a > 0) && (b > 0)) || ((a < 0) && (b < 0))) ? 1 : -1; - uniform unsigned int64 a_abs = (a > 0) ? a : -a; - uniform unsigned int64 b_abs = (b > 0) ? b : -b; + uniform unsigned int64 a_abs = 0; + uniform unsigned int64 b_abs = 0; + + if (a == INT64_MIN) + a_abs = (uniform unsigned int64) INT64_MIN; + // Operation "-" is undefined for "INT64_MIN". + //See 6.3.1.3 section in C99 standart. + else + a_abs = (a > 0) ? a : -a; + + if (b == INT64_MIN) + b_abs = (uniform unsigned int64) INT64_MIN; + else + b_abs = (b > 0) ? b : -b; uniform unsigned int32 a0 = a_abs & 0xFFFFFFFF; uniform unsigned int32 b0 = b_abs & 0xFFFFFFFF; @@ -4969,7 +4981,7 @@ static inline uniform int64 saturating_mul(uniform int64 a, uniform int64 b) { } - if ((sign < 0) && (ret >= -INT64_MIN)) { + if ((sign < 0) && (ret >= (uniform unsigned int64) INT64_MIN)) { return INT64_MIN; } else if ((sign > 0) && (ret >= INT64_MAX)) { return INT64_MAX; @@ -4981,9 +4993,22 @@ static inline uniform int64 saturating_mul(uniform int64 a, uniform int64 b) { static inline varying int64 saturating_mul(varying int64 a, varying int64 b) { varying unsigned int64 ret = 0; - varying int8 sign = (((a > 0) && (b > 0)) || ((a < 0) && (b < 0))) ? 1 : -1; - varying unsigned int64 a_abs = (a > 0) ? a : -a; - varying unsigned int64 b_abs = (b > 0) ? b : -b; + varying int8 sign = (((a > 0) && (b > 0)) || ((a < 0) && (b < 0))) ? 1 : -1; + varying unsigned int64 a_abs = 0; + varying unsigned int64 b_abs = 0; + + if (a == INT64_MIN) + a_abs = (varying unsigned int64) INT64_MIN; + // Operation "-" is undefined for "INT64_MIN". + //See 6.3.1.3 section in C99 standart. + else + a_abs = (a > 0) ? 
a : -a; + + if (b == INT64_MIN) + b_abs = (varying unsigned int64) INT64_MIN; + else + b_abs = (b > 0) ? b : -b; + varying unsigned int32 a0 = a_abs & 0xFFFFFFFF; varying unsigned int32 b0 = b_abs & 0xFFFFFFFF; @@ -5008,7 +5033,7 @@ static inline varying int64 saturating_mul(varying int64 a, varying int64 b) { } - if ((sign < 0) && (ret >= -INT64_MIN)) { + if ((sign < 0) && (ret >= (varying unsigned int64) INT64_MIN)) { return INT64_MIN; } else if ((sign > 0) && (ret >= INT64_MAX)) { return INT64_MAX; From 8999a69546a87798c70dc2f4200a1be8ce919560 Mon Sep 17 00:00:00 2001 From: Dmitry Babokin Date: Tue, 11 Mar 2014 18:40:16 +0400 Subject: [PATCH 19/28] Fix for off by one problem in debug info with LLVM 3.3+ --- type.cpp | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/type.cpp b/type.cpp index 2e9d831e..4b7b0628 100644 --- a/type.cpp +++ b/type.cpp @@ -1,5 +1,5 @@ /* - Copyright (c) 2010-2013, Intel Corporation + Copyright (c) 2010-2014, Intel Corporation All rights reserved. Redistribution and use in source and binary forms, with or without @@ -81,6 +81,7 @@ lShouldPrintName(const std::string &name) { the given element type. 
*/ static llvm::DIType lCreateDIArray(llvm::DIType eltType, int count) { +#ifdef LLVM_3_2 int lowerBound = 0, upperBound = count-1; if (count == 0) { @@ -90,6 +91,9 @@ lCreateDIArray(llvm::DIType eltType, int count) { } llvm::Value *sub = m->diBuilder->getOrCreateSubrange(lowerBound, upperBound); +#else // LLVM 3.3+ + llvm::Value *sub = m->diBuilder->getOrCreateSubrange(0, count); +#endif std::vector subs; subs.push_back(sub); llvm::DIArray subArray = m->diBuilder->getOrCreateArray(subs); @@ -571,7 +575,11 @@ AtomicType::GetDIType(llvm::DIDescriptor scope) const { } else if (variability == Variability::Varying) { llvm::DIType unifType = GetAsUniformType()->GetDIType(scope); +#ifdef LLVM_3_2 llvm::Value *sub = m->diBuilder->getOrCreateSubrange(0, g->target->getVectorWidth()-1); +#else // LLVM 3.3+ + llvm::Value *sub = m->diBuilder->getOrCreateSubrange(0, g->target->getVectorWidth()); +#endif llvm::DIArray subArray = m->diBuilder->getOrCreateArray(sub); uint64_t size = unifType.getSizeInBits() * g->target->getVectorWidth(); uint64_t align = unifType.getAlignInBits() * g->target->getVectorWidth(); @@ -838,7 +846,11 @@ EnumType::GetDIType(llvm::DIDescriptor scope) const { case Variability::Uniform: return diType; case Variability::Varying: { +#ifdef LLVM_3_2 llvm::Value *sub = m->diBuilder->getOrCreateSubrange(0, g->target->getVectorWidth()-1); +#else // LLVM 3.3+ + llvm::Value *sub = m->diBuilder->getOrCreateSubrange(0, g->target->getVectorWidth()); +#endif llvm::DIArray subArray = m->diBuilder->getOrCreateArray(sub); uint64_t size = diType.getSizeInBits() * g->target->getVectorWidth(); uint64_t align = diType.getAlignInBits() * g->target->getVectorWidth(); @@ -1720,7 +1732,11 @@ VectorType::LLVMType(llvm::LLVMContext *ctx) const { llvm::DIType VectorType::GetDIType(llvm::DIDescriptor scope) const { llvm::DIType eltType = base->GetDIType(scope); +#ifdef LLVM_3_2 llvm::Value *sub = m->diBuilder->getOrCreateSubrange(0, numElements-1); +#else // LLVM 3.3+ + llvm::Value 
*sub = m->diBuilder->getOrCreateSubrange(0, numElements); +#endif llvm::DIArray subArray = m->diBuilder->getOrCreateArray(sub); uint64_t sizeBits = eltType.getSizeInBits() * numElements; From ead5cc741d07d57810831739f0fd71d5272330da Mon Sep 17 00:00:00 2001 From: Ilia Filippov Date: Wed, 12 Mar 2014 12:58:50 +0400 Subject: [PATCH 20/28] support LLVM trunk after 203559 203213 and 203381 revisions --- builtins/util.m4 | 13 +++++++++++-- cbackend.cpp | 8 ++++++++ module.cpp | 8 ++++---- 3 files changed, 23 insertions(+), 6 deletions(-) diff --git a/builtins/util.m4 b/builtins/util.m4 index f395b6bc..ad0149ad 100644 --- a/builtins/util.m4 +++ b/builtins/util.m4 @@ -1497,7 +1497,12 @@ define <$1 x $2> @__atomic_compare_exchange_$3_global($2* %ptr, <$1 x $2> %cmp, per_lane($1, <$1 x MASK> %mask, ` %cmp_LANE_ID = extractelement <$1 x $2> %cmp, i32 LANE %val_LANE_ID = extractelement <$1 x $2> %val, i32 LANE - %r_LANE_ID = cmpxchg $2 * %ptr, $2 %cmp_LANE_ID, $2 %val_LANE_ID seq_cst + ifelse(LLVM_VERSION,LLVM_3_5,` + %r_LANE_ID = cmpxchg $2 * %ptr, $2 %cmp_LANE_ID, $2 %val_LANE_ID seq_cst seq_cst + ',` + %r_LANE_ID = cmpxchg $2 * %ptr, $2 %cmp_LANE_ID, $2 %val_LANE_ID seq_cst + ') + %rp_LANE_ID = getelementptr $2 * %rptr32, i32 LANE store $2 %r_LANE_ID, $2 * %rp_LANE_ID') @@ -1507,7 +1512,11 @@ define <$1 x $2> @__atomic_compare_exchange_$3_global($2* %ptr, <$1 x $2> %cmp, define $2 @__atomic_compare_exchange_uniform_$3_global($2* %ptr, $2 %cmp, $2 %val) nounwind alwaysinline { - %r = cmpxchg $2 * %ptr, $2 %cmp, $2 %val seq_cst + ifelse(LLVM_VERSION,LLVM_3_5,` + %r = cmpxchg $2 * %ptr, $2 %cmp, $2 %val seq_cst seq_cst + ',` + %r = cmpxchg $2 * %ptr, $2 %cmp, $2 %val seq_cst + ') ret $2 %r } ') diff --git a/cbackend.cpp b/cbackend.cpp index 8fc3b4ed..cb56cb82 100644 --- a/cbackend.cpp +++ b/cbackend.cpp @@ -464,7 +464,11 @@ namespace { // Must not be used in inline asm, extractelement, or shufflevector. 
if (I.hasOneUse()) { +#if defined(LLVM_3_5) + const llvm::Instruction &User = llvm::cast(*I.user_back()); +#else const llvm::Instruction &User = llvm::cast(*I.use_back()); +#endif if (isInlineAsm(User) || llvm::isa(User) || llvm::isa(User) || llvm::isa(User) || llvm::isa(User)) @@ -472,7 +476,11 @@ namespace { } // Only inline instruction it if it's use is in the same BB as the inst. +#if defined(LLVM_3_5) + return I.getParent() == llvm::cast(I.user_back())->getParent(); +#else return I.getParent() == llvm::cast(I.use_back())->getParent(); +#endif } // isDirectAlloca - Define fixed sized allocas in the entry block as direct diff --git a/module.cpp b/module.cpp index 014b7f5f..6aeeddfd 100644 --- a/module.cpp +++ b/module.cpp @@ -2099,12 +2099,12 @@ Module::execPreprocessor(const char *infilename, llvm::raw_string_ostream *ostre } } -#if defined(LLVM_3_1) - inst.getLangOpts().BCPLComment = 1; -#else inst.getLangOpts().LineComment = 1; -#endif +#if defined(LLVM_3_5) + inst.createPreprocessor(clang::TU_Complete); +#else inst.createPreprocessor(); +#endif diagPrinter->BeginSourceFile(inst.getLangOpts(), &inst.getPreprocessor()); clang::DoPrintPreprocessedInput(inst.getPreprocessor(), From 1c0729df59dba5eb94fd6cd896e4ffdb3b02513d Mon Sep 17 00:00:00 2001 From: Dmitry Babokin Date: Wed, 12 Mar 2014 19:40:52 +0400 Subject: [PATCH 21/28] Clarifying comment on new functions with saturated arithmetics --- stdlib.ispc | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/stdlib.ispc b/stdlib.ispc index a2ca02e7..731bc0bc 100644 --- a/stdlib.ispc +++ b/stdlib.ispc @@ -1,6 +1,6 @@ // -*- mode: c++ -*- /* - Copyright (c) 2010-2012, Intel Corporation + Copyright (c) 2010-2014, Intel Corporation All rights reserved. 
Redistribution and use in source and binary forms, with or without @@ -4947,9 +4947,12 @@ static inline uniform int64 saturating_mul(uniform int64 a, uniform int64 b) { uniform unsigned int64 b_abs = 0; if (a == INT64_MIN) + // Operation "-" is undefined for "INT64_MIN", as it causes overflow. + // But converting INT64_MIN to unsigned type yields the correct result, + // i.e. it will be positive value -INT64_MIN. + // See 6.3.1.3 section in C99 standart for more details (ISPC follows + // C standard, unless it's specifically different in the language). a_abs = (uniform unsigned int64) INT64_MIN; - // Operation "-" is undefined for "INT64_MIN". - //See 6.3.1.3 section in C99 standart. else a_abs = (a > 0) ? a : -a; @@ -4998,9 +5001,12 @@ static inline varying int64 saturating_mul(varying int64 a, varying int64 b) { varying unsigned int64 b_abs = 0; if (a == INT64_MIN) + // Operation "-" is undefined for "INT64_MIN", as it causes overflow. + // But converting INT64_MIN to unsigned type yields the correct result, + // i.e. it will be positive value -INT64_MIN. + // See 6.3.1.3 section in C99 standart for more details (ISPC follows + // C standard, unless it's specifically different in the language). a_abs = (varying unsigned int64) INT64_MIN; - // Operation "-" is undefined for "INT64_MIN". - //See 6.3.1.3 section in C99 standart. else a_abs = (a > 0) ? a : -a; From f0ce2acc4fec1473e7f0cb0ec0937db33107ae5a Mon Sep 17 00:00:00 2001 From: Dmitry Babokin Date: Wed, 12 Mar 2014 19:42:08 +0400 Subject: [PATCH 22/28] Copyright update, VS2010->VS2012 update, small fix in saturated arithmetic description --- docs/ispc.rst | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/ispc.rst b/docs/ispc.rst index 3fd31978..f5ba673e 100644 --- a/docs/ispc.rst +++ b/docs/ispc.rst @@ -354,7 +354,7 @@ the ``vout`` array before the next iteration of the ``foreach`` loop runs. On Linux\* and Mac OS\*, the makefile in that directory compiles this program. 
For Windows\*, open the ``examples/examples.sln`` file in Microsoft Visual -C++ 2010\* to build this (and the other) examples. In either case, +C++ 2012\* to build this (and the other) examples. In either case, build it now! We'll walk through the details of the compilation steps in the following section, `Using The ISPC Compiler`_.) In addition to compiling the ``ispc`` program, in this case the ``ispc`` compiler also @@ -3618,7 +3618,7 @@ normalized exponent as a power of two in the ``pw2`` parameter. Saturating Arithmetic --------------------- A saturation (no overflow possible) addition, substraction, multiplication and -division of all integer types is provided by the ``ispc`` standard library. +division of all integer types are provided by the ``ispc`` standard library. :: @@ -4958,7 +4958,7 @@ countries. * Other names and brands may be claimed as the property of others. -Copyright(C) 2011-2013, Intel Corporation. All rights reserved. +Copyright(C) 2011-2014, Intel Corporation. All rights reserved. Optimization Notice From 8f8a9d89ef76c6dbd6fa9c88a0b3e785043f5e5c Mon Sep 17 00:00:00 2001 From: Dmitry Babokin Date: Wed, 12 Mar 2014 19:43:30 +0400 Subject: [PATCH 23/28] Removing trailing spaces in stdlib.ispc --- stdlib.ispc | 732 ++++++++++++++++++++++++++-------------------------- 1 file changed, 366 insertions(+), 366 deletions(-) diff --git a/stdlib.ispc b/stdlib.ispc index 731bc0bc..a3845ded 100644 --- a/stdlib.ispc +++ b/stdlib.ispc @@ -29,13 +29,13 @@ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ /** @file stdlib.ispc @brief Portion of the ispc standard library implementation that's in - ispc code + ispc code */ #if (ISPC_MASK_BITS == 1) @@ -162,52 +162,52 @@ static inline int16 broadcast(int16 v, uniform int i) { return __broadcast_i16(v, i); } -__declspec(safe) +__declspec(safe) static inline int32 broadcast(int32 v, uniform int i) { return __broadcast_i32(v, i); } -__declspec(safe) +__declspec(safe) static inline double broadcast(double v, uniform int i) { return __broadcast_double(v, i); } -__declspec(safe) +__declspec(safe) static inline int64 broadcast(int64 v, uniform int i) { return __broadcast_i64(v, i); } -__declspec(safe) +__declspec(safe) static inline float rotate(float v, uniform int i) { return __rotate_float(v, i); } -__declspec(safe) +__declspec(safe) static inline int8 rotate(int8 v, uniform int i) { return __rotate_i8(v, i); } -__declspec(safe) +__declspec(safe) static inline int16 rotate(int16 v, uniform int i) { return __rotate_i16(v, i); } -__declspec(safe) +__declspec(safe) static inline int32 rotate(int32 v, uniform int i) { return __rotate_i32(v, i); } -__declspec(safe) +__declspec(safe) static inline double rotate(double v, uniform int i) { return __rotate_double(v, i); } -__declspec(safe) +__declspec(safe) static inline int64 rotate(int64 v, uniform int i) { return __rotate_i64(v, i); } -__declspec(safe) +__declspec(safe) static inline float shift(float v, uniform int i) { varying float result; unmasked { @@ -216,7 +216,7 @@ static inline float shift(float v, uniform int i) { return result; } -__declspec(safe) +__declspec(safe) static inline int8 shift(int8 v, uniform int i) { varying int8 result; unmasked { @@ -225,7 +225,7 @@ static inline int8 shift(int8 v, uniform int i) { return result; } -__declspec(safe) +__declspec(safe) static inline int16 shift(int16 v, uniform int i) { varying int16 result; unmasked { @@ -234,7 +234,7 @@ static inline int16 shift(int16 v, uniform int i) { return result; } -__declspec(safe) 
+__declspec(safe) static inline int32 shift(int32 v, uniform int i) { varying int32 result; unmasked { @@ -243,7 +243,7 @@ static inline int32 shift(int32 v, uniform int i) { return result; } -__declspec(safe) +__declspec(safe) static inline double shift(double v, uniform int i) { varying double result; unmasked { @@ -252,7 +252,7 @@ static inline double shift(double v, uniform int i) { return result; } -__declspec(safe) +__declspec(safe) static inline int64 shift(int64 v, uniform int i) { varying int64 result; unmasked { @@ -261,184 +261,184 @@ static inline int64 shift(int64 v, uniform int i) { return result; } -__declspec(safe) +__declspec(safe) static inline float shuffle(float v, int i) { return __shuffle_float(v, i); } -__declspec(safe) +__declspec(safe) static inline int8 shuffle(int8 v, int i) { return __shuffle_i8(v, i); } -__declspec(safe) +__declspec(safe) static inline int16 shuffle(int16 v, int i) { return __shuffle_i16(v, i); } -__declspec(safe) +__declspec(safe) static inline int32 shuffle(int32 v, int i) { return __shuffle_i32(v, i); } -__declspec(safe) +__declspec(safe) static inline double shuffle(double v, int i) { return __shuffle_double(v, i); } -__declspec(safe) +__declspec(safe) static inline int64 shuffle(int64 v, int i) { return __shuffle_i64(v, i); } -__declspec(safe) +__declspec(safe) static inline float shuffle(float v0, float v1, int i) { return __shuffle2_float(v0, v1, i); } -__declspec(safe) +__declspec(safe) static inline int8 shuffle(int8 v0, int8 v1, int i) { return __shuffle2_i8(v0, v1, i); } -__declspec(safe) +__declspec(safe) static inline int16 shuffle(int16 v0, int16 v1, int i) { return __shuffle2_i16(v0, v1, i); } -__declspec(safe) +__declspec(safe) static inline int32 shuffle(int32 v0, int32 v1, int i) { return __shuffle2_i32(v0, v1, i); } -__declspec(safe) +__declspec(safe) static inline double shuffle(double v0, double v1, int i) { return __shuffle2_double(v0, v1, i); } -__declspec(safe) +__declspec(safe) static inline 
int64 shuffle(int64 v0, int64 v1, int i) { return __shuffle2_i64(v0, v1, i); } // x[i] -__declspec(safe,cost1) +__declspec(safe,cost1) static inline uniform float extract(float x, uniform int i) { return floatbits(__extract_int32((int)intbits(x), i)); } -__declspec(safe,cost1) +__declspec(safe,cost1) static inline uniform int8 extract(int8 x, uniform int i) { return __extract_int8(x, i); } -__declspec(safe,cost1) +__declspec(safe,cost1) static inline uniform unsigned int8 extract(unsigned int8 x, uniform int i) { return __extract_int8(x, (unsigned int)i); } -__declspec(safe,cost1) +__declspec(safe,cost1) static inline uniform int16 extract(int16 x, uniform int i) { return __extract_int16(x, i); } -__declspec(safe,cost1) +__declspec(safe,cost1) static inline uniform unsigned int16 extract(unsigned int16 x, uniform int i) { return __extract_int16(x, (unsigned int)i); } -__declspec(safe,cost1) +__declspec(safe,cost1) static inline uniform int32 extract(int32 x, uniform int i) { return __extract_int32(x, i); } -__declspec(safe,cost1) +__declspec(safe,cost1) static inline uniform unsigned int32 extract(unsigned int32 x, uniform int i) { return __extract_int32(x, (unsigned int)i); } -__declspec(safe,cost1) +__declspec(safe,cost1) static inline uniform double extract(double x, uniform int i) { return doublebits(__extract_int64((int64)intbits(x), i)); } -__declspec(safe,cost1) +__declspec(safe,cost1) static inline uniform int64 extract(int64 x, uniform int i) { return __extract_int64(x, i); } -__declspec(safe,cost1) +__declspec(safe,cost1) static inline uniform unsigned int64 extract(unsigned int64 x, uniform int i) { return __extract_int64(x, (unsigned int)i); } // x[i] = v -__declspec(safe,cost1) +__declspec(safe,cost1) static inline float insert(float x, uniform int i, uniform float v) { return floatbits(__insert_int32((int)intbits(x), i, (int)intbits(v))); } -__declspec(safe,cost1) +__declspec(safe,cost1) static inline int8 insert(int8 x, uniform int i, uniform int8 v) 
{ return __insert_int8(x, i, v); } -__declspec(safe,cost1) -static inline unsigned int8 insert(unsigned int8 x, uniform int i, +__declspec(safe,cost1) +static inline unsigned int8 insert(unsigned int8 x, uniform int i, uniform unsigned int8 v) { return __insert_int8(x, (unsigned int)i, v); } -__declspec(safe,cost1) +__declspec(safe,cost1) static inline int16 insert(int16 x, uniform int i, uniform int16 v) { return __insert_int16(x, i, v); } -__declspec(safe,cost1) -static inline unsigned int16 insert(unsigned int16 x, uniform int i, +__declspec(safe,cost1) +static inline unsigned int16 insert(unsigned int16 x, uniform int i, uniform unsigned int16 v) { return __insert_int16(x, (unsigned int)i, v); } -__declspec(safe,cost1) +__declspec(safe,cost1) static inline int32 insert(int32 x, uniform int i, uniform int32 v) { return __insert_int32(x, i, v); } -__declspec(safe,cost1) -static inline unsigned int32 insert(unsigned int32 x, uniform int i, +__declspec(safe,cost1) +static inline unsigned int32 insert(unsigned int32 x, uniform int i, uniform unsigned int32 v) { return __insert_int32(x, (unsigned int)i, v); } -__declspec(safe,cost1) +__declspec(safe,cost1) static inline double insert(double x, uniform int i, uniform double v) { return doublebits(__insert_int64((int64)intbits(x), i, (int64)intbits(v))); } -__declspec(safe,cost1) +__declspec(safe,cost1) static inline int64 insert(int64 x, uniform int i, uniform int64 v) { return __insert_int64(x, i, v); } -__declspec(safe,cost1) -static inline unsigned int64 insert(unsigned int64 x, uniform int i, +__declspec(safe,cost1) +static inline unsigned int64 insert(unsigned int64 x, uniform int i, uniform unsigned int64 v) { return __insert_int64(x, (unsigned int)i, v); } -__declspec(safe,cost1) +__declspec(safe,cost1) static inline uniform int32 sign_extend(uniform bool v) { return __sext_uniform_bool(v); } -__declspec(safe,cost1) +__declspec(safe,cost1) static inline int32 sign_extend(bool v) { return __sext_varying_bool(v); 
} -__declspec(safe) +__declspec(safe) static inline uniform bool any(bool v) { // We only care about whether "any" is true for the active program instances, // so we have to make v with the current program mask. @@ -449,7 +449,7 @@ static inline uniform bool any(bool v) { #endif } -__declspec(safe) +__declspec(safe) static inline uniform bool all(bool v) { // As with any(), we need to explicitly mask v with the current program mask // so we're only looking at the current lanes @@ -471,17 +471,17 @@ static inline uniform bool none(bool v) { #endif } -__declspec(safe) +__declspec(safe) static inline uniform int32 popcnt(uniform int32 v) { return __popcnt_int32(v); } -__declspec(safe) +__declspec(safe) static inline uniform int popcnt(uniform int64 v) { return (int32)__popcnt_int64(v); } -__declspec(safe) +__declspec(safe) static inline int popcnt(int v) { int r; for (uniform int i = 0; i < programCount; ++i) @@ -489,7 +489,7 @@ static inline int popcnt(int v) { return __mask ? r : 0; } -__declspec(safe) +__declspec(safe) static inline int popcnt(int64 v) { int r; for (uniform int i = 0; i < programCount; ++i) @@ -497,7 +497,7 @@ static inline int popcnt(int64 v) { return __mask ? 
r : 0; } -__declspec(safe) +__declspec(safe) static inline uniform int popcnt(bool v) { // As with any() and all(), only count across the active lanes #if (ISPC_MASK_BITS == 1) @@ -507,7 +507,7 @@ static inline uniform int popcnt(bool v) { #endif } -__declspec(safe) +__declspec(safe) static inline uniform unsigned int64 lanemask() { return __movmsk(__mask); } @@ -515,17 +515,17 @@ static inline uniform unsigned int64 lanemask() { /////////////////////////////////////////////////////////////////////////// // memcpy/memmove/memset -static inline void memcpy(void * uniform dst, void * uniform src, +static inline void memcpy(void * uniform dst, void * uniform src, uniform int32 count) { __memcpy32((int8 * uniform)dst, (int8 * uniform)src, count); } -static inline void memcpy64(void * uniform dst, void * uniform src, +static inline void memcpy64(void * uniform dst, void * uniform src, uniform int64 count) { __memcpy64((int8 * uniform)dst, (int8 * uniform)src, count); } -static inline void memcpy(void * varying dst, void * varying src, +static inline void memcpy(void * varying dst, void * varying src, int32 count) { void * uniform da[programCount]; void * uniform sa[programCount]; @@ -539,7 +539,7 @@ static inline void memcpy(void * varying dst, void * varying src, } } -static inline void memcpy64(void * varying dst, void * varying src, +static inline void memcpy64(void * varying dst, void * varying src, int64 count) { void * uniform da[programCount]; void * uniform sa[programCount]; @@ -553,17 +553,17 @@ static inline void memcpy64(void * varying dst, void * varying src, } } -static inline void memmove(void * uniform dst, void * uniform src, +static inline void memmove(void * uniform dst, void * uniform src, uniform int32 count) { __memmove32((int8 * uniform)dst, (int8 * uniform)src, count); } -static inline void memmove64(void * uniform dst, void * uniform src, +static inline void memmove64(void * uniform dst, void * uniform src, uniform int64 count) { 
__memmove64((int8 * uniform)dst, (int8 * uniform)src, count); } -static inline void memmove(void * varying dst, void * varying src, +static inline void memmove(void * varying dst, void * varying src, int32 count) { void * uniform da[programCount]; void * uniform sa[programCount]; @@ -577,7 +577,7 @@ static inline void memmove(void * varying dst, void * varying src, } } -static inline void memmove64(void * varying dst, void * varying src, +static inline void memmove64(void * varying dst, void * varying src, int64 count) { void * uniform da[programCount]; void * uniform sa[programCount]; @@ -591,12 +591,12 @@ static inline void memmove64(void * varying dst, void * varying src, } } -static inline void memset(void * uniform ptr, uniform int8 val, +static inline void memset(void * uniform ptr, uniform int8 val, uniform int32 count) { __memset32((int8 * uniform)ptr, val, count); } -static inline void memset64(void * uniform ptr, uniform int8 val, +static inline void memset64(void * uniform ptr, uniform int8 val, uniform int64 count) { __memset64((int8 * uniform)ptr, val, count); } @@ -622,55 +622,55 @@ static inline void memset64(void * varying ptr, int8 val, int64 count) { /////////////////////////////////////////////////////////////////////////// // count leading/trailing zeros -__declspec(safe,cost1) +__declspec(safe,cost1) static inline uniform unsigned int32 count_leading_zeros(uniform unsigned int32 v) { return __count_leading_zeros_i32(v); } -__declspec(safe,cost1) +__declspec(safe,cost1) static inline uniform unsigned int64 count_leading_zeros(uniform unsigned int64 v) { return __count_leading_zeros_i64(v); } -__declspec(safe,cost1) +__declspec(safe,cost1) static inline uniform unsigned int32 count_trailing_zeros(uniform unsigned int32 v) { return __count_trailing_zeros_i32(v); } -__declspec(safe,cost1) +__declspec(safe,cost1) static inline uniform unsigned int64 count_trailing_zeros(uniform unsigned int64 v) { return __count_trailing_zeros_i64(v); } 
-__declspec(safe,cost1) +__declspec(safe,cost1) static inline uniform int32 count_leading_zeros(uniform int32 v) { return __count_leading_zeros_i32(v); } -__declspec(safe,cost1) +__declspec(safe,cost1) static inline uniform int64 count_leading_zeros(uniform int64 v) { return __count_leading_zeros_i64(v); } -__declspec(safe,cost1) +__declspec(safe,cost1) static inline uniform int32 count_trailing_zeros(uniform int32 v) { return __count_trailing_zeros_i32(v); } -__declspec(safe,cost1) +__declspec(safe,cost1) static inline uniform int64 count_trailing_zeros(uniform int64 v) { return __count_trailing_zeros_i64(v); } -__declspec(safe) +__declspec(safe) static inline unsigned int32 count_leading_zeros(unsigned int32 v) { unsigned int32 r; @@ -679,7 +679,7 @@ count_leading_zeros(unsigned int32 v) { return r; } -__declspec(safe) +__declspec(safe) static inline unsigned int64 count_leading_zeros(unsigned int64 v) { unsigned int64 r; @@ -688,7 +688,7 @@ count_leading_zeros(unsigned int64 v) { return r; } -__declspec(safe) +__declspec(safe) static inline unsigned int32 count_trailing_zeros(unsigned int32 v) { unsigned int32 r; @@ -697,7 +697,7 @@ count_trailing_zeros(unsigned int32 v) { return r; } -__declspec(safe) +__declspec(safe) static inline unsigned int64 count_trailing_zeros(unsigned int64 v) { unsigned int64 r; @@ -706,7 +706,7 @@ count_trailing_zeros(unsigned int64 v) { return r; } -__declspec(safe) +__declspec(safe) static inline int32 count_leading_zeros(int32 v) { int32 r; @@ -715,7 +715,7 @@ count_leading_zeros(int32 v) { return r; } -__declspec(safe) +__declspec(safe) static inline int64 count_leading_zeros(int64 v) { int64 r; @@ -724,7 +724,7 @@ count_leading_zeros(int64 v) { return r; } -__declspec(safe) +__declspec(safe) static inline int32 count_trailing_zeros(int32 v) { int32 r; @@ -733,7 +733,7 @@ count_trailing_zeros(int32 v) { return r; } -__declspec(safe) +__declspec(safe) static inline int64 count_trailing_zeros(int64 v) { int64 r; @@ -746,7 +746,7 @@ 
count_trailing_zeros(int64 v) { // AOS/SOA conversion static inline void -aos_to_soa3(uniform float a[], varying float * uniform v0, +aos_to_soa3(uniform float a[], varying float * uniform v0, varying float * uniform v1, varying float * uniform v2) { __aos_to_soa3_float(a, v0, v1, v2); } @@ -771,7 +771,7 @@ soa_to_aos4(float v0, float v1, float v2, float v3, uniform float a[]) { static inline void aos_to_soa3(uniform int32 a[], varying int32 * uniform v0, varying int32 * uniform v1, varying int32 * uniform v2) { - aos_to_soa3((uniform float * uniform)a, (varying float * uniform)v0, + aos_to_soa3((uniform float * uniform)a, (varying float * uniform)v0, (varying float * uniform)v1, (varying float * uniform)v2); } @@ -782,39 +782,39 @@ soa_to_aos3(int32 v0, int32 v1, int32 v2, uniform int32 a[]) { } static inline void -aos_to_soa4(uniform int32 a[], varying int32 * uniform v0, - varying int32 * uniform v1, varying int32 * uniform v2, +aos_to_soa4(uniform int32 a[], varying int32 * uniform v0, + varying int32 * uniform v1, varying int32 * uniform v2, varying int32 * uniform v3) { - aos_to_soa4((uniform float * uniform)a, (varying float * uniform )v0, - (varying float * uniform)v1, (varying float * uniform)v2, + aos_to_soa4((uniform float * uniform)a, (varying float * uniform )v0, + (varying float * uniform)v1, (varying float * uniform)v2, (varying float * uniform)v3); } static inline void soa_to_aos4(int32 v0, int32 v1, int32 v2, int32 v3, uniform int32 a[]) { - soa_to_aos4(floatbits(v0), floatbits(v1), floatbits(v2), floatbits(v3), + soa_to_aos4(floatbits(v0), floatbits(v1), floatbits(v2), floatbits(v3), (uniform float * uniform)a); } /////////////////////////////////////////////////////////////////////////// // Prefetching -__declspec(safe,cost1) +__declspec(safe,cost1) static inline void prefetch_l1(const void * uniform ptr) { __prefetch_read_uniform_1((uniform int8 * uniform)ptr); } -__declspec(safe,cost1) +__declspec(safe,cost1) static inline void 
prefetch_l2(const void * uniform ptr) { __prefetch_read_uniform_2((uniform int8 * uniform)ptr); } -__declspec(safe,cost1) +__declspec(safe,cost1) static inline void prefetch_l3(const void * uniform ptr) { __prefetch_read_uniform_3((uniform int8 * uniform)ptr); } -__declspec(safe,cost1) +__declspec(safe,cost1) static inline void prefetch_nt(const void * uniform ptr) { __prefetch_read_uniform_nt((uniform int8 * uniform)ptr); } @@ -1001,13 +1001,13 @@ static inline uniform unsigned int32 reduce_add(unsigned int16 x) { return __reduce_add_int16(__mask ? x : (int16)0); } -__declspec(safe) +__declspec(safe) static inline uniform float reduce_add(float x) { // zero the lanes where the mask is off return __reduce_add_float(__mask ? x : 0.); } -__declspec(safe) +__declspec(safe) static inline uniform float reduce_min(float v) { // For the lanes where the mask is off, replace the given value with // infinity, so that it doesn't affect the result. @@ -1022,7 +1022,7 @@ static inline uniform float reduce_min(float v) { return result; } -__declspec(safe) +__declspec(safe) static inline uniform float reduce_max(float v) { // For the lanes where the mask is off, replace the given value with // negative infinity, so that it doesn't affect the result. @@ -1037,13 +1037,13 @@ static inline uniform float reduce_max(float v) { return result; } -__declspec(safe) +__declspec(safe) static inline uniform int64 reduce_add(int32 x) { // Zero out the values for lanes that aren't running return __reduce_add_int32(__mask ? x : 0); } -__declspec(safe) +__declspec(safe) static inline uniform int reduce_min(int v) { // Set values for non-running lanes to the maximum integer value so // they don't affect the result. @@ -1051,7 +1051,7 @@ static inline uniform int reduce_min(int v) { return __reduce_min_int32(__mask ? 
v : int_max); } -__declspec(safe) +__declspec(safe) static inline uniform int reduce_max(int v) { // Set values for non-running lanes to the minimum integer value so // they don't affect the result. @@ -1059,14 +1059,14 @@ static inline uniform int reduce_max(int v) { return __reduce_max_int32(__mask ? v : int_min); } -__declspec(safe) +__declspec(safe) static inline uniform unsigned int64 reduce_add(unsigned int32 x) { // Set values for non-running lanes to zero so they don't affect the // result. return __reduce_add_int32(__mask ? x : 0); } -__declspec(safe) +__declspec(safe) static inline uniform unsigned int reduce_min(unsigned int v) { // Set values for non-running lanes to the maximum unsigned integer // value so they don't affect the result. @@ -1074,20 +1074,20 @@ static inline uniform unsigned int reduce_min(unsigned int v) { return __reduce_min_uint32(__mask ? v : uint_max); } -__declspec(safe) +__declspec(safe) static inline uniform unsigned int reduce_max(unsigned int v) { // Set values for non-running lanes to zero so they don't affect the // result. return __reduce_max_uint32(__mask ? v : 0); } -__declspec(safe) +__declspec(safe) static inline uniform double reduce_add(double x) { // zero the lanes where the mask is off return __reduce_add_double(__mask ? 
x : 0.); } -__declspec(safe) +__declspec(safe) static inline uniform double reduce_min(double v) { int64 iflt_max = 0x7ff0000000000000; // infinity // unmasked block is needed to make sure that argument for unmasked @@ -1100,7 +1100,7 @@ static inline uniform double reduce_min(double v) { return result; } -__declspec(safe) +__declspec(safe) static inline uniform double reduce_max(double v) { const int64 iflt_neg_max = 0xfff0000000000000; // -infinity // unmasked block is needed to make sure that argument for unmasked @@ -1113,13 +1113,13 @@ static inline uniform double reduce_max(double v) { return result; } -__declspec(safe) +__declspec(safe) static inline uniform int64 reduce_add(int64 x) { // Zero out the values for lanes that aren't running return __reduce_add_int64(__mask ? x : 0); } -__declspec(safe) +__declspec(safe) static inline uniform int64 reduce_min(int64 v) { // Set values for non-running lanes to the maximum integer value so // they don't affect the result. @@ -1127,7 +1127,7 @@ static inline uniform int64 reduce_min(int64 v) { return __reduce_min_int64(__mask ? v : int_max); } -__declspec(safe) +__declspec(safe) static inline uniform int64 reduce_max(int64 v) { // Set values for non-running lanes to the minimum integer value so // they don't affect the result. @@ -1135,14 +1135,14 @@ static inline uniform int64 reduce_max(int64 v) { return __reduce_max_int64(__mask ? v : int_min); } -__declspec(safe) +__declspec(safe) static inline uniform unsigned int64 reduce_add(unsigned int64 x) { // Set values for non-running lanes to zero so they don't affect the // result. return __reduce_add_int64(__mask ? x : 0); } -__declspec(safe) +__declspec(safe) static inline uniform unsigned int64 reduce_min(unsigned int64 v) { // Set values for non-running lanes to the maximum unsigned integer // value so they don't affect the result. @@ -1150,7 +1150,7 @@ static inline uniform unsigned int64 reduce_min(unsigned int64 v) { return __reduce_min_uint64(__mask ? 
v : uint_max); } -__declspec(safe) +__declspec(safe) static inline uniform unsigned int64 reduce_max(unsigned int64 v) { // Set values for non-running lanes to zero so they don't affect the // result. @@ -1234,7 +1234,7 @@ static unsigned int64 exclusive_scan_or(unsigned int64 v) { /////////////////////////////////////////////////////////////////////////// // packed load, store -static inline uniform int +static inline uniform int packed_load_active(uniform unsigned int a[], varying unsigned int * uniform vals) { return __packed_load_active(a, vals, (UIntMaskType)__mask); @@ -1253,12 +1253,12 @@ packed_store_active2(uniform unsigned int a[], } -static inline uniform int +static inline uniform int packed_load_active(uniform int a[], varying int * uniform vals) { return __packed_load_active(a, vals, (IntMaskType)__mask); } -static inline uniform int +static inline uniform int packed_store_active(uniform int a[], int vals) { return __packed_store_active(a, vals, (IntMaskType)__mask); } @@ -1276,7 +1276,7 @@ static inline uniform int num_cores() { return __num_cores(); } -__declspec(safe) +__declspec(safe) static inline uniform int64 clock() { return __clock(); } @@ -1304,7 +1304,7 @@ static inline bool isnan(double v) { return v != v; } -__declspec(safe,cost1) +__declspec(safe,cost1) static inline float abs(float a) { // Floating-point hack: zeroing the high bit clears the sign unsigned int i = intbits(a); @@ -1312,14 +1312,14 @@ static inline float abs(float a) { return floatbits(i); } -__declspec(safe,cost1) +__declspec(safe,cost1) static inline uniform float abs(uniform float a) { uniform unsigned int i = intbits(a); i &= 0x7fffffff; return floatbits(i); } -__declspec(safe,cost1) +__declspec(safe,cost1) static inline double abs(double a) { // zeroing the high bit clears the sign unsigned int64 i = intbits(a); @@ -1327,103 +1327,103 @@ static inline double abs(double a) { return doublebits(i); } -__declspec(safe,cost1) +__declspec(safe,cost1) static inline uniform 
double abs(uniform double a) { uniform unsigned int64 i = intbits(a); i &= 0x7fffffffffffffff; return doublebits(i); } -__declspec(safe,cost1) +__declspec(safe,cost1) static inline unsigned int signbits(float x) { unsigned int i = intbits(x); return (i & 0x80000000); } -__declspec(safe,cost1) +__declspec(safe,cost1) static inline uniform unsigned int signbits(uniform float x) { uniform unsigned int i = intbits(x); return (i & 0x80000000); } -__declspec(safe,cost1) +__declspec(safe,cost1) static inline unsigned int64 signbits(double x) { unsigned int64 i = intbits(x); return (i & 0x8000000000000000); } -__declspec(safe,cost1) +__declspec(safe,cost1) static inline uniform unsigned int64 signbits(uniform double x) { uniform unsigned int64 i = intbits(x); return (i & 0x8000000000000000); } -__declspec(safe,cost2) +__declspec(safe,cost2) static inline float round(float x) { return __round_varying_float(x); } -__declspec(safe,cost2) +__declspec(safe,cost2) static inline uniform float round(uniform float x) { return __round_uniform_float(x); } -__declspec(safe,cost2) +__declspec(safe,cost2) static inline double round(double x) { return __round_varying_double(x); } -__declspec(safe,cost2) +__declspec(safe,cost2) static inline uniform double round(uniform double x) { return __round_uniform_double(x); } -__declspec(safe,cost2) +__declspec(safe,cost2) static inline float floor(float x) { return __floor_varying_float(x); } -__declspec(safe,cost2) +__declspec(safe,cost2) static inline uniform float floor(uniform float x) { return __floor_uniform_float(x); } -__declspec(safe,cost2) +__declspec(safe,cost2) static inline double floor(double x) { return __floor_varying_double(x); } -__declspec(safe,cost2) +__declspec(safe,cost2) static inline uniform double floor(uniform double x) { return __floor_uniform_double(x); } -__declspec(safe,cost2) +__declspec(safe,cost2) static inline float ceil(float x) { return __ceil_varying_float(x); } -__declspec(safe,cost2) +__declspec(safe,cost2) 
static inline uniform float ceil(uniform float x) { return __ceil_uniform_float(x); } -__declspec(safe,cost2) +__declspec(safe,cost2) static inline double ceil(double x) { return __ceil_varying_double(x); } -__declspec(safe,cost2) +__declspec(safe,cost2) static inline uniform double ceil(uniform double x) { return __ceil_uniform_double(x); } -__declspec(safe) +__declspec(safe) static inline float rcp(float v) { return __rcp_varying_float(v); } -__declspec(safe) +__declspec(safe) static inline uniform float rcp(uniform float v) { return __rcp_uniform_float(v); } @@ -1445,16 +1445,16 @@ static inline QUAL double __rcp_safe_##QUAL##_double(QUAL double x) \ QUAL double exp = doublebits( 0x7fd0000000000000 + ~ex ); \ QUAL double y = rcp((QUAL float)(x*exp)); \ return __rcp_iterate_##QUAL##_double(x, y*exp); \ -} +} RCPD(varying) -__declspec(safe) -static inline double rcp(double v) { +__declspec(safe) +static inline double rcp(double v) { if (__have_native_rcpd) return __rcp_varying_double(v); else return __rcp_safe_varying_double(v); -} +} RCPD(uniform) __declspec(safe) @@ -1470,22 +1470,22 @@ static inline uniform double rcp(uniform double v) { // float -__declspec(safe,cost1) +__declspec(safe,cost1) static inline float min(float a, float b) { return __min_varying_float(a, b); } -__declspec(safe,cost1) +__declspec(safe,cost1) static inline uniform float min(uniform float a, uniform float b) { return __min_uniform_float(a, b); } -__declspec(safe,cost1) +__declspec(safe,cost1) static inline float max(float a, float b) { return __max_varying_float(a, b); } -__declspec(safe,cost1) +__declspec(safe,cost1) static inline uniform float max(uniform float a, uniform float b) { return __max_uniform_float(a, b); } @@ -1493,22 +1493,22 @@ static inline uniform float max(uniform float a, uniform float b) { // double -__declspec(safe) +__declspec(safe) static inline double min(double a, double b) { return __min_varying_double(a, b); } -__declspec(safe) +__declspec(safe) static 
inline uniform double min(uniform double a, uniform double b) { return __min_uniform_double(a, b); } -__declspec(safe) +__declspec(safe) static inline double max(double a, double b) { return __max_varying_double(a, b); } -__declspec(safe) +__declspec(safe) static inline uniform double max(uniform double a, uniform double b) { return __max_uniform_double(a, b); } @@ -1522,7 +1522,7 @@ static inline uniform unsigned int8 min(uniform unsigned int8 a, } __declspec(safe,cost1) -static inline uniform unsigned int8 max(uniform unsigned int8 a, +static inline uniform unsigned int8 max(uniform unsigned int8 a, uniform unsigned int8 b) { return (a > b) ? a : b; } @@ -1560,13 +1560,13 @@ static inline int8 max(int8 a, int8 b) { // int16 __declspec(safe,cost1) -static inline uniform unsigned int16 min(uniform unsigned int16 a, +static inline uniform unsigned int16 min(uniform unsigned int16 a, uniform unsigned int16 b) { return (a < b) ? a : b; } __declspec(safe,cost1) -static inline uniform unsigned int16 max(uniform unsigned int16 a, +static inline uniform unsigned int16 max(uniform unsigned int16 a, uniform unsigned int16 b) { return (a > b) ? 
a : b; } @@ -1715,14 +1715,14 @@ static inline uniform double clamp(uniform double v, uniform double low, uniform // int8 __declspec(safe,cost2) -static inline unsigned int8 clamp(unsigned int8 v, unsigned int8 low, +static inline unsigned int8 clamp(unsigned int8 v, unsigned int8 low, unsigned int8 high) { return min(max(v, low), high); } __declspec(safe,cost2) -static inline uniform unsigned int8 clamp(uniform unsigned int8 v, - uniform unsigned int8 low, +static inline uniform unsigned int8 clamp(uniform unsigned int8 v, + uniform unsigned int8 low, uniform unsigned int8 high) { return min(max(v, low), high); } @@ -1733,7 +1733,7 @@ static inline int8 clamp(int8 v, int8 low, int8 high) { } __declspec(safe,cost2) -static inline uniform int8 clamp(uniform int8 v, uniform int8 low, +static inline uniform int8 clamp(uniform int8 v, uniform int8 low, uniform int8 high) { return min(max(v, low), high); } @@ -1741,14 +1741,14 @@ static inline uniform int8 clamp(uniform int8 v, uniform int8 low, // int16 __declspec(safe,cost2) -static inline unsigned int16 clamp(unsigned int16 v, unsigned int16 low, +static inline unsigned int16 clamp(unsigned int16 v, unsigned int16 low, unsigned int16 high) { return min(max(v, low), high); } __declspec(safe,cost2) -static inline uniform unsigned int16 clamp(uniform unsigned int16 v, - uniform unsigned int16 low, +static inline uniform unsigned int16 clamp(uniform unsigned int16 v, + uniform unsigned int16 low, uniform unsigned int16 high) { return min(max(v, low), high); } @@ -1759,7 +1759,7 @@ static inline int16 clamp(int16 v, int16 low, int16 high) { } __declspec(safe,cost2) -static inline uniform int16 clamp(uniform int16 v, uniform int16 low, +static inline uniform int16 clamp(uniform int16 v, uniform int16 low, uniform int16 high) { return min(max(v, low), high); } @@ -1772,7 +1772,7 @@ static inline unsigned int clamp(unsigned int v, unsigned int low, unsigned int } __declspec(safe,cost2) -static inline uniform unsigned int 
clamp(uniform unsigned int v, uniform unsigned int low, +static inline uniform unsigned int clamp(uniform unsigned int v, uniform unsigned int low, uniform unsigned int high) { return min(max(v, low), high); } @@ -1790,14 +1790,14 @@ static inline uniform int clamp(uniform int v, uniform int low, uniform int high // int64 __declspec(safe,cost2) -static inline unsigned int64 clamp(unsigned int64 v, unsigned int64 low, +static inline unsigned int64 clamp(unsigned int64 v, unsigned int64 low, unsigned int64 high) { return min(max(v, low), high); } __declspec(safe,cost2) -static inline uniform unsigned int64 clamp(uniform unsigned int64 v, - uniform unsigned int64 low, +static inline uniform unsigned int64 clamp(uniform unsigned int64 v, + uniform unsigned int64 low, uniform unsigned int64 high) { return min(max(v, low), high); } @@ -1808,7 +1808,7 @@ static inline int64 clamp(int64 v, int64 low, int64 high) { } __declspec(safe,cost2) -static inline uniform int64 clamp(uniform int64 v, uniform int64 low, +static inline uniform int64 clamp(uniform int64 v, uniform int64 low, uniform int64 high) { return min(max(v, low), high); } @@ -2025,8 +2025,8 @@ static inline void *atomic_swap_global(void ** ptr, void * value) { (intptr_t)value); } -static inline void * -atomic_compare_exchange_global(void ** uniform ptr, +static inline void * +atomic_compare_exchange_global(void ** uniform ptr, void * oldval, void * newval) { return (void *)atomic_compare_exchange_global((intptr_t * uniform)ptr, (intptr_t)oldval, @@ -2034,8 +2034,8 @@ atomic_compare_exchange_global(void ** uniform ptr, } static inline void * uniform -atomic_compare_exchange_global(void ** uniform ptr, void * uniform oldval, - void * uniform newval) { +atomic_compare_exchange_global(void ** uniform ptr, void * uniform oldval, + void * uniform newval) { return (void * uniform)atomic_compare_exchange_global((intptr_t * uniform)ptr, (uniform intptr_t)oldval, (uniform intptr_t)newval); @@ -2085,17 +2085,17 @@ static 
inline uniform int32 __or(uniform int32 a, uniform int32 b) { return a | static inline uniform int32 __xor(uniform int32 a, uniform int32 b) { return a ^ b; } static inline uniform int32 __swap(uniform int32 a, uniform int32 b) { return b; } -static inline uniform unsigned int32 __add(uniform unsigned int32 a, +static inline uniform unsigned int32 __add(uniform unsigned int32 a, uniform unsigned int32 b) { return a+b; } -static inline uniform unsigned int32 __sub(uniform unsigned int32 a, +static inline uniform unsigned int32 __sub(uniform unsigned int32 a, uniform unsigned int32 b) { return a-b; } -static inline uniform unsigned int32 __and(uniform unsigned int32 a, +static inline uniform unsigned int32 __and(uniform unsigned int32 a, uniform unsigned int32 b) { return a & b; } -static inline uniform unsigned int32 __or(uniform unsigned int32 a, +static inline uniform unsigned int32 __or(uniform unsigned int32 a, uniform unsigned int32 b) { return a | b; } -static inline uniform unsigned int32 __xor(uniform unsigned int32 a, +static inline uniform unsigned int32 __xor(uniform unsigned int32 a, uniform unsigned int32 b) { return a ^ b; } -static inline uniform unsigned int32 __swap(uniform unsigned int32 a, +static inline uniform unsigned int32 __swap(uniform unsigned int32 a, uniform unsigned int32 b) { return b; } @@ -2110,17 +2110,17 @@ static inline uniform int64 __or(uniform int64 a, uniform int64 b) { return a | static inline uniform int64 __xor(uniform int64 a, uniform int64 b) { return a ^ b; } static inline uniform int64 __swap(uniform int64 a, uniform int64 b) { return b; } -static inline uniform unsigned int64 __add(uniform unsigned int64 a, +static inline uniform unsigned int64 __add(uniform unsigned int64 a, uniform unsigned int64 b) { return a+b; } -static inline uniform unsigned int64 __sub(uniform unsigned int64 a, +static inline uniform unsigned int64 __sub(uniform unsigned int64 a, uniform unsigned int64 b) { return a-b; } -static inline uniform 
unsigned int64 __and(uniform unsigned int64 a, +static inline uniform unsigned int64 __and(uniform unsigned int64 a, uniform unsigned int64 b) { return a & b; } -static inline uniform unsigned int64 __or(uniform unsigned int64 a, +static inline uniform unsigned int64 __or(uniform unsigned int64 a, uniform unsigned int64 b) { return a | b; } -static inline uniform unsigned int64 __xor(uniform unsigned int64 a, +static inline uniform unsigned int64 __xor(uniform unsigned int64 a, uniform unsigned int64 b) { return a ^ b; } -static inline uniform unsigned int64 __swap(uniform unsigned int64 a, +static inline uniform unsigned int64 __swap(uniform unsigned int64 a, uniform unsigned int64 b) { return b; } static inline uniform double __add(uniform double a, uniform double b) { return a+b; } @@ -2239,8 +2239,8 @@ static inline void *atomic_swap_local(void ** ptr, void * value) { (intptr_t)value); } -static inline void * -atomic_compare_exchange_local(void ** uniform ptr, +static inline void * +atomic_compare_exchange_local(void ** uniform ptr, void * oldval, void * newval) { return (void *)atomic_compare_exchange_local((intptr_t * uniform)ptr, (intptr_t)oldval, @@ -2248,8 +2248,8 @@ atomic_compare_exchange_local(void ** uniform ptr, } static inline void * uniform -atomic_compare_exchange_local(void ** uniform ptr, void * uniform oldval, - void * uniform newval) { +atomic_compare_exchange_local(void ** uniform ptr, void * uniform oldval, + void * uniform newval) { return (void * uniform)atomic_compare_exchange_local((intptr_t * uniform)ptr, (uniform intptr_t)oldval, (uniform intptr_t)newval); @@ -2335,7 +2335,7 @@ static inline uniform float frexp(uniform float x, uniform int * uniform pw2) { __declspec(safe) static inline float sin(float x_full) { - if (__have_native_trigonometry) + if (__have_native_trigonometry) { return __sin_varying_float(x_full); } @@ -2350,7 +2350,7 @@ static inline float sin(float x_full) { } return ret; } - else if (__math_lib == __math_lib_ispc 
|| + else if (__math_lib == __math_lib_ispc || __math_lib == __math_lib_ispc_fast) { static const float pi_over_two_vec = 1.57079637050628662109375; static const float two_over_pi_vec = 0.636619746685028076171875; @@ -2401,7 +2401,7 @@ static inline float sin(float x_full) { __declspec(safe) static inline uniform float sin(uniform float x_full) { - if (__have_native_trigonometry) + if (__have_native_trigonometry) { return __sin_uniform_float(x_full); } @@ -2409,7 +2409,7 @@ static inline uniform float sin(uniform float x_full) { __math_lib == __math_lib_svml) { return __stdlib_sinf(x_full); } - else if (__math_lib == __math_lib_ispc || + else if (__math_lib == __math_lib_ispc || __math_lib == __math_lib_ispc_fast) { static const uniform float pi_over_two_vec = 1.57079637050628662109375; static const uniform float two_over_pi_vec = 0.636619746685028076171875; @@ -2476,13 +2476,13 @@ static inline float asin(float x0) { bool isnan = (x > 1); float v; - if (__have_native_trigonometry) + if (__have_native_trigonometry) { return __asin_varying_float(x0); } else if (__math_lib == __math_lib_svml) { return __svml_asinf(x0); - } + } else if (__math_lib == __math_lib_system) { float ret; foreach_active (i) { @@ -2497,15 +2497,15 @@ static inline float asin(float x0) { // fpminimax(((asin(x)-pi/2)/-sqrt(1-x)), [|0,1,2,3,4,5,6,7,8,9,10|], // [|single...|], [1e-20;.9999999999999999]); // avg error: 8.5716801e-09, max error: 2.1373853e-07 - v = 1.57079637050628662109375f + - x * (-0.21460501849651336669921875f + - x * (8.9116774499416351318359375e-2f + - x * (-5.146093666553497314453125e-2f + - x * (3.7269376218318939208984375e-2f + - x * (-3.5882405936717987060546875e-2f + + v = 1.57079637050628662109375f + + x * (-0.21460501849651336669921875f + + x * (8.9116774499416351318359375e-2f + + x * (-5.146093666553497314453125e-2f + + x * (3.7269376218318939208984375e-2f + + x * (-3.5882405936717987060546875e-2f + x * (4.14929799735546112060546875e-2f + x * 
(-4.25077490508556365966796875e-2f + - x * (3.05023305118083953857421875e-2f + + x * (3.05023305118083953857421875e-2f + x * (-1.2897425331175327301025390625e-2f + x * 2.38926825113594532012939453125e-3f))))))))); } @@ -2515,11 +2515,11 @@ static inline float asin(float x0) { // fpminimax(((asin(x)-pi/2)/-sqrt(1-x)), [|0,1,2,3,4,5|],[|single...|], // [1e-20;.9999999999999999]); // avg error: 1.1105439e-06, max error 1.3187528e-06 - v = 1.57079517841339111328125f + - x * (-0.21450997889041900634765625f + - x * (8.78556668758392333984375e-2f + - x * (-4.489909112453460693359375e-2f + - x * (1.928029954433441162109375e-2f + + v = 1.57079517841339111328125f + + x * (-0.21450997889041900634765625f + + x * (8.78556668758392333984375e-2f + + x * (-4.489909112453460693359375e-2f + + x * (1.928029954433441162109375e-2f + x * (-4.3095736764371395111083984375e-3f))))); } @@ -2541,7 +2541,7 @@ static inline uniform float asin(uniform float x0) { uniform float x = abs(x0); uniform bool isnan = (x > 1); uniform float v; - if (__have_native_trigonometry) + if (__have_native_trigonometry) { return __asin_uniform_float(x0); } @@ -2555,15 +2555,15 @@ static inline uniform float asin(uniform float x0) { // fpminimax(((asin(x)-pi/2)/-sqrt(1-x)), [|0,1,2,3,4,5,6,7,8,9,10|], // [|single...|], [1e-20;.9999999999999999]); // avg error: 8.5716801e-09, max error: 2.1373853e-07 - v = 1.57079637050628662109375f + - x * (-0.21460501849651336669921875f + - x * (8.9116774499416351318359375e-2f + - x * (-5.146093666553497314453125e-2f + - x * (3.7269376218318939208984375e-2f + - x * (-3.5882405936717987060546875e-2f + + v = 1.57079637050628662109375f + + x * (-0.21460501849651336669921875f + + x * (8.9116774499416351318359375e-2f + + x * (-5.146093666553497314453125e-2f + + x * (3.7269376218318939208984375e-2f + + x * (-3.5882405936717987060546875e-2f + x * (4.14929799735546112060546875e-2f + x * (-4.25077490508556365966796875e-2f + - x * (3.05023305118083953857421875e-2f + + x * 
(3.05023305118083953857421875e-2f + x * (-1.2897425331175327301025390625e-2f + x * 2.38926825113594532012939453125e-3f))))))))); } @@ -2573,11 +2573,11 @@ static inline uniform float asin(uniform float x0) { // fpminimax(((asin(x)-pi/2)/-sqrt(1-x)), [|0,1,2,3,4,5|],[|single...|], // [1e-20;.9999999999999999]); // avg error: 1.1105439e-06, max error 1.3187528e-06 - v = 1.57079517841339111328125f + - x * (-0.21450997889041900634765625f + - x * (8.78556668758392333984375e-2f + - x * (-4.489909112453460693359375e-2f + - x * (1.928029954433441162109375e-2f + + v = 1.57079517841339111328125f + + x * (-0.21450997889041900634765625f + + x * (8.78556668758392333984375e-2f + + x * (-4.489909112453460693359375e-2f + + x * (1.928029954433441162109375e-2f + x * (-4.3095736764371395111083984375e-3f))))); } @@ -2595,7 +2595,7 @@ static inline uniform float asin(uniform float x0) { __declspec(safe) static inline float cos(float x_full) { - if (__have_native_trigonometry) + if (__have_native_trigonometry) { return __cos_varying_float(x_full); } @@ -2610,7 +2610,7 @@ static inline float cos(float x_full) { } return ret; } - else if (__math_lib == __math_lib_ispc || + else if (__math_lib == __math_lib_ispc || __math_lib == __math_lib_ispc_fast) { static const float pi_over_two_vec = 1.57079637050628662109375; static const float two_over_pi_vec = 0.636619746685028076171875; @@ -2660,7 +2660,7 @@ static inline float cos(float x_full) { __declspec(safe) static inline uniform float cos(uniform float x_full) { - if (__have_native_trigonometry) + if (__have_native_trigonometry) { return __cos_uniform_float(x_full); } @@ -2668,7 +2668,7 @@ static inline uniform float cos(uniform float x_full) { __math_lib == __math_lib_svml) { return __stdlib_cosf(x_full); } - else if (__math_lib == __math_lib_ispc || + else if (__math_lib == __math_lib_ispc || __math_lib == __math_lib_ispc_fast) { static const uniform float pi_over_two_vec = 1.57079637050628662109375; static const uniform float 
two_over_pi_vec = 0.636619746685028076171875; @@ -2729,7 +2729,7 @@ static inline uniform float cos(uniform float x_full) { __declspec(safe) static inline float acos(float v) { - if (__have_native_trigonometry) + if (__have_native_trigonometry) return __acos_varying_float(v); else return 1.57079637050628662109375 - asin(v); @@ -2737,7 +2737,7 @@ static inline float acos(float v) { __declspec(safe) static inline double acos(const double v) { - if (__have_native_trigonometry) + if (__have_native_trigonometry) return __acos_varying_double(v); else return 1.57079637050628662109375d0 - asin(v); @@ -2746,7 +2746,7 @@ static inline double acos(const double v) { __declspec(safe) static inline uniform float acos(uniform float v) { - if (__have_native_trigonometry) + if (__have_native_trigonometry) return __acos_uniform_float(v); else return 1.57079637050628662109375 - asin(v); @@ -2754,7 +2754,7 @@ static inline uniform float acos(uniform float v) { __declspec(safe) static inline uniform double acos(const uniform double v) { - if (__have_native_trigonometry) + if (__have_native_trigonometry) return __acos_uniform_double(v); else return 1.57079637050628662109375d0 - asin(v); @@ -2762,9 +2762,9 @@ static inline uniform double acos(const uniform double v) { __declspec(safe) -static inline void sincos(float x_full, varying float * uniform sin_result, +static inline void sincos(float x_full, varying float * uniform sin_result, varying float * uniform cos_result) { - if (__have_native_trigonometry) + if (__have_native_trigonometry) { __sincos_varying_float(x_full,sin_result,cos_result); } @@ -2779,7 +2779,7 @@ static inline void sincos(float x_full, varying float * uniform sin_result, *cos_result = insert(*cos_result, i, c); } } - else if (__math_lib == __math_lib_ispc || + else if (__math_lib == __math_lib_ispc || __math_lib == __math_lib_ispc_fast) { const float pi_over_two_vec = 1.57079637050628662109375; const float two_over_pi_vec = 0.636619746685028076171875; @@ -2838,7 
+2838,7 @@ static inline void sincos(float x_full, varying float * uniform sin_result, __declspec(safe) static inline void sincos(uniform float x_full, uniform float * uniform sin_result, uniform float * uniform cos_result) { - if (__have_native_trigonometry) + if (__have_native_trigonometry) { __sincos_uniform_float(x_full, sin_result, cos_result); } @@ -2846,7 +2846,7 @@ static inline void sincos(uniform float x_full, uniform float * uniform sin_resu __math_lib == __math_lib_svml) { __stdlib_sincosf(x_full, sin_result, cos_result); } - else if (__math_lib == __math_lib_ispc || + else if (__math_lib == __math_lib_ispc || __math_lib == __math_lib_ispc_fast) { const uniform float pi_over_two_vec = 1.57079637050628662109375; const uniform float two_over_pi_vec = 0.636619746685028076171875; @@ -2904,7 +2904,7 @@ static inline void sincos(uniform float x_full, uniform float * uniform sin_resu __declspec(safe) static inline float tan(float x_full) { - if (__have_native_trigonometry) + if (__have_native_trigonometry) { return __tan_varying_float(x_full); } @@ -2919,7 +2919,7 @@ static inline float tan(float x_full) { } return ret; } - else if (__math_lib == __math_lib_ispc || + else if (__math_lib == __math_lib_ispc || __math_lib == __math_lib_ispc_fast) { const float pi_over_four_vec = 0.785398185253143310546875; const float four_over_pi_vec = 1.27323949337005615234375; @@ -2987,7 +2987,7 @@ static inline float tan(float x_full) { __declspec(safe) static inline uniform float tan(uniform float x_full) { - if (__have_native_trigonometry) + if (__have_native_trigonometry) { return __tan_uniform_float(x_full); } @@ -2995,7 +2995,7 @@ static inline uniform float tan(uniform float x_full) { __math_lib == __math_lib_svml) { return __stdlib_tanf(x_full); } - else if (__math_lib == __math_lib_ispc || + else if (__math_lib == __math_lib_ispc || __math_lib == __math_lib_ispc_fast) { const uniform float pi_over_four_vec = 0.785398185253143310546875; const uniform float 
four_over_pi_vec = 1.27323949337005615234375; @@ -3063,7 +3063,7 @@ static inline uniform float tan(uniform float x_full) { __declspec(safe) static inline float atan(float x_full) { - if (__have_native_trigonometry) + if (__have_native_trigonometry) { return __atan_varying_float(x_full); } @@ -3078,7 +3078,7 @@ static inline float atan(float x_full) { } return ret; } - else if (__math_lib == __math_lib_ispc || + else if (__math_lib == __math_lib_ispc || __math_lib == __math_lib_ispc_fast) { const float pi_over_two_vec = 1.57079637050628662109375; // atan(-x) = -atan(x) (so flip from negative to positive first) @@ -3118,7 +3118,7 @@ static inline float atan(float x_full) { __declspec(safe) static inline uniform float atan(uniform float x_full) { - if (__have_native_trigonometry) + if (__have_native_trigonometry) { return __atan_uniform_float(x_full); } @@ -3126,7 +3126,7 @@ static inline uniform float atan(uniform float x_full) { __math_lib == __math_lib_svml) { return __stdlib_atanf(x_full); } - else if (__math_lib == __math_lib_ispc || + else if (__math_lib == __math_lib_ispc || __math_lib == __math_lib_ispc_fast) { const uniform float pi_over_two_vec = 1.57079637050628662109375; // atan(-x) = -atan(x) (so flip from negative to positive first) @@ -3166,7 +3166,7 @@ static inline uniform float atan(uniform float x_full) { __declspec(safe) static inline float atan2(float y, float x) { - if (__have_native_trigonometry) + if (__have_native_trigonometry) { return __atan2_varying_float(y,x); } @@ -3181,7 +3181,7 @@ static inline float atan2(float y, float x) { } return ret; } - else if (__math_lib == __math_lib_ispc || + else if (__math_lib == __math_lib_ispc || __math_lib == __math_lib_ispc_fast) { const float pi_vec = 3.1415926536; const float pi_over_two_vec = 1.5707963267; @@ -3209,7 +3209,7 @@ static inline float atan2(float y, float x) { __declspec(safe) static inline uniform float atan2(uniform float y, uniform float x) { - if (__have_native_trigonometry) + if 
(__have_native_trigonometry) { return __atan2_uniform_float(y,x); } @@ -3217,7 +3217,7 @@ static inline uniform float atan2(uniform float y, uniform float x) { __math_lib == __math_lib_svml) { return __stdlib_atan2f(y, x); } - else if (__math_lib == __math_lib_ispc || + else if (__math_lib == __math_lib_ispc || __math_lib == __math_lib_ispc_fast) { const uniform float pi_vec = 3.1415927410125732421875; const uniform float pi_over_two_vec = 1.57079637050628662109375; @@ -3249,7 +3249,7 @@ static inline float exp(float x_full) { return ret; } else if (__math_lib == __math_lib_ispc_fast) { - float z = floor(1.44269504088896341f * x_full + 0.5f); + float z = floor(1.44269504088896341f * x_full + 0.5f); int n; x_full -= z * 0.693359375f; x_full -= z * -2.12194440e-4f; @@ -3324,7 +3324,7 @@ static inline uniform float exp(uniform float x_full) { return __stdlib_expf(x_full); } else if (__math_lib == __math_lib_ispc_fast) { - uniform float z = floor(1.44269504088896341f * x_full + 0.5f); + uniform float z = floor(1.44269504088896341f * x_full + 0.5f); uniform int n; x_full -= z * 0.693359375f; x_full -= z * -2.12194440e-4f; @@ -3393,7 +3393,7 @@ static inline uniform float exp(uniform float x_full) { // * log(2) + log(y) where y is the reduced range (usually in [1/2, // 1)). 
__declspec(safe) -static inline void __range_reduce_log(float input, varying float * uniform reduced, +static inline void __range_reduce_log(float input, varying float * uniform reduced, varying int * uniform exponent) { int int_version = intbits(input); // single precision = SEEE EEEE EMMM MMMM MMMM MMMM MMMM MMMM @@ -3424,7 +3424,7 @@ static inline void __range_reduce_log(float input, varying float * uniform reduc __declspec(safe) -static inline void __range_reduce_log(uniform float input, uniform float * uniform reduced, +static inline void __range_reduce_log(uniform float input, uniform float * uniform reduced, uniform int * uniform exponent) { uniform int int_version = intbits(input); static const uniform int nonexponent_mask = 0x807FFFFF; @@ -3458,7 +3458,7 @@ static inline float log(float x_full) { else if (__math_lib == __math_lib_ispc_fast) { int e; x_full = frexp(x_full, &e); - + int x_smaller_SQRTHF = (0.707106781186547524f > x_full) ? 0xffffffff : 0; e += x_smaller_SQRTHF; int ix_add = intbits(x_full); @@ -3482,7 +3482,7 @@ static inline float log(float x_full) { y -= 0.5f * z; z = x_full + y; return z + 0.693359375 * fe; - } + } else if (__math_lib == __math_lib_ispc) { float reduced; int exponent; @@ -3542,7 +3542,7 @@ static inline uniform float log(uniform float x_full) { else if (__math_lib == __math_lib_ispc_fast) { uniform int e; x_full = frexp(x_full, &e); - + uniform int x_smaller_SQRTHF = (0.707106781186547524f > x_full) ? 
0xffffffff : 0; e += x_smaller_SQRTHF; uniform int ix_add = intbits(x_full); @@ -3630,7 +3630,7 @@ static inline float pow(float a, float b) { } return ret; } - else if (__math_lib == __math_lib_ispc || + else if (__math_lib == __math_lib_ispc || __math_lib == __math_lib_ispc_fast) { return exp(b * log(a)); } @@ -3645,7 +3645,7 @@ static inline uniform float pow(uniform float a, uniform float b) { __math_lib == __math_lib_svml) { return __stdlib_powf(a, b); } - else if (__math_lib == __math_lib_ispc || + else if (__math_lib == __math_lib_ispc || __math_lib == __math_lib_ispc_fast) { return exp(b * log(a)); } @@ -3686,13 +3686,13 @@ static inline QUAL double __rsqrt_safe_##QUAL##_double (QUAL double x) \ } RSQRTD(varying) -__declspec(safe) -static inline double rsqrt(double v) { +__declspec(safe) +static inline double rsqrt(double v) { if (__have_native_rsqrtd) return __rsqrt_varying_double(v); else return __rsqrt_safe_varying_double(v); -} +} RSQRTD(uniform) __declspec(safe) @@ -3748,11 +3748,11 @@ static inline uniform double frexp(uniform double x, uniform int * uniform pw2) __declspec(safe) static inline double sin(double x) { - if (__have_native_trigonometry) + if (__have_native_trigonometry) { return __sin_varying_double(x); } - else if (__math_lib == __math_lib_svml) + else if (__math_lib == __math_lib_svml) { return __svml_sind(x); } @@ -3767,11 +3767,11 @@ static inline double sin(double x) { } __declspec(safe) static inline double asin(double x) { - if (__have_native_trigonometry) + if (__have_native_trigonometry) { return __asin_varying_double(x); } - else if (__math_lib == __math_lib_svml) + else if (__math_lib == __math_lib_svml) { return __svml_asind(x); } @@ -3787,7 +3787,7 @@ static inline double asin(double x) { __declspec(safe) static inline uniform double sin(uniform double x) { - if (__have_native_trigonometry) + if (__have_native_trigonometry) { return __sin_uniform_double(x); } @@ -3797,11 +3797,11 @@ static inline uniform double sin(uniform 
double x) { __declspec(safe) static inline double asin(const double x) { - if (__have_native_trigonometry) + if (__have_native_trigonometry) { return __asin_varying_double(x); } - else if (__math_lib == __math_lib_svml) + else if (__math_lib == __math_lib_svml) { return __svml_asind(x); } @@ -3817,11 +3817,11 @@ static inline double asin(const double x) { __declspec(safe) static inline double cos(const double x) { - if (__have_native_trigonometry) + if (__have_native_trigonometry) { return __cos_varying_double(x); } - if (__math_lib == __math_lib_svml) + if (__math_lib == __math_lib_svml) { return __svml_cosd(x); } @@ -3837,7 +3837,7 @@ static inline double cos(const double x) { __declspec(safe) static inline uniform double cos(uniform double x) { - if (__have_native_trigonometry) + if (__have_native_trigonometry) { return __cos_uniform_double(x); } @@ -3848,11 +3848,11 @@ static inline uniform double cos(uniform double x) { __declspec(safe) static inline void sincos(double x, varying double * uniform sin_result, varying double * uniform cos_result) { - if (__have_native_trigonometry) + if (__have_native_trigonometry) { __sincos_varying_double(x,sin_result,cos_result); } - if (__math_lib == __math_lib_svml) + if (__math_lib == __math_lib_svml) { __svml_sincosd(x, sin_result, cos_result); } @@ -3869,7 +3869,7 @@ static inline void sincos(double x, varying double * uniform sin_result, __declspec(safe) static inline void sincos(uniform double x, uniform double * uniform sin_result, uniform double * uniform cos_result) { - if (__have_native_trigonometry) + if (__have_native_trigonometry) { __sincos_uniform_double(x,sin_result, cos_result); } @@ -3879,11 +3879,11 @@ static inline void sincos(uniform double x, uniform double * uniform sin_result, __declspec(safe) static inline double tan(double x) { - if (__have_native_trigonometry) + if (__have_native_trigonometry) { return __tan_varying_double(x); } - else if (__math_lib == __math_lib_svml) + else if (__math_lib == 
__math_lib_svml) { return __svml_tand(x); } @@ -3899,7 +3899,7 @@ static inline double tan(double x) { __declspec(safe) static inline uniform double tan(uniform double x) { - if (__have_native_trigonometry) + if (__have_native_trigonometry) { return __tan_uniform_double(x); } @@ -3909,7 +3909,7 @@ static inline uniform double tan(uniform double x) { __declspec(safe) static inline double atan(double x) { - if (__have_native_trigonometry) + if (__have_native_trigonometry) { return __atan_varying_double(x); } @@ -3925,7 +3925,7 @@ static inline double atan(double x) { __declspec(safe) static inline uniform double atan(uniform double x) { - if (__have_native_trigonometry) + if (__have_native_trigonometry) { return __atan_uniform_double(x); } @@ -3935,11 +3935,11 @@ static inline uniform double atan(uniform double x) { __declspec(safe) static inline double atan2(double y, double x) { - if (__have_native_trigonometry) + if (__have_native_trigonometry) { return __atan2_varying_double(y,x); } - else if (__math_lib == __math_lib_svml) + else if (__math_lib == __math_lib_svml) { return __svml_atan2d(y,x); } @@ -3955,7 +3955,7 @@ static inline double atan2(double y, double x) { __declspec(safe) static inline uniform double atan2(uniform double y, uniform double x) { - if (__have_native_trigonometry) + if (__have_native_trigonometry) { return __atan2_uniform_double(y,x); } @@ -3968,7 +3968,7 @@ static inline double exp(double x) { if (__have_native_transcendentals) { return __exp_varying_double(x); } - else if (__math_lib == __math_lib_svml) + else if (__math_lib == __math_lib_svml) { return __svml_expd(x); } @@ -3996,7 +3996,7 @@ static inline double log(double x) { if (__have_native_transcendentals) { return __log_varying_double(x); } - else if (__math_lib == __math_lib_svml) + else if (__math_lib == __math_lib_svml) { return __svml_logd(x); } @@ -4024,7 +4024,7 @@ static inline double pow(double a, double b) { if (__have_native_transcendentals) { return 
__pow_varying_double(a,b); } - else if (__math_lib == __math_lib_svml) + else if (__math_lib == __math_lib_svml) { return __svml_powd(a,b); } @@ -4127,7 +4127,7 @@ static inline uniform int16 float_to_half(uniform float f) { // unconditional assignment here, will override with right value for // the regular case below. uniform int32 f32infty = 255ul << 23; - o = (fint > f32infty) ? 0x7e00u : 0x7c00u; + o = (fint > f32infty) ? 0x7e00u : 0x7c00u; // (De)normalized number or zero // update fint unconditionally to save the blending; we don't need it @@ -4258,14 +4258,14 @@ static inline uniform int16 float_to_half_fast(uniform float f) { uniform unsigned int32 hs = (xs >> 16); // Sign bit // Exponent unbias the single, then bias the halfp - uniform int32 hes = ((int)(xe >> 23)) - 127 + 15; + uniform int32 hes = ((int)(xe >> 23)) - 127 + 15; uniform unsigned int32 he = (hes << 10); // Exponent uniform int32 hm = (xm >> 13); // Mantissa uniform int32 ret = (hs | he | hm); if (xm & 0x00001000u) // Check for rounding // Round, might overflow to inf, this is OK - ret += 1u; + ret += 1u; return (int16)ret; } @@ -4284,14 +4284,14 @@ static inline int16 float_to_half_fast(float f) { unsigned int32 hs = (xs >> 16); // Sign bit // Exponent unbias the single, then bias the halfp - int32 hes = ((int)(xe >> 23)) - 127 + 15; + int32 hes = ((int)(xe >> 23)) - 127 + 15; unsigned int32 he = (hes << 10); // Exponent int32 hm = (xm >> 13); // Mantissa int32 ret = (hs | he | hm); if (xm & 0x00001000u) // Check for rounding // Round, might overflow to inf, this is OK - ret += 1u; + ret += 1u; return (int16)ret; } @@ -4359,7 +4359,7 @@ float_to_srgb8(float inval) }; static const uniform unsigned int almost_one = 0x3f7fffff; - + // Clamp to [2^(-13), 1-eps]; these two values map to 0 and 1, respectively. 
inval = max(inval, 0.0f); inval = min(inval, floatbits(almost_one)); @@ -4409,7 +4409,7 @@ float_to_srgb8(uniform float inval) }; static const uniform unsigned int almost_one = 0x3f7fffff; - + // Clamp to [2^(-13), 1-eps]; these two values map to 0 and 1, respectively. inval = max(inval, 0.0f); inval = min(inval, floatbits(almost_one)); @@ -4437,7 +4437,7 @@ static inline unsigned int random(varying RNGState * uniform state) b = ((state->z1 << 6) ^ state->z1) >> 13; state->z1 = ((state->z1 & 4294967294U) << 18) ^ b; - b = ((state->z2 << 2) ^ state->z2) >> 27; + b = ((state->z2 << 2) ^ state->z2) >> 27; state->z2 = ((state->z2 & 4294967288U) << 2) ^ b; b = ((state->z3 << 13) ^ state->z3) >> 21; state->z3 = ((state->z3 & 4294967280U) << 7) ^ b; @@ -4452,7 +4452,7 @@ static inline uniform unsigned int random(uniform RNGState * uniform state) b = ((state->z1 << 6) ^ state->z1) >> 13; state->z1 = ((state->z1 & 4294967294U) << 18) ^ b; - b = ((state->z2 << 2) ^ state->z2) >> 27; + b = ((state->z2 << 2) ^ state->z2) >> 27; state->z2 = ((state->z2 & 4294967288U) << 2) ^ b; b = ((state->z3 << 13) ^ state->z3) >> 21; state->z3 = ((state->z3 & 4294967280U) << 7) ^ b; @@ -4475,7 +4475,7 @@ static inline uniform float frandom(uniform RNGState * uniform state) return floatbits(0x3F800000 | irand)-1.0f; } -static inline void seed_rng(varying RNGState * uniform state, +static inline void seed_rng(varying RNGState * uniform state, unsigned int seed) { state->z1 = seed; state->z2 = seed ^ 0xbeeff00d; @@ -4484,7 +4484,7 @@ static inline void seed_rng(varying RNGState * uniform state, ((seed & 0xff0000ul) >> 8) | (seed & 0xff000000ul) >> 24); } -static inline void seed_rng(uniform RNGState * uniform state, +static inline void seed_rng(uniform RNGState * uniform state, uniform unsigned int seed) { state->z1 = seed; state->z2 = seed ^ 0xbeeff00d; @@ -4563,52 +4563,52 @@ static inline varying int64 saturating_add(varying int64 a, varying int64 b) { return result; } -static inline uniform 
unsigned int8 saturating_add(uniform unsigned int8 a, +static inline uniform unsigned int8 saturating_add(uniform unsigned int8 a, uniform unsigned int8 b) { uniform unsigned int8 result = a + b; result |= (-(uniform int8)(result < a)); return result; } -static inline varying unsigned int8 saturating_add(varying unsigned int8 a, +static inline varying unsigned int8 saturating_add(varying unsigned int8 a, varying unsigned int8 b) { return __paddus_vi8(a, b); } -static inline uniform unsigned int16 saturating_add(uniform unsigned int16 a, +static inline uniform unsigned int16 saturating_add(uniform unsigned int16 a, uniform unsigned int16 b) { uniform unsigned int16 result = a + b; result |= (-(uniform int16)(result < a)); return result; } -static inline varying unsigned int16 saturating_add(varying unsigned int16 a, +static inline varying unsigned int16 saturating_add(varying unsigned int16 a, varying unsigned int16 b) { return __paddus_vi16(a, b); } -static inline uniform unsigned int32 saturating_add(uniform unsigned int32 a, +static inline uniform unsigned int32 saturating_add(uniform unsigned int32 a, uniform unsigned int32 b) { uniform unsigned int32 result = a + b; result |= (-(uniform int32)(result < a)); return result; } -static inline varying unsigned int32 saturating_add(varying unsigned int32 a, +static inline varying unsigned int32 saturating_add(varying unsigned int32 a, varying unsigned int32 b) { varying unsigned int32 result = a + b; result |= (-(varying int32)(result < a)); return result; } -static inline uniform unsigned int64 saturating_add(uniform unsigned int64 a, +static inline uniform unsigned int64 saturating_add(uniform unsigned int64 a, uniform unsigned int64 b) { uniform unsigned int64 result = a + b; result |= (-(uniform int64)(result < a)); return result; } -static inline varying unsigned int64 saturating_add(varying unsigned int64 a, +static inline varying unsigned int64 saturating_add(varying unsigned int64 a, varying unsigned int64 b) 
{ varying unsigned int64 result = a + b; result |= (-(varying int64)(result < a)); @@ -4677,52 +4677,52 @@ static inline varying int64 saturating_sub(varying int64 a, varying int64 b) { return result; } -static inline uniform unsigned int8 saturating_sub(uniform unsigned int8 a, +static inline uniform unsigned int8 saturating_sub(uniform unsigned int8 a, uniform unsigned int8 b) { uniform unsigned int8 result = a - b; result &= (-(uniform int8)(result <= a)); return result; } -static inline varying unsigned int8 saturating_sub(varying unsigned int8 a, +static inline varying unsigned int8 saturating_sub(varying unsigned int8 a, varying unsigned int8 b) { return __psubus_vi8(a, b); } -static inline uniform unsigned int16 saturating_sub(uniform unsigned int16 a, +static inline uniform unsigned int16 saturating_sub(uniform unsigned int16 a, uniform unsigned int16 b) { uniform unsigned int16 result = a - b; result &= (-(uniform int16)(result <= a)); return result; } -static inline varying unsigned int16 saturating_sub(varying unsigned int16 a, +static inline varying unsigned int16 saturating_sub(varying unsigned int16 a, varying unsigned int16 b) { return __psubus_vi16(a, b); } -static inline uniform unsigned int32 saturating_sub(uniform unsigned int32 a, +static inline uniform unsigned int32 saturating_sub(uniform unsigned int32 a, uniform unsigned int32 b) { uniform unsigned int32 result = a - b; result &= (-(uniform int32)(result <= a)); return result; } -static inline varying unsigned int32 saturating_sub(varying unsigned int32 a, +static inline varying unsigned int32 saturating_sub(varying unsigned int32 a, varying unsigned int32 b) { varying unsigned int32 result = a - b; result &= (-(varying int32)(result <= a)); return result; } -static inline uniform unsigned int64 saturating_sub(uniform unsigned int64 a, +static inline uniform unsigned int64 saturating_sub(uniform unsigned int64 a, uniform unsigned int64 b) { uniform unsigned int64 result = a - b; result &= 
(-(uniform int64)(result <= a)); return result; } -static inline varying unsigned int64 saturating_sub(varying unsigned int64 a, +static inline varying unsigned int64 saturating_sub(varying unsigned int64 a, varying unsigned int64 b) { varying unsigned int64 result = a - b; result &= (-(varying int64)(result <= a)); @@ -4783,7 +4783,7 @@ static inline uniform unsigned int8 saturating_div(uniform unsigned int8 a, return a / b; } -static inline varying unsigned int8 saturating_div(varying unsigned int8 a, +static inline varying unsigned int8 saturating_div(varying unsigned int8 a, varying unsigned int8 b) { /* No overflow possible */ return a / b; @@ -4795,13 +4795,13 @@ static inline uniform unsigned int16 saturating_div(uniform unsigned int16 a, return a / b; } -static inline varying unsigned int16 saturating_div(varying unsigned int16 a, +static inline varying unsigned int16 saturating_div(varying unsigned int16 a, varying unsigned int16 b) { /* No overflow possible */ return a / b; } -static inline uniform unsigned int32 saturating_div(uniform unsigned int32 a, +static inline uniform unsigned int32 saturating_div(uniform unsigned int32 a, uniform unsigned int32 b) { /* No overflow possible */ return a / b; @@ -4813,81 +4813,81 @@ static inline varying unsigned int32 saturating_div(varying unsigned int32 a, return a / b; } -static inline uniform unsigned int64 saturating_div(uniform unsigned int64 a, +static inline uniform unsigned int64 saturating_div(uniform unsigned int64 a, uniform unsigned int64 b) { /* No overflow possible */ return a / b; } -static inline varying unsigned int64 saturating_div(varying unsigned int64 a, +static inline varying unsigned int64 saturating_div(varying unsigned int64 a, varying unsigned int64 b) { /* No overflow possible */ return a / b; } static inline uniform int8 saturating_mul(uniform int8 a, uniform int8 b) { - uniform int16 result = (uniform int16) a * (uniform int16) b; + uniform int16 result = (uniform int16) a * (uniform 
int16) b; uniform unsigned int8 result2 = ((uniform unsigned int8) (a ^ b) >> 7) + INT8_MAX; uniform int8 hi = result >> 8; uniform int8 lo = result; - if (hi != (lo >> 7)) + if (hi != (lo >> 7)) result = result2; - return result; + return result; } static inline varying int8 saturating_mul(varying int8 a, varying int8 b) { - varying int16 result = (varying int16) a * (varying int16) b; + varying int16 result = (varying int16) a * (varying int16) b; varying unsigned int8 result2 = ((varying unsigned int8) (a ^ b) >> 7) + INT8_MAX; varying int8 hi = result >> 8; varying int8 lo = result; - if (hi != (lo >> 7)) + if (hi != (lo >> 7)) result = result2; - return result; + return result; } static inline uniform int16 saturating_mul(uniform int16 a, uniform int16 b) { - uniform int32 result = (uniform int32) a * (uniform int32) b; + uniform int32 result = (uniform int32) a * (uniform int32) b; uniform unsigned int16 result2 = ((uniform unsigned int16) (a ^ b) >> 15) + INT16_MAX; uniform int16 hi = result >> 16; uniform int16 lo = result; - if (hi != (lo >> 15)) + if (hi != (lo >> 15)) result = result2; - return result; + return result; } static inline varying int16 saturating_mul(varying int16 a, varying int16 b) { - varying int32 result = (varying int32) a * (varying int32) b; + varying int32 result = (varying int32) a * (varying int32) b; varying unsigned int16 result2 = ((varying unsigned int16) (a ^ b) >> 15) + INT16_MAX; varying int16 hi = result >> 16; varying int16 lo = result; - if (hi != (lo >> 15)) + if (hi != (lo >> 15)) result = result2; - return result; + return result; } static inline uniform int32 saturating_mul(uniform int32 a, uniform int32 b) { - uniform int64 result = (uniform int64) a * (uniform int64) b; + uniform int64 result = (uniform int64) a * (uniform int64) b; uniform unsigned int32 result2 = ((uniform unsigned int32) (a ^ b) >> 31) + INT32_MAX; uniform int32 hi = result >> 32; uniform int32 lo = result; - if (hi != (lo >> 31)) + if (hi != (lo 
>> 31)) result = result2; - return result; + return result; } static inline varying int32 saturating_mul(varying int32 a, varying int32 b) { - varying int64 result = (varying int64) a * (varying int64) b; + varying int64 result = (varying int64) a * (varying int64) b; varying unsigned int32 result2 = ((varying unsigned int32) (a ^ b) >> 31) + INT32_MAX; varying int32 hi = result >> 32; varying int32 lo = result; - if (hi != (lo >> 31)) + if (hi != (lo >> 31)) result = result2; - return result; + return result; } static inline uniform unsigned int8 saturating_mul(uniform unsigned int8 a, uniform unsigned int8 b) { - uniform unsigned int16 result = (uniform unsigned int16) a * + uniform unsigned int16 result = (uniform unsigned int16) a * (uniform unsigned int16) b; uniform unsigned int8 hi = result >> 8; uniform unsigned int8 lo = result; @@ -4896,7 +4896,7 @@ static inline uniform unsigned int8 saturating_mul(uniform unsigned int8 a, static inline varying unsigned int8 saturating_mul(varying unsigned int8 a, varying unsigned int8 b) { - varying unsigned int16 result = (varying unsigned int16) a * + varying unsigned int16 result = (varying unsigned int16) a * (varying unsigned int16) b; varying unsigned int8 hi = result >> 8; varying unsigned int8 lo = result; @@ -4905,7 +4905,7 @@ static inline varying unsigned int8 saturating_mul(varying unsigned int8 a, static inline uniform unsigned int16 saturating_mul(uniform unsigned int16 a, uniform unsigned int16 b) { - uniform unsigned int32 result = (uniform unsigned int32) a * + uniform unsigned int32 result = (uniform unsigned int32) a * (uniform unsigned int32) b; uniform unsigned int16 hi = result >> 16; uniform unsigned int16 lo = result; @@ -4914,7 +4914,7 @@ static inline uniform unsigned int16 saturating_mul(uniform unsigned int16 a, static inline varying unsigned int16 saturating_mul(varying unsigned int16 a, varying unsigned int16 b) { - varying unsigned int32 result = (varying unsigned int32) a * + varying 
unsigned int32 result = (varying unsigned int32) a * (varying unsigned int32) b; varying unsigned int16 hi = result >> 16; varying unsigned int16 lo = result; @@ -4923,7 +4923,7 @@ static inline varying unsigned int16 saturating_mul(varying unsigned int16 a, static inline uniform unsigned int32 saturating_mul(uniform unsigned int32 a, uniform unsigned int32 b) { - uniform unsigned int64 result = (uniform unsigned int64) a * + uniform unsigned int64 result = (uniform unsigned int64) a * (uniform unsigned int64) b; uniform unsigned int32 hi = result >> 32; uniform unsigned int32 lo = result; @@ -4932,7 +4932,7 @@ static inline uniform unsigned int32 saturating_mul(uniform unsigned int32 a, static inline varying unsigned int32 saturating_mul(varying unsigned int32 a, varying unsigned int32 b) { - varying unsigned int64 result = (varying unsigned int64) a * + varying unsigned int64 result = (varying unsigned int64) a * (varying unsigned int64) b; varying unsigned int32 hi = result >> 32; varying unsigned int32 lo = result; @@ -4941,11 +4941,11 @@ static inline varying unsigned int32 saturating_mul(varying unsigned int32 a, static inline uniform int64 saturating_mul(uniform int64 a, uniform int64 b) { uniform unsigned int64 ret = 0; - - uniform int8 sign = (((a > 0) && (b > 0)) || ((a < 0) && (b < 0))) ? 1 : -1; + + uniform int8 sign = (((a > 0) && (b > 0)) || ((a < 0) && (b < 0))) ? 1 : -1; uniform unsigned int64 a_abs = 0; uniform unsigned int64 b_abs = 0; - + if (a == INT64_MIN) // Operation "-" is undefined for "INT64_MIN", as it causes overflow. // But converting INT64_MIN to unsigned type yields the correct result, @@ -4955,17 +4955,17 @@ static inline uniform int64 saturating_mul(uniform int64 a, uniform int64 b) { a_abs = (uniform unsigned int64) INT64_MIN; else a_abs = (a > 0) ? a : -a; - + if (b == INT64_MIN) b_abs = (uniform unsigned int64) INT64_MIN; else - b_abs = (b > 0) ? b : -b; + b_abs = (b > 0) ? 
b : -b; uniform unsigned int32 a0 = a_abs & 0xFFFFFFFF; uniform unsigned int32 b0 = b_abs & 0xFFFFFFFF; uniform unsigned int32 a1 = a_abs >> 32; uniform unsigned int32 b1 = b_abs >> 32; - + if ((a1 != 0) && (b1 != 0)) { if (sign > 0) { return INT64_MAX; @@ -4974,16 +4974,16 @@ static inline uniform int64 saturating_mul(uniform int64 a, uniform int64 b) { return INT64_MIN; } } else if (a1 != 0) { - ret = saturating_add ((uniform unsigned int64) saturating_mul (b0, a1) << 32 , + ret = saturating_add ((uniform unsigned int64) saturating_mul (b0, a1) << 32 , (uniform unsigned int64) (a0) * b0); } else if (b1 != 0) { - ret = saturating_add ((uniform unsigned int64) saturating_mul (a0, b1) << 32 , + ret = saturating_add ((uniform unsigned int64) saturating_mul (a0, b1) << 32 , (uniform unsigned int64) (a0) * b0); } else { ret = a_abs * b_abs; } - - + + if ((sign < 0) && (ret >= (uniform unsigned int64) INT64_MIN)) { return INT64_MIN; } else if ((sign > 0) && (ret >= INT64_MAX)) { @@ -4995,32 +4995,32 @@ static inline uniform int64 saturating_mul(uniform int64 a, uniform int64 b) { static inline varying int64 saturating_mul(varying int64 a, varying int64 b) { varying unsigned int64 ret = 0; - - varying int8 sign = (((a > 0) && (b > 0)) || ((a < 0) && (b < 0))) ? 1 : -1; + + varying int8 sign = (((a > 0) && (b > 0)) || ((a < 0) && (b < 0))) ? 1 : -1; varying unsigned int64 a_abs = 0; varying unsigned int64 b_abs = 0; - + if (a == INT64_MIN) // Operation "-" is undefined for "INT64_MIN", as it causes overflow. // But converting INT64_MIN to unsigned type yields the correct result, // i.e. it will be positive value -INT64_MIN. // See 6.3.1.3 section in C99 standart for more details (ISPC follows // C standard, unless it's specifically different in the language). - a_abs = (varying unsigned int64) INT64_MIN; + a_abs = (varying unsigned int64) INT64_MIN; else a_abs = (a > 0) ? a : -a; - + if (b == INT64_MIN) b_abs = (varying unsigned int64) INT64_MIN; else - b_abs = (b > 0) ? 
b : -b; - + b_abs = (b > 0) ? b : -b; + varying unsigned int32 a0 = a_abs & 0xFFFFFFFF; varying unsigned int32 b0 = b_abs & 0xFFFFFFFF; varying unsigned int32 a1 = a_abs >> 32; varying unsigned int32 b1 = b_abs >> 32; - + if ((a1 != 0) && (b1 != 0)) { if (sign > 0) { return INT64_MAX; @@ -5029,16 +5029,16 @@ static inline varying int64 saturating_mul(varying int64 a, varying int64 b) { return INT64_MIN; } } else if (a1 != 0) { - ret = saturating_add ((varying unsigned int64) saturating_mul (b0, a1) << 32 , + ret = saturating_add ((varying unsigned int64) saturating_mul (b0, a1) << 32 , (varying unsigned int64) (a0) * b0); } else if (b1 != 0) { - ret = saturating_add ((varying unsigned int64) saturating_mul (a0, b1) << 32 , + ret = saturating_add ((varying unsigned int64) saturating_mul (a0, b1) << 32 , (varying unsigned int64) (a0) * b0); } else { ret = a_abs * b_abs; } - - + + if ((sign < 0) && (ret >= (varying unsigned int64) INT64_MIN)) { return INT64_MIN; } else if ((sign > 0) && (ret >= INT64_MAX)) { @@ -5059,10 +5059,10 @@ static inline uniform unsigned int64 saturating_mul(uniform unsigned int64 a, if ((a1 != 0) && (b1 != 0)) { return UINT64_MAX; } else if (a1 != 0) { - return saturating_add ((uniform unsigned int64) saturating_mul (b0, a1) << 32 , + return saturating_add ((uniform unsigned int64) saturating_mul (b0, a1) << 32 , (uniform unsigned int64) (a0) * b0); } else if (b1 != 0) { - return saturating_add ((uniform unsigned int64) saturating_mul (a0, b1) << 32 , + return saturating_add ((uniform unsigned int64) saturating_mul (a0, b1) << 32 , (uniform unsigned int64) (a0) * b0); } else { return a * b; @@ -5079,10 +5079,10 @@ static inline varying unsigned int64 saturating_mul(varying unsigned int64 a, if ((a1 != 0) && (b1 != 0)) { return UINT64_MAX; } else if (a1 != 0) { - return saturating_add ((varying unsigned int64) saturating_mul (b0, a1) << 32 , + return saturating_add ((varying unsigned int64) saturating_mul (b0, a1) << 32 , (varying unsigned 
int64) (a0) * b0); } else if (b1 != 0) { - return saturating_add ((varying unsigned int64) saturating_mul (a0, b1) << 32 , + return saturating_add ((varying unsigned int64) saturating_mul (a0, b1) << 32 , (varying unsigned int64) (a0) * b0); } else { return a * b; From 27132e42e90a758da4ebb4090e6421964d10c08b Mon Sep 17 00:00:00 2001 From: Ilia Filippov Date: Wed, 12 Mar 2014 19:29:05 +0400 Subject: [PATCH 24/28] resolving an issue with Debug Info metadata after LLVM_3_4 --- module.cpp | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/module.cpp b/module.cpp index 6aeeddfd..67e2bf0e 100644 --- a/module.cpp +++ b/module.cpp @@ -1,5 +1,5 @@ /* - Copyright (c) 2010-2013, Intel Corporation + Copyright (c) 2010-2014, Intel Corporation All rights reserved. Redistribution and use in source and binary forms, with or without @@ -949,6 +949,15 @@ Module::writeOutput(OutputType outputType, const char *outFileName, lStripUnusedDebugInfo(module); } +#if defined (LLVM_3_4) || defined (LLVM_3_5) + // In LLVM_3_4 after r195494 and r195504 revisions we should pass + // "Debug Info Version" constant to the module. LLVM will ignore + // our Debug Info metadata without it. + if (g->generateDebuggingSymbols == true) { + module->addModuleFlag(llvm::Module::Error, "Debug Info Version", llvm::DEBUG_METADATA_VERSION); + } +#endif + // First, issue a warning if the output file suffix and the type of // file being created seem to mismatch. This can help catch missing // command-line arguments specifying the output file type. 
From 31b95b665b2d19da92d6f3a9c34be4e78dd6b63d Mon Sep 17 00:00:00 2001 From: Dmitry Babokin Date: Wed, 12 Mar 2014 20:19:16 +0400 Subject: [PATCH 25/28] Copyright update --- ast.cpp | 2 +- ast.h | 2 +- builtins.cpp | 2 +- builtins.h | 2 +- builtins/builtins.c | 2 +- builtins/target-avx-common.ll | 2 +- builtins/target-avx-x2.ll | 2 +- builtins/target-avx.ll | 2 +- builtins/target-avx1-i64x4base.ll | 2 +- builtins/target-avx1-x2.ll | 2 +- builtins/target-avx1.ll | 2 +- builtins/target-avx11-x2.ll | 2 +- builtins/target-avx11.ll | 2 +- builtins/target-avx2-x2.ll | 2 +- builtins/target-avx2.ll | 2 +- builtins/target-generic-1.ll | 32 +++++++++++++++++++++++++++++++ builtins/target-generic-16.ll | 2 +- builtins/target-generic-32.ll | 2 +- builtins/target-generic-4.ll | 2 +- builtins/target-generic-64.ll | 2 +- builtins/target-generic-8.ll | 2 +- builtins/target-generic-common.ll | 2 +- builtins/target-sse2-common.ll | 2 +- builtins/target-sse2-x2.ll | 2 +- builtins/target-sse2.ll | 2 +- builtins/target-sse4-x2.ll | 2 +- builtins/target-sse4.ll | 2 +- builtins/util.m4 | 2 +- ctx.cpp | 2 +- ctx.h | 2 +- decl.h | 2 +- expr.cpp | 2 +- expr.h | 2 +- func.cpp | 2 +- func.h | 2 +- ispc.cpp | 2 +- ispc.h | 2 +- module.h | 2 +- opt.cpp | 2 +- opt.h | 2 +- stmt.cpp | 2 +- stmt.h | 2 +- sym.cpp | 2 +- sym.h | 2 +- test_static.cpp | 2 +- type.h | 2 +- util.cpp | 2 +- util.h | 2 +- 48 files changed, 79 insertions(+), 47 deletions(-) diff --git a/ast.cpp b/ast.cpp index 60b20a80..19eff152 100644 --- a/ast.cpp +++ b/ast.cpp @@ -1,5 +1,5 @@ /* - Copyright (c) 2011-2012, Intel Corporation + Copyright (c) 2011-2013, Intel Corporation All rights reserved. Redistribution and use in source and binary forms, with or without diff --git a/ast.h b/ast.h index d98c1d37..e0f864ba 100644 --- a/ast.h +++ b/ast.h @@ -1,5 +1,5 @@ /* - Copyright (c) 2011-2012, Intel Corporation + Copyright (c) 2011-2013, Intel Corporation All rights reserved. 
Redistribution and use in source and binary forms, with or without diff --git a/builtins.cpp b/builtins.cpp index a30d10b3..4795590e 100644 --- a/builtins.cpp +++ b/builtins.cpp @@ -1,5 +1,5 @@ /* - Copyright (c) 2010-2013, Intel Corporation + Copyright (c) 2010-2014, Intel Corporation All rights reserved. Redistribution and use in source and binary forms, with or without diff --git a/builtins.h b/builtins.h index 14f3896e..28f58430 100644 --- a/builtins.h +++ b/builtins.h @@ -1,5 +1,5 @@ /* - Copyright (c) 2010-2011, Intel Corporation + Copyright (c) 2010-2013, Intel Corporation All rights reserved. Redistribution and use in source and binary forms, with or without diff --git a/builtins/builtins.c b/builtins/builtins.c index f6c385fb..d02ab6a4 100644 --- a/builtins/builtins.c +++ b/builtins/builtins.c @@ -1,5 +1,5 @@ /* - Copyright (c) 2010-2012, Intel Corporation + Copyright (c) 2010-2013, Intel Corporation All rights reserved. Redistribution and use in source and binary forms, with or without diff --git a/builtins/target-avx-common.ll b/builtins/target-avx-common.ll index 54656d9f..cf79278b 100644 --- a/builtins/target-avx-common.ll +++ b/builtins/target-avx-common.ll @@ -1,4 +1,4 @@ -;; Copyright (c) 2010-2013, Intel Corporation +;; Copyright (c) 2010-2014, Intel Corporation ;; All rights reserved. ;; ;; Redistribution and use in source and binary forms, with or without diff --git a/builtins/target-avx-x2.ll b/builtins/target-avx-x2.ll index 69026515..5bdc547c 100644 --- a/builtins/target-avx-x2.ll +++ b/builtins/target-avx-x2.ll @@ -1,4 +1,4 @@ -;; Copyright (c) 2010-2012, Intel Corporation +;; Copyright (c) 2010-2014, Intel Corporation ;; All rights reserved. 
;; ;; Redistribution and use in source and binary forms, with or without diff --git a/builtins/target-avx.ll b/builtins/target-avx.ll index e0f4e45d..aa120260 100644 --- a/builtins/target-avx.ll +++ b/builtins/target-avx.ll @@ -1,4 +1,4 @@ -;; Copyright (c) 2010-2012, Intel Corporation +;; Copyright (c) 2010-2014, Intel Corporation ;; All rights reserved. ;; ;; Redistribution and use in source and binary forms, with or without diff --git a/builtins/target-avx1-i64x4base.ll b/builtins/target-avx1-i64x4base.ll index 004a8702..8f23e51c 100644 --- a/builtins/target-avx1-i64x4base.ll +++ b/builtins/target-avx1-i64x4base.ll @@ -1,4 +1,4 @@ -;; Copyright (c) 2013, Intel Corporation +;; Copyright (c) 2013-2014, Intel Corporation ;; All rights reserved. ;; ;; Redistribution and use in source and binary forms, with or without diff --git a/builtins/target-avx1-x2.ll b/builtins/target-avx1-x2.ll index 562d7ff0..a278e6f9 100644 --- a/builtins/target-avx1-x2.ll +++ b/builtins/target-avx1-x2.ll @@ -1,4 +1,4 @@ -;; Copyright (c) 2010-2011, Intel Corporation +;; Copyright (c) 2010-2012, Intel Corporation ;; All rights reserved. ;; ;; Redistribution and use in source and binary forms, with or without diff --git a/builtins/target-avx1.ll b/builtins/target-avx1.ll index a9ddc112..8aaede89 100644 --- a/builtins/target-avx1.ll +++ b/builtins/target-avx1.ll @@ -1,4 +1,4 @@ -;; Copyright (c) 2010-2011, Intel Corporation +;; Copyright (c) 2010-2013, Intel Corporation ;; All rights reserved. ;; ;; Redistribution and use in source and binary forms, with or without diff --git a/builtins/target-avx11-x2.ll b/builtins/target-avx11-x2.ll index 1aa6345c..3da9c890 100644 --- a/builtins/target-avx11-x2.ll +++ b/builtins/target-avx11-x2.ll @@ -1,4 +1,4 @@ -;; Copyright (c) 2012, Intel Corporation +;; Copyright (c) 2012-2013, Intel Corporation ;; All rights reserved. 
;; ;; Redistribution and use in source and binary forms, with or without diff --git a/builtins/target-avx11.ll b/builtins/target-avx11.ll index c4c421a0..dd615779 100644 --- a/builtins/target-avx11.ll +++ b/builtins/target-avx11.ll @@ -1,4 +1,4 @@ -;; Copyright (c) 2012, Intel Corporation +;; Copyright (c) 2012-2013, Intel Corporation ;; All rights reserved. ;; ;; Redistribution and use in source and binary forms, with or without diff --git a/builtins/target-avx2-x2.ll b/builtins/target-avx2-x2.ll index 053fd078..4eb6720e 100644 --- a/builtins/target-avx2-x2.ll +++ b/builtins/target-avx2-x2.ll @@ -1,4 +1,4 @@ -;; Copyright (c) 2010-2012, Intel Corporation +;; Copyright (c) 2010-2013, Intel Corporation ;; All rights reserved. ;; ;; Redistribution and use in source and binary forms, with or without diff --git a/builtins/target-avx2.ll b/builtins/target-avx2.ll index 20ecef47..c9e21e65 100644 --- a/builtins/target-avx2.ll +++ b/builtins/target-avx2.ll @@ -1,4 +1,4 @@ -;; Copyright (c) 2010-2012, Intel Corporation +;; Copyright (c) 2010-2013, Intel Corporation ;; All rights reserved. ;; ;; Redistribution and use in source and binary forms, with or without diff --git a/builtins/target-generic-1.ll b/builtins/target-generic-1.ll index a48294ba..f3e4ddba 100644 --- a/builtins/target-generic-1.ll +++ b/builtins/target-generic-1.ll @@ -1,3 +1,35 @@ +;; Copyright (c) 2012-2013, Intel Corporation +;; All rights reserved. +;; +;; Redistribution and use in source and binary forms, with or without +;; modification, are permitted provided that the following conditions are +;; met: +;; +;; * Redistributions of source code must retain the above copyright +;; notice, this list of conditions and the following disclaimer. +;; +;; * Redistributions in binary form must reproduce the above copyright +;; notice, this list of conditions and the following disclaimer in the +;; documentation and/or other materials provided with the distribution. 
+;; +;; * Neither the name of Intel Corporation nor the names of its +;; contributors may be used to endorse or promote products derived from +;; this software without specific prior written permission. +;; +;; +;; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +;; IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +;; TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +;; PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER +;; OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +;; EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +;; PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +;; PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +;; LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +;; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +;; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; Define the standard library builtins for the NOVEC target define(`MASK',`i32') diff --git a/builtins/target-generic-16.ll b/builtins/target-generic-16.ll index cc5644bc..9fe0dae6 100644 --- a/builtins/target-generic-16.ll +++ b/builtins/target-generic-16.ll @@ -1,4 +1,4 @@ -;; Copyright (c) 2010-2011, Intel Corporation +;; Copyright (c) 2010-2014, Intel Corporation ;; All rights reserved. ;; ;; Redistribution and use in source and binary forms, with or without diff --git a/builtins/target-generic-32.ll b/builtins/target-generic-32.ll index 8eb31c48..cc895c28 100644 --- a/builtins/target-generic-32.ll +++ b/builtins/target-generic-32.ll @@ -1,4 +1,4 @@ -;; Copyright (c) 2010-2012, Intel Corporation +;; Copyright (c) 2010-2014, Intel Corporation ;; All rights reserved. 
;; ;; Redistribution and use in source and binary forms, with or without diff --git a/builtins/target-generic-4.ll b/builtins/target-generic-4.ll index d80c5b91..8ed18f67 100644 --- a/builtins/target-generic-4.ll +++ b/builtins/target-generic-4.ll @@ -1,4 +1,4 @@ -;; Copyright (c) 2010-2011, Intel Corporation +;; Copyright (c) 2010-2014, Intel Corporation ;; All rights reserved. ;; ;; Redistribution and use in source and binary forms, with or without diff --git a/builtins/target-generic-64.ll b/builtins/target-generic-64.ll index 6a044c41..5ab429fc 100644 --- a/builtins/target-generic-64.ll +++ b/builtins/target-generic-64.ll @@ -1,4 +1,4 @@ -;; Copyright (c) 2010-2012, Intel Corporation +;; Copyright (c) 2010-2014, Intel Corporation ;; All rights reserved. ;; ;; Redistribution and use in source and binary forms, with or without diff --git a/builtins/target-generic-8.ll b/builtins/target-generic-8.ll index 4353658c..47a7fe71 100644 --- a/builtins/target-generic-8.ll +++ b/builtins/target-generic-8.ll @@ -1,4 +1,4 @@ -;; Copyright (c) 2010-2011, Intel Corporation +;; Copyright (c) 2010-2014, Intel Corporation ;; All rights reserved. ;; ;; Redistribution and use in source and binary forms, with or without diff --git a/builtins/target-generic-common.ll b/builtins/target-generic-common.ll index ef33ff97..8b651159 100644 --- a/builtins/target-generic-common.ll +++ b/builtins/target-generic-common.ll @@ -1,4 +1,4 @@ -;; Copyright (c) 2010-2012, Intel Corporation +;; Copyright (c) 2010-2014, Intel Corporation ;; All rights reserved. ;; ;; Redistribution and use in source and binary forms, with or without diff --git a/builtins/target-sse2-common.ll b/builtins/target-sse2-common.ll index ad1d88bc..77a5c551 100644 --- a/builtins/target-sse2-common.ll +++ b/builtins/target-sse2-common.ll @@ -1,4 +1,4 @@ -;; Copyright (c) 2010-2011, Intel Corporation +;; Copyright (c) 2010-2014, Intel Corporation ;; All rights reserved. 
;; ;; Redistribution and use in source and binary forms, with or without diff --git a/builtins/target-sse2-x2.ll b/builtins/target-sse2-x2.ll index 4bee3241..2707134b 100644 --- a/builtins/target-sse2-x2.ll +++ b/builtins/target-sse2-x2.ll @@ -1,4 +1,4 @@ -;; Copyright (c) 2010-2012, Intel Corporation +;; Copyright (c) 2010-2014, Intel Corporation ;; All rights reserved. ;; ;; Redistribution and use in source and binary forms, with or without diff --git a/builtins/target-sse2.ll b/builtins/target-sse2.ll index 7f82f933..0f13b46f 100644 --- a/builtins/target-sse2.ll +++ b/builtins/target-sse2.ll @@ -1,4 +1,4 @@ -;; Copyright (c) 2010-2012, Intel Corporation +;; Copyright (c) 2010-2014, Intel Corporation ;; All rights reserved. ;; ;; Redistribution and use in source and binary forms, with or without diff --git a/builtins/target-sse4-x2.ll b/builtins/target-sse4-x2.ll index 70e3d01e..6dc81308 100644 --- a/builtins/target-sse4-x2.ll +++ b/builtins/target-sse4-x2.ll @@ -1,4 +1,4 @@ -;; Copyright (c) 2010-2012, Intel Corporation +;; Copyright (c) 2010-2014, Intel Corporation ;; All rights reserved. ;; ;; Redistribution and use in source and binary forms, with or without diff --git a/builtins/target-sse4.ll b/builtins/target-sse4.ll index 18f0d80e..59e80a24 100644 --- a/builtins/target-sse4.ll +++ b/builtins/target-sse4.ll @@ -1,4 +1,4 @@ -;; Copyright (c) 2010-2012, Intel Corporation +;; Copyright (c) 2010-2014, Intel Corporation ;; All rights reserved. ;; ;; Redistribution and use in source and binary forms, with or without diff --git a/builtins/util.m4 b/builtins/util.m4 index ad0149ad..01f4e03f 100644 --- a/builtins/util.m4 +++ b/builtins/util.m4 @@ -1,4 +1,4 @@ -;; Copyright (c) 2010-2013, Intel Corporation +;; Copyright (c) 2010-2014, Intel Corporation ;; All rights reserved. 
;; ;; Redistribution and use in source and binary forms, with or without diff --git a/ctx.cpp b/ctx.cpp index 43964af3..7e487857 100644 --- a/ctx.cpp +++ b/ctx.cpp @@ -1,5 +1,5 @@ /* - Copyright (c) 2010-2013, Intel Corporation + Copyright (c) 2010-2014, Intel Corporation All rights reserved. Redistribution and use in source and binary forms, with or without diff --git a/ctx.h b/ctx.h index f04b08dd..f47777ff 100644 --- a/ctx.h +++ b/ctx.h @@ -1,5 +1,5 @@ /* - Copyright (c) 2010-2013, Intel Corporation + Copyright (c) 2010-2014, Intel Corporation All rights reserved. Redistribution and use in source and binary forms, with or without diff --git a/decl.h b/decl.h index 1a240fd7..5bd366ec 100644 --- a/decl.h +++ b/decl.h @@ -1,5 +1,5 @@ /* - Copyright (c) 2010-2012, Intel Corporation + Copyright (c) 2010-2013, Intel Corporation All rights reserved. Redistribution and use in source and binary forms, with or without diff --git a/expr.cpp b/expr.cpp index d448f891..45291c0b 100644 --- a/expr.cpp +++ b/expr.cpp @@ -1,5 +1,5 @@ /* - Copyright (c) 2010-2013, Intel Corporation + Copyright (c) 2010-2014, Intel Corporation All rights reserved. Redistribution and use in source and binary forms, with or without diff --git a/expr.h b/expr.h index 38617e8e..b539ff1b 100644 --- a/expr.h +++ b/expr.h @@ -1,5 +1,5 @@ /* - Copyright (c) 2010-2012, Intel Corporation + Copyright (c) 2010-2013, Intel Corporation All rights reserved. Redistribution and use in source and binary forms, with or without diff --git a/func.cpp b/func.cpp index 9dbcbcfd..82cb70ce 100644 --- a/func.cpp +++ b/func.cpp @@ -1,5 +1,5 @@ /* - Copyright (c) 2011-2013, Intel Corporation + Copyright (c) 2011-2014, Intel Corporation All rights reserved. 
Redistribution and use in source and binary forms, with or without diff --git a/func.h b/func.h index 88a96dbc..3019eeb1 100644 --- a/func.h +++ b/func.h @@ -1,5 +1,5 @@ /* - Copyright (c) 2011-2012, Intel Corporation + Copyright (c) 2011-2013, Intel Corporation All rights reserved. Redistribution and use in source and binary forms, with or without diff --git a/ispc.cpp b/ispc.cpp index 26c215b5..12898a8d 100644 --- a/ispc.cpp +++ b/ispc.cpp @@ -1,5 +1,5 @@ /* - Copyright (c) 2010-2013, Intel Corporation + Copyright (c) 2010-2014, Intel Corporation All rights reserved. Redistribution and use in source and binary forms, with or without diff --git a/ispc.h b/ispc.h index 5e554bf7..111524ce 100644 --- a/ispc.h +++ b/ispc.h @@ -1,5 +1,5 @@ /* - Copyright (c) 2010-2013, Intel Corporation + Copyright (c) 2010-2014, Intel Corporation All rights reserved. Redistribution and use in source and binary forms, with or without diff --git a/module.h b/module.h index e117f933..c1350063 100644 --- a/module.h +++ b/module.h @@ -1,5 +1,5 @@ /* - Copyright (c) 2010-2012, Intel Corporation + Copyright (c) 2010-2014, Intel Corporation All rights reserved. Redistribution and use in source and binary forms, with or without diff --git a/opt.cpp b/opt.cpp index cf44f485..85319ce7 100644 --- a/opt.cpp +++ b/opt.cpp @@ -1,5 +1,5 @@ /* - Copyright (c) 2010-2013, Intel Corporation + Copyright (c) 2010-2014, Intel Corporation All rights reserved. Redistribution and use in source and binary forms, with or without diff --git a/opt.h b/opt.h index 63c5d5b4..1e3584b9 100644 --- a/opt.h +++ b/opt.h @@ -1,5 +1,5 @@ /* - Copyright (c) 2010-2011, Intel Corporation + Copyright (c) 2010-2013, Intel Corporation All rights reserved. 
Redistribution and use in source and binary forms, with or without diff --git a/stmt.cpp b/stmt.cpp index 52d25fe9..14c4146b 100644 --- a/stmt.cpp +++ b/stmt.cpp @@ -1,5 +1,5 @@ /* - Copyright (c) 2010-2013, Intel Corporation + Copyright (c) 2010-2014, Intel Corporation All rights reserved. Redistribution and use in source and binary forms, with or without diff --git a/stmt.h b/stmt.h index 7ed1f0ef..fb34c801 100644 --- a/stmt.h +++ b/stmt.h @@ -1,5 +1,5 @@ /* - Copyright (c) 2010-2012, Intel Corporation + Copyright (c) 2010-2013, Intel Corporation All rights reserved. Redistribution and use in source and binary forms, with or without diff --git a/sym.cpp b/sym.cpp index 05f9996a..396ec488 100644 --- a/sym.cpp +++ b/sym.cpp @@ -1,5 +1,5 @@ /* - Copyright (c) 2010-2012, Intel Corporation + Copyright (c) 2010-2013, Intel Corporation All rights reserved. Redistribution and use in source and binary forms, with or without diff --git a/sym.h b/sym.h index 761c3612..5840fcdb 100644 --- a/sym.h +++ b/sym.h @@ -1,5 +1,5 @@ /* - Copyright (c) 2010-2012, Intel Corporation + Copyright (c) 2010-2013, Intel Corporation All rights reserved. Redistribution and use in source and binary forms, with or without diff --git a/test_static.cpp b/test_static.cpp index 27a5b136..c27e2741 100644 --- a/test_static.cpp +++ b/test_static.cpp @@ -1,5 +1,5 @@ /* - Copyright (c) 2010-2011, Intel Corporation + Copyright (c) 2010-2013, Intel Corporation All rights reserved. Redistribution and use in source and binary forms, with or without diff --git a/type.h b/type.h index 0337be6e..a9c57902 100644 --- a/type.h +++ b/type.h @@ -1,5 +1,5 @@ /* - Copyright (c) 2010-2012, Intel Corporation + Copyright (c) 2010-2013, Intel Corporation All rights reserved. 
Redistribution and use in source and binary forms, with or without diff --git a/util.cpp b/util.cpp index 70bf53bb..b9b5858a 100644 --- a/util.cpp +++ b/util.cpp @@ -1,5 +1,5 @@ /* - Copyright (c) 2010-2013, Intel Corporation + Copyright (c) 2010-2014, Intel Corporation All rights reserved. Redistribution and use in source and binary forms, with or without diff --git a/util.h b/util.h index 7edf71f7..11d843c4 100644 --- a/util.h +++ b/util.h @@ -1,5 +1,5 @@ /* - Copyright (c) 2010-2011, Intel Corporation + Copyright (c) 2010-2013, Intel Corporation All rights reserved. Redistribution and use in source and binary forms, with or without From 43db682c6dfebefe9a0e963ffa8d4872782b369a Mon Sep 17 00:00:00 2001 From: jbrodman Date: Thu, 13 Mar 2014 06:07:56 -0700 Subject: [PATCH 26/28] Fix bugs with exported varyings. --- module.cpp | 33 ++++++++++----------------------- type.cpp | 35 ++++++++++++++++++++--------------- type.h | 3 ++- 3 files changed, 32 insertions(+), 39 deletions(-) diff --git a/module.cpp b/module.cpp index 014b7f5f..ad0ef284 100644 --- a/module.cpp +++ b/module.cpp @@ -1156,11 +1156,11 @@ lContainsPtrToVarying(const StructType *st) { */ static void lEmitStructDecl(const StructType *st, std::vector *emittedStructs, - FILE *file, bool printGenericHeader=false, bool emitUnifs=true) { + FILE *file, bool emitUnifs=true) { // if we're emitting this for a generic dispatch header file and it's // struct that only contains uniforms, don't bother if we're emitting uniforms - if (printGenericHeader && !emitUnifs && !lContainsPtrToVarying(st)) { + if (!emitUnifs && !lContainsPtrToVarying(st)) { return; } @@ -1176,33 +1176,20 @@ lEmitStructDecl(const StructType *st, std::vector *emittedSt const StructType *elementStructType = lGetElementStructType(st->GetElementType(i)); if (elementStructType != NULL) - lEmitStructDecl(elementStructType, emittedStructs, file, printGenericHeader, emitUnifs); + lEmitStructDecl(elementStructType, emittedStructs, file, emitUnifs); } 
// And now it's safe to declare this one emittedStructs->push_back(st); - - if (printGenericHeader && lContainsPtrToVarying(st)) { - fprintf(file, "#ifndef __ISPC_STRUCT_%s%d__\n", - st->GetStructName().c_str(), - g->target->getVectorWidth()); - fprintf(file, "#define __ISPC_STRUCT_%s%d__\n", - st->GetStructName().c_str(), - g->target->getVectorWidth()); - } - else { - fprintf(file, "#ifndef __ISPC_STRUCT_%s__\n",st->GetStructName().c_str()); - fprintf(file, "#define __ISPC_STRUCT_%s__\n",st->GetStructName().c_str()); - } - fprintf(file, "struct %s", st->GetStructName().c_str()); + fprintf(file, "#ifndef __ISPC_STRUCT_%s__\n",st->GetCStructName().c_str()); + fprintf(file, "#define __ISPC_STRUCT_%s__\n",st->GetCStructName().c_str()); + + fprintf(file, "struct %s", st->GetCStructName().c_str()); if (st->GetSOAWidth() > 0) // This has to match the naming scheme in // StructType::GetCDeclaration(). fprintf(file, "_SOA%d", st->GetSOAWidth()); - if (printGenericHeader && lContainsPtrToVarying(st)) { - fprintf(file, "%d", g->target->getVectorWidth()); - } fprintf(file, " {\n"); for (int i = 0; i < st->GetElementCount(); ++i) { @@ -1219,10 +1206,10 @@ lEmitStructDecl(const StructType *st, std::vector *emittedSt header file, emit their declarations. 
*/ static void -lEmitStructDecls(std::vector &structTypes, FILE *file, bool printGenericHeader=false, bool emitUnifs=true) { +lEmitStructDecls(std::vector &structTypes, FILE *file, bool emitUnifs=true) { std::vector emittedStructs; for (unsigned int i = 0; i < structTypes.size(); ++i) - lEmitStructDecl(structTypes[i], &emittedStructs, file, printGenericHeader, emitUnifs); + lEmitStructDecl(structTypes[i], &emittedStructs, file, emitUnifs); } @@ -1938,7 +1925,7 @@ Module::writeDispatchHeader(DispatchHeaderInfo *DHI) { lEmitVectorTypedefs(exportedVectorTypes, f); lEmitEnumDecls(exportedEnumTypes, f); } - lEmitStructDecls(exportedStructTypes, f, true, DHI->EmitUnifs); + lEmitStructDecls(exportedStructTypes, f, DHI->EmitUnifs); // Update flags DHI->EmitUnifs = false; diff --git a/type.cpp b/type.cpp index 2e9d831e..e0e36182 100644 --- a/type.cpp +++ b/type.cpp @@ -456,15 +456,9 @@ AtomicType::GetCDeclaration(const std::string &name) const { ret += name; } - if (variability == Variability::Varying || - variability == Variability::SOA) { + if (variability == Variability::SOA) { char buf[32]; - // get program count - // g->mangleFunctionsNamesWithTarget - hack check for void * - int vWidth = (variability == Variability::Varying) ? - g->target->getVectorWidth() : - variability.soaWidth; - sprintf(buf, "[%d]", vWidth); + sprintf(buf, "[%d]", variability.soaWidth); ret += buf; } @@ -1096,20 +1090,27 @@ PointerType::GetCDeclaration(const std::string &name) const { } std::string ret = baseType->GetCDeclaration(""); + + bool baseIsBasicVarying = (IsBasicType(baseType)) && (baseType->IsVaryingType()); + + if (baseIsBasicVarying) ret += std::string("("); ret += std::string(" *"); if (isConst) ret += " const"; ret += std::string(" "); ret += name; + if (baseIsBasicVarying) ret += std::string(")"); - if (variability == Variability::SOA || - variability == Variability::Varying) { - int vWidth = (variability == Variability::Varying) ? 
- g->target->getVectorWidth() : - variability.soaWidth; + if (variability == Variability::SOA) { char buf[32]; - sprintf(buf, "[%d]", vWidth); + sprintf(buf, "[%d]", variability.soaWidth); ret += buf; } + if (baseIsBasicVarying) { + int vWidth = g->target->getVectorWidth(); + char buf[32]; + sprintf(buf, "[%d]", vWidth); + ret += buf; + } return ret; } @@ -1890,6 +1891,10 @@ StructType::StructType(const std::string &n, const llvm::SmallVector "Foo".) */ - const std::string &GetStructName() const { return name; } + const std::string &GetStructName() const { return name; } + const std::string GetCStructName() const; private: static bool checkIfCanBeSOA(const StructType *st); From 02d55f24f6b20f3338b2f80a71b8510602c08985 Mon Sep 17 00:00:00 2001 From: Ilia Filippov Date: Mon, 17 Mar 2014 14:42:55 +0400 Subject: [PATCH 27/28] adding const to Atomic::Void type --- ctx.cpp | 6 +++--- decl.cpp | 16 ++++++++-------- expr.cpp | 21 +++++++++++---------- func.cpp | 2 +- module.cpp | 4 ++-- parse.yy | 8 ++++---- type.cpp | 14 +++++++------- 7 files changed, 36 insertions(+), 35 deletions(-) diff --git a/ctx.cpp b/ctx.cpp index 7e487857..57cbe7ae 100644 --- a/ctx.cpp +++ b/ctx.cpp @@ -276,7 +276,7 @@ FunctionEmitContext::FunctionEmitContext(Function *func, Symbol *funSym, disableGSWarningCount = 0; const Type *returnType = function->GetReturnType(); - if (!returnType || Type::Equal(returnType, AtomicType::Void)) + if (!returnType || returnType->IsVoidType()) returnValuePtr = NULL; else { llvm::Type *ftype = returnType->LLVMType(g->ctx); @@ -1244,7 +1244,7 @@ FunctionEmitContext::GetLabels() { void FunctionEmitContext::CurrentLanesReturned(Expr *expr, bool doCoherenceCheck) { const Type *returnType = function->GetReturnType(); - if (Type::Equal(returnType, AtomicType::Void)) { + if (returnType->IsVoidType()) { if (expr != NULL) Error(expr->pos, "Can't return non-void type \"%s\" from void function.", expr->GetType()->GetString().c_str()); @@ -3516,7 +3516,7 @@ 
FunctionEmitContext::ReturnInst() { rinst = llvm::ReturnInst::Create(*g->ctx, retVal, bblock); } else { - AssertPos(currentPos, Type::Equal(function->GetReturnType(), AtomicType::Void)); + AssertPos(currentPos, function->GetReturnType()->IsVoidType()); rinst = llvm::ReturnInst::Create(*g->ctx, bblock); } diff --git a/decl.cpp b/decl.cpp index 8a10543b..2bdb6c10 100644 --- a/decl.cpp +++ b/decl.cpp @@ -80,19 +80,19 @@ lApplyTypeQualifiers(int typeQualifiers, const Type *type, SourcePos pos) { } if ((typeQualifiers & TYPEQUAL_UNIFORM) != 0) { - if (Type::Equal(type, AtomicType::Void)) + if (type->IsVoidType()) Error(pos, "\"uniform\" qualifier is illegal with \"void\" type."); else type = type->GetAsUniformType(); } else if ((typeQualifiers & TYPEQUAL_VARYING) != 0) { - if (Type::Equal(type, AtomicType::Void)) + if (type->IsVoidType()) Error(pos, "\"varying\" qualifier is illegal with \"void\" type."); else type = type->GetAsVaryingType(); } else { - if (Type::Equal(type, AtomicType::Void) == false) + if (type->IsVoidType() == false) type = type->GetAsUnboundVariabilityType(); } @@ -392,7 +392,7 @@ Declarator::InitFromType(const Type *baseType, DeclSpecs *ds) { type = refType; } else if (kind == DK_ARRAY) { - if (Type::Equal(baseType, AtomicType::Void)) { + if (baseType->IsVoidType()) { Error(pos, "Arrays of \"void\" type are illegal."); return; } @@ -454,7 +454,7 @@ Declarator::InitFromType(const Type *baseType, DeclSpecs *ds) { "function parameter declaration for parameter \"%s\".", lGetStorageClassName(d->declSpecs->storageClass), decl->name.c_str()); - if (Type::Equal(decl->type, AtomicType::Void)) { + if (decl->type->IsVoidType()) { Error(decl->pos, "Parameter with type \"void\" illegal in function " "parameter list."); decl->type = NULL; @@ -625,7 +625,7 @@ Declaration::GetVariableDeclarations() const { continue; } - if (Type::Equal(decl->type, AtomicType::Void)) + if (decl->type->IsVoidType()) Error(decl->pos, "\"void\" type variable illegal in declaration."); 
else if (CastType(decl->type) == NULL) { decl->type = decl->type->ResolveUnboundVariability(Variability::Varying); @@ -689,7 +689,7 @@ GetStructTypesNamesPositions(const std::vector &sd, // FIXME: making this fake little DeclSpecs here is really // disgusting DeclSpecs ds(type); - if (Type::Equal(type, AtomicType::Void) == false) { + if (type->IsVoidType() == false) { if (type->IsUniformType()) ds.typeQualifiers |= TYPEQUAL_UNIFORM; else if (type->IsVaryingType()) @@ -703,7 +703,7 @@ GetStructTypesNamesPositions(const std::vector &sd, Declarator *d = (*sd[i]->declarators)[j]; d->InitFromDeclSpecs(&ds); - if (Type::Equal(d->type, AtomicType::Void)) + if (d->type->IsVoidType()) Error(d->pos, "\"void\" type illegal for struct member."); elementTypes->push_back(d->type); diff --git a/expr.cpp b/expr.cpp index 45291c0b..1544be53 100644 --- a/expr.cpp +++ b/expr.cpp @@ -209,14 +209,14 @@ lDoTypeConv(const Type *fromType, const Type *toType, Expr **expr, if (Type::Equal(toType, fromType)) return true; - if (Type::Equal(fromType, AtomicType::Void)) { + if (fromType->IsVoidType()) { if (!failureOk) Error(pos, "Can't convert from \"void\" to \"%s\" for %s.", toType->GetString().c_str(), errorMsgBase); return false; } - if (Type::Equal(toType, AtomicType::Void)) { + if (toType->IsVoidType()) { if (!failureOk) Error(pos, "Can't convert type \"%s\" to \"void\" for %s.", fromType->GetString().c_str(), errorMsgBase); @@ -342,7 +342,8 @@ lDoTypeConv(const Type *fromType, const Type *toType, Expr **expr, return false; } else if (PointerType::IsVoidPointer(toPointerType)) { - if (fromPointerType->GetBaseType()->IsConstType()) { + if (fromPointerType->GetBaseType()->IsConstType() && + !(toPointerType->GetBaseType()->IsConstType())) { if (!failureOk) Error(pos, "Can't convert pointer to const \"%s\" to void pointer.", fromPointerType->GetString().c_str()); @@ -3611,7 +3612,7 @@ FunctionCallExpr::GetValue(FunctionEmitContext *ctx) const { const FunctionType *ft = 
lGetFunctionType(func); AssertPos(pos, ft != NULL); - bool isVoidFunc = Type::Equal(ft->GetReturnType(), AtomicType::Void); + bool isVoidFunc = ft->GetReturnType()->IsVoidType(); // Automatically convert function call args to references if needed. // FIXME: this should move to the TypeCheck() method... (but the @@ -3898,7 +3899,7 @@ FunctionCallExpr::TypeCheck() { if (fptrType->IsVaryingType()) { const Type *retType = funcType->GetReturnType(); - if (Type::Equal(retType, AtomicType::Void) == false && + if (retType->IsVoidType() == false && retType->IsUniformType()) { Error(pos, "Illegal to call a varying function pointer that " "points to a function with a uniform return type \"%s\".", @@ -4606,7 +4607,7 @@ IndexExpr::TypeCheck() { if (!CastType(baseExprType->GetReferenceTarget())) { if (const PointerType *pt = CastType(baseExprType)) { - if (Type::Equal(AtomicType::Void, pt->GetBaseType())) { + if (pt->GetBaseType()->IsVoidType()) { Error(pos, "Illegal to dereference void pointer type \"%s\".", baseExprType->GetString().c_str()); return NULL; @@ -6800,7 +6801,7 @@ TypeCastExpr::GetValue(FunctionEmitContext *ctx) const { return NULL; } - if (Type::Equal(toType, AtomicType::Void)) { + if (toType->IsVoidType()) { // emit the code for the expression in case it has side-effects but // then we're done. (void)expr->GetValue(ctx); @@ -7163,10 +7164,10 @@ TypeCastExpr::TypeCheck() { toType = lDeconstifyType(toType); // Anything can be cast to void... 
- if (Type::Equal(toType, AtomicType::Void)) + if (toType->IsVoidType()) return this; - if (Type::Equal(fromType, AtomicType::Void) || + if (fromType->IsVoidType() || (fromType->IsVaryingType() && toType->IsUniformType())) { Error(pos, "Can't type cast from type \"%s\" to type \"%s\"", fromType->GetString().c_str(), toType->GetString().c_str()); @@ -7589,7 +7590,7 @@ PtrDerefExpr::TypeCheck() { } if (const PointerType *pt = CastType(type)) { - if (Type::Equal(AtomicType::Void, pt->GetBaseType())) { + if (pt->GetBaseType()->IsVoidType()) { Error(pos, "Illegal to dereference void pointer type \"%s\".", type->GetString().c_str()); return NULL; diff --git a/func.cpp b/func.cpp index 82cb70ce..82df6b27 100644 --- a/func.cpp +++ b/func.cpp @@ -427,7 +427,7 @@ Function::emitCode(FunctionEmitContext *ctx, llvm::Function *function, // issue a warning. Also need to warn if it's the entry block for // the function (in which case it will not have predeccesors but is // still reachable.) - if (Type::Equal(type->GetReturnType(), AtomicType::Void) == false && + if (type->GetReturnType()->IsVoidType() == false && (pred_begin(ec.bblock) != pred_end(ec.bblock) || (ec.bblock == entryBBlock))) Warning(sym->pos, "Missing return statement in function returning \"%s\".", type->rType->GetString().c_str()); diff --git a/module.cpp b/module.cpp index 0b4d11cd..0a79e736 100644 --- a/module.cpp +++ b/module.cpp @@ -426,7 +426,7 @@ Module::AddGlobalVariable(const std::string &name, const Type *type, Expr *initE return; } - if (Type::Equal(type, AtomicType::Void)) { + if (type->IsVoidType()) { Error(pos, "\"void\" type global variable is illegal."); return; } @@ -818,7 +818,7 @@ Module::AddFunctionDeclaration(const std::string &name, "exported function \"%s\"", name.c_str()); if (functionType->isTask && - Type::Equal(functionType->GetReturnType(), AtomicType::Void) == false) + functionType->GetReturnType()->IsVoidType() == false) Error(pos, "Task-qualified functions must have void return 
type."); if (functionType->isExported || functionType->isExternC) diff --git a/parse.yy b/parse.yy index 9a0377c5..39693b70 100644 --- a/parse.yy +++ b/parse.yy @@ -617,7 +617,7 @@ rate_qualified_type_specifier { if ($2 == NULL) $$ = NULL; - else if (Type::Equal($2, AtomicType::Void)) { + else if ($2->IsVoidType()) { Error(@1, "\"uniform\" qualifier is illegal with \"void\" type."); $$ = NULL; } @@ -628,7 +628,7 @@ rate_qualified_type_specifier { if ($2 == NULL) $$ = NULL; - else if (Type::Equal($2, AtomicType::Void)) { + else if ($2->IsVoidType()) { Error(@1, "\"varying\" qualifier is illegal with \"void\" type."); $$ = NULL; } @@ -1081,7 +1081,7 @@ specifier_qualifier_list { if ($2 != NULL) { if ($1 == TYPEQUAL_UNIFORM) { - if (Type::Equal($2, AtomicType::Void)) { + if ($2->IsVoidType()) { Error(@1, "\"uniform\" qualifier is illegal with \"void\" type."); $$ = NULL; } @@ -1089,7 +1089,7 @@ specifier_qualifier_list $$ = $2->GetAsUniformType(); } else if ($1 == TYPEQUAL_VARYING) { - if (Type::Equal($2, AtomicType::Void)) { + if ($2->IsVoidType()) { Error(@1, "\"varying\" qualifier is illegal with \"void\" type."); $$ = NULL; } diff --git a/type.cpp b/type.cpp index de7b5275..5d0154aa 100644 --- a/type.cpp +++ b/type.cpp @@ -228,7 +228,7 @@ Type::IsReferenceType() const { bool Type::IsVoidType() const { - return this == AtomicType::Void; + return EqualIgnoringConst(this, AtomicType::Void); } bool @@ -290,7 +290,7 @@ AtomicType::GetAsUnsignedType() const { const AtomicType * AtomicType::GetAsConstType() const { - if (basicType == TYPE_VOID || isConst == true) + if (isConst == true) return this; if (asOtherConstType == NULL) { @@ -303,7 +303,7 @@ AtomicType::GetAsConstType() const { const AtomicType * AtomicType::GetAsNonConstType() const { - if (basicType == TYPE_VOID || isConst == false) + if (isConst == false) return this; if (asOtherConstType == NULL) { @@ -380,8 +380,8 @@ AtomicType::ResolveUnboundVariability(Variability v) const { std::string 
AtomicType::GetString() const { std::string ret; + if (isConst) ret += "const "; if (basicType != TYPE_VOID) { - if (isConst) ret += "const "; ret += variability.GetString(); ret += " "; } @@ -1167,7 +1167,7 @@ PointerType::LLVMType(llvm::LLVMContext *ctx) const { if (ftype != NULL) ptype = llvm::PointerType::get(ftype->LLVMFunctionType(ctx), 0); else { - if (baseType == AtomicType::Void) + if (baseType->IsVoidType()) ptype = LLVMTypes::VoidPointerType; else ptype = llvm::PointerType::get(baseType->LLVMType(ctx), 0); @@ -1235,7 +1235,7 @@ ArrayType::ArrayType(const Type *c, int a) : SequentialType(ARRAY_TYPE), child(c), numElements(a) { // 0 -> unsized array. Assert(numElements >= 0); - Assert(Type::Equal(c, AtomicType::Void) == false); + Assert(c->IsVoidType() == false); } @@ -3015,7 +3015,7 @@ FunctionType::LLVMFunctionType(llvm::LLVMContext *ctx, bool removeMask) const { Assert(m->errorCount > 0); return NULL; } - Assert(Type::Equal(paramTypes[i], AtomicType::Void) == false); + Assert(paramTypes[i]->IsVoidType() == false); llvm::Type *t = paramTypes[i]->LLVMType(ctx); if (t == NULL) { From 96fe6e4fb6cfe48cfc5e20b2a8156f2c82a2dd9e Mon Sep 17 00:00:00 2001 From: Dmitry Babokin Date: Tue, 18 Mar 2014 23:01:31 +0400 Subject: [PATCH 28/28] fail_db.txt update on Linux --- fail_db.txt | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/fail_db.txt b/fail_db.txt index 1b255cbc..dd0fbdcf 100644 --- a/fail_db.txt +++ b/fail_db.txt @@ -257,15 +257,12 @@ ./tests/reduce-equal-5.ispc compfail x86-64 generic-16 Linux LLVM 3.5 clang++3.4 -O0 * ./tests/reduce-equal-6.ispc compfail x86-64 generic-16 Linux LLVM 3.5 clang++3.4 -O0 * ./tests/reduce-equal-8.ispc compfail x86-64 generic-16 Linux LLVM 3.5 clang++3.4 -O0 * -./tests/foreach-double-1.ispc runfail x86 avx2-i32x8 Linux LLVM 3.5 clang++3.4 -O2 * -./tests/foreach-double-1.ispc runfail x86 avx2-i32x16 Linux LLVM 3.5 clang++3.4 -O2 * -./tests/foreach-double-1.ispc runfail x86 avx2-i64x4 Linux LLVM 
3.5 clang++3.4 -O2 * -./tests/ptr-int-1.ispc runfail x86 avx2-i32x8 Linux LLVM 3.4 clang++3.4 -O2 * -./tests/ptr-int-1.ispc runfail x86 avx2-i32x16 Linux LLVM 3.4 clang++3.4 -O2 * -./tests/ptr-int-1.ispc runfail x86 avx2-i64x4 Linux LLVM 3.4 clang++3.4 -O2 * -./tests/ptr-int-1.ispc runfail x86 avx2-i32x8 Linux LLVM 3.5 clang++3.4 -O2 * -./tests/ptr-int-1.ispc runfail x86 avx2-i32x16 Linux LLVM 3.5 clang++3.4 -O2 * -./tests/ptr-int-1.ispc runfail x86 avx2-i64x4 Linux LLVM 3.5 clang++3.4 -O2 * .\tests\foreach-double-1.ispc runfail x86 avx2-i32x8 Windows LLVM 3.5 cl -O2 * .\tests\foreach-double-1.ispc runfail x86 avx2-i32x16 Windows LLVM 3.5 cl -O2 * .\tests\foreach-double-1.ispc runfail x86 avx2-i64x4 Windows LLVM 3.5 cl -O2 * +./tests/ptr-22.ispc runfail x86-64 generic-4 Linux LLVM 3.3 clang++3.4 -O0 * +./tests/ptr-22.ispc runfail x86-64 generic-16 Linux LLVM 3.3 clang++3.4 -O0 * +./tests/ptr-22.ispc runfail x86-64 generic-4 Linux LLVM 3.4 clang++3.4 -O0 * +./tests/ptr-22.ispc runfail x86-64 generic-16 Linux LLVM 3.4 clang++3.4 -O0 * +./tests/ptr-22.ispc runfail x86-64 generic-4 Linux LLVM 3.5 clang++3.4 -O0 * +./tests/ptr-22.ispc runfail x86-64 generic-16 Linux LLVM 3.5 clang++3.4 -O0 *