diff --git a/alloy.py b/alloy.py index 68013865..f88f2abf 100755 --- a/alloy.py +++ b/alloy.py @@ -208,7 +208,7 @@ def build_LLVM(version_LLVM, revision, folder, tarball, debug, selfbuild, extra, from_validation) else: try_do_LLVM("configure release version ", - 'cmake -G "Visual Studio 10" -DCMAKE_INSTALL_PREFIX="..\\'+ LLVM_BIN + + 'cmake -G "Visual Studio 11" -DCMAKE_INSTALL_PREFIX="..\\'+ LLVM_BIN + '" -DLLVM_LIT_TOOLS_DIR="C:\\gnuwin32\\bin" ..\\' + LLVM_SRC, from_validation) else: @@ -356,6 +356,7 @@ def validation_run(only, only_targets, reference_branch, number, notify, update, print_debug("Folder: " + os.environ["ISPC_HOME"] + "\n", False, "") date = datetime.datetime.now() print_debug("Date: " + date.strftime('%H:%M %d/%m/%Y') + "\n", False, "") + newest_LLVM="3.4" # *** *** *** # Stability validation run # *** *** *** @@ -442,7 +443,7 @@ def validation_run(only, only_targets, reference_branch, number, notify, update, if len(archs) == 0: archs = ["x86", "x86-64"] if len(LLVM) == 0: - LLVM = ["3.3", "trunk"] + LLVM = [newest_LLVM, "trunk"] gen_archs = ["x86-64"] need_LLVM = check_LLVM(LLVM) for i in range(0,len(need_LLVM)): @@ -524,8 +525,8 @@ def validation_run(only, only_targets, reference_branch, number, notify, update, performance.ref = "ispc_ref.exe" performance.perf_target = "" performance.in_file = "." 
+ os.sep + f_date + os.sep + "performance.log" -# prepare LLVM 3.3 as newest LLVM - need_LLVM = check_LLVM(["3.3"]) +# prepare newest LLVM + need_LLVM = check_LLVM([newest_LLVM]) if len(need_LLVM) != 0: build_LLVM(need_LLVM[0], "", "", "", False, False, False, True, False, make) if perf_llvm == False: @@ -542,7 +543,7 @@ def validation_run(only, only_targets, reference_branch, number, notify, update, #try_do_LLVM("stash current branch ", "git stash", True) try_do_LLVM("checkout reference branch " + reference_branch + " ", "git checkout " + reference_branch, True) sys.stdout.write(".\n") - build_ispc("3.3", make) + build_ispc(newest_LLVM, make) sys.stdout.write(".\n") if current_OS != "Windows": os.rename("ispc", "ispc_ref") @@ -553,12 +554,12 @@ def validation_run(only, only_targets, reference_branch, number, notify, update, if stashing: try_do_LLVM("return current branch ", "git stash pop", True) sys.stdout.write("You can interrupt script now.\n") - build_ispc("3.3", make) + build_ispc(newest_LLVM, make) else: # build compiler with two different LLVM versions if len(check_LLVM([reference_branch])) != 0: error("you haven't got llvm called " + reference_branch, 1) - build_ispc("3.3", make) + build_ispc(newest_LLVM, make) os.rename("ispc", "ispc_ref") build_ispc(reference_branch, make) # begin validation run for performance. 
output is inserted into perf() diff --git a/builtins.cpp b/builtins.cpp index fbc0d5a0..d4c8426e 100644 --- a/builtins.cpp +++ b/builtins.cpp @@ -527,12 +527,20 @@ lSetInternalFunctions(llvm::Module *module) { "__packed_load_active", "__packed_store_active", "__packed_store_active2", + "__padds_vi8", + "__padds_vi16", + "__paddus_vi8", + "__paddus_vi16", "__popcnt_int32", "__popcnt_int64", "__prefetch_read_uniform_1", "__prefetch_read_uniform_2", "__prefetch_read_uniform_3", "__prefetch_read_uniform_nt", + "__psubs_vi8", + "__psubs_vi16", + "__psubus_vi8", + "__psubus_vi16", "__rcp_uniform_float", "__rcp_varying_float", "__rcp_uniform_double", diff --git a/builtins/target-avx-x2.ll b/builtins/target-avx-x2.ll index b3a77871..68a67133 100644 --- a/builtins/target-avx-x2.ll +++ b/builtins/target-avx-x2.ll @@ -40,6 +40,7 @@ stdlib_core() packed_load_and_store() scans() int64minmax() +saturation_arithmetic() include(`target-avx-common.ll') diff --git a/builtins/target-avx1-i64x4base.ll b/builtins/target-avx1-i64x4base.ll index a6601a28..19b47b1d 100644 --- a/builtins/target-avx1-i64x4base.ll +++ b/builtins/target-avx1-i64x4base.ll @@ -40,6 +40,7 @@ stdlib_core() packed_load_and_store() scans() int64minmax() +saturation_arithmetic() include(`target-avx-common.ll') diff --git a/builtins/target-avx1.ll b/builtins/target-avx1.ll index 9c86cab8..a9ddc112 100644 --- a/builtins/target-avx1.ll +++ b/builtins/target-avx1.ll @@ -32,6 +32,7 @@ include(`target-avx.ll') rdrand_decls() +saturation_arithmetic() ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; int min/max diff --git a/builtins/target-avx11.ll b/builtins/target-avx11.ll index fea0a7c2..c4c421a0 100644 --- a/builtins/target-avx11.ll +++ b/builtins/target-avx11.ll @@ -35,6 +35,8 @@ ifelse(LLVM_VERSION, `LLVM_3_0', `rdrand_decls()', LLVM_VERSION, `LLVM_3_1', `rdrand_decls()', `rdrand_definition()') +saturation_arithmetic() + 
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; int min/max diff --git a/builtins/target-avx2.ll b/builtins/target-avx2.ll index f4a0ee07..20ecef47 100644 --- a/builtins/target-avx2.ll +++ b/builtins/target-avx2.ll @@ -39,6 +39,8 @@ ifelse(LLVM_VERSION, `LLVM_3_0', `rdrand_decls()', LLVM_VERSION, `LLVM_3_1', `rdrand_decls()', `rdrand_definition()') +saturation_arithmetic() + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; int min/max diff --git a/builtins/target-generic-1.ll b/builtins/target-generic-1.ll index e4cef4aa..2c221118 100644 --- a/builtins/target-generic-1.ll +++ b/builtins/target-generic-1.ll @@ -11,6 +11,7 @@ scans() int64minmax() aossoa() declare_nvptx() +saturation_arithmetic_novec() ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; masked store diff --git a/builtins/target-generic-16.ll b/builtins/target-generic-16.ll index 807fd242..cc5644bc 100644 --- a/builtins/target-generic-16.ll +++ b/builtins/target-generic-16.ll @@ -31,4 +31,4 @@ define(`WIDTH',`16') include(`target-generic-common.ll') - +saturation_arithmetic_novec() diff --git a/builtins/target-generic-32.ll b/builtins/target-generic-32.ll index 5f89bcdf..8eb31c48 100644 --- a/builtins/target-generic-32.ll +++ b/builtins/target-generic-32.ll @@ -31,3 +31,4 @@ define(`WIDTH',`32') include(`target-generic-common.ll') +saturation_arithmetic_novec() diff --git a/builtins/target-generic-4.ll b/builtins/target-generic-4.ll index 7eb1f300..d80c5b91 100644 --- a/builtins/target-generic-4.ll +++ b/builtins/target-generic-4.ll @@ -31,4 +31,4 @@ define(`WIDTH',`4') include(`target-generic-common.ll') - +saturation_arithmetic_novec() diff --git a/builtins/target-generic-64.ll b/builtins/target-generic-64.ll index 09443f8e..6a044c41 100644 --- a/builtins/target-generic-64.ll +++ b/builtins/target-generic-64.ll @@ -31,3 +31,4 @@ define(`WIDTH',`64') include(`target-generic-common.ll') 
+saturation_arithmetic_novec() diff --git a/builtins/target-generic-8.ll b/builtins/target-generic-8.ll index bd9261ff..4353658c 100644 --- a/builtins/target-generic-8.ll +++ b/builtins/target-generic-8.ll @@ -31,4 +31,4 @@ define(`WIDTH',`8') include(`target-generic-common.ll') - +saturation_arithmetic_novec() diff --git a/builtins/target-nvptx.ll b/builtins/target-nvptx.ll index 1901ba5e..e39cbedf 100644 --- a/builtins/target-nvptx.ll +++ b/builtins/target-nvptx.ll @@ -2260,3 +2260,5 @@ define void @__memory_barrier() nounwind readnone alwaysinline { call void @llvm.nvvm.membar.gl() ret void } + +saturation_arithmetic_novec(); diff --git a/builtins/target-sse2-x2.ll b/builtins/target-sse2-x2.ll index bfb927e5..9dcb064f 100644 --- a/builtins/target-sse2-x2.ll +++ b/builtins/target-sse2-x2.ll @@ -44,6 +44,7 @@ stdlib_core() packed_load_and_store() scans() int64minmax() +saturation_arithmetic() include(`target-sse2-common.ll') diff --git a/builtins/target-sse2.ll b/builtins/target-sse2.ll index 93a8eb93..6a5709fd 100644 --- a/builtins/target-sse2.ll +++ b/builtins/target-sse2.ll @@ -41,6 +41,7 @@ stdlib_core() packed_load_and_store() scans() int64minmax() +saturation_arithmetic() include(`target-sse2-common.ll') diff --git a/builtins/target-sse4-16.ll b/builtins/target-sse4-16.ll index 0de5c1b4..c8f72d45 100644 --- a/builtins/target-sse4-16.ll +++ b/builtins/target-sse4-16.ll @@ -41,6 +41,7 @@ stdlib_core() packed_load_and_store() scans() int64minmax() +saturation_arithmetic() include(`target-sse4-common.ll') diff --git a/builtins/target-sse4-8.ll b/builtins/target-sse4-8.ll index 79f44212..4b394734 100644 --- a/builtins/target-sse4-8.ll +++ b/builtins/target-sse4-8.ll @@ -41,6 +41,7 @@ stdlib_core() packed_load_and_store() scans() int64minmax() +saturation_arithmetic() include(`target-sse4-common.ll') diff --git a/builtins/target-sse4-x2.ll b/builtins/target-sse4-x2.ll index ceff27f0..e87f4640 100644 --- a/builtins/target-sse4-x2.ll +++ b/builtins/target-sse4-x2.ll 
@@ -44,6 +44,7 @@ stdlib_core() packed_load_and_store() scans() int64minmax() +saturation_arithmetic() include(`target-sse4-common.ll') diff --git a/builtins/target-sse4.ll b/builtins/target-sse4.ll index 9e2ac8a5..9819d385 100644 --- a/builtins/target-sse4.ll +++ b/builtins/target-sse4.ll @@ -41,6 +41,7 @@ stdlib_core() packed_load_and_store() scans() int64minmax() +saturation_arithmetic() include(`target-sse4-common.ll') diff --git a/builtins/util-nvptx.m4 b/builtins/util-nvptx.m4 index 19fcf68c..97a316de 100644 --- a/builtins/util-nvptx.m4 +++ b/builtins/util-nvptx.m4 @@ -3415,3 +3415,62 @@ define(`define_avgs', ` define_up_avgs() define_down_avgs() ') + +;;;;;;;;;;;;;;;;;;;; + +define(`const_vector', `<$1 $2>') +define(`saturation_arithmetic_novec_universal', ` +define @__p$1s_vi8(, ) { + %v0_i16 = sext %0 to + %v1_i16 = sext %1 to + %res = $1 %v0_i16, %v1_i16 + %over_mask = icmp sgt %res, const_vector(i16, 127) + %over_res = select %over_mask, const_vector(i16, 127), %res + %under_mask = icmp slt %res, const_vector(i16, -128) + %ret_i16 = select %under_mask, const_vector(i16, -128), %over_res + %ret = trunc %ret_i16 to + ret %ret +} + +define @__p$1s_vi16(, ) { + %v0_i32 = sext %0 to + %v1_i32 = sext %1 to + %res = $1 %v0_i32, %v1_i32 + %over_mask = icmp sgt %res, const_vector(i32, 32767) + %over_res = select %over_mask, const_vector(i32, 32767), %res + %under_mask = icmp slt %res, const_vector(i32, -32768) + %ret_i32 = select %under_mask, const_vector(i32, -32768), %over_res + %ret = trunc %ret_i32 to + ret %ret +} + +define @__p$1us_vi8(, ) { + %v0_i16 = zext %0 to + %v1_i16 = zext %1 to + %res = $1 %v0_i16, %v1_i16 + %over_mask = icmp ugt %res, const_vector(i16, 255) + %over_res = select %over_mask, const_vector(i16, 255), %res + %under_mask = icmp slt %res, const_vector(i16, 0) + %ret_i16 = select %under_mask, const_vector(i16, 0), %over_res + %ret = trunc %ret_i16 to + ret %ret +} + +define @__p$1us_vi16(, ) { + %v0_i32 = zext %0 to + %v1_i32 = zext %1 to 
+ %res = $1 %v0_i32, %v1_i32 + %over_mask = icmp ugt %res, const_vector(i32, 65535) + %over_res = select %over_mask, const_vector(i32, 65535), %res + %under_mask = icmp slt %res, const_vector(i32, 0) + %ret_i32 = select %under_mask, const_vector(i32, 0), %over_res + %ret = trunc %ret_i32 to + ret %ret +} +') + +define(`saturation_arithmetic_novec', ` +saturation_arithmetic_novec_universal(sub) +saturation_arithmetic_novec_universal(add) +') + diff --git a/builtins/util.m4 b/builtins/util.m4 index 7f08adb3..776b4c9e 100644 --- a/builtins/util.m4 +++ b/builtins/util.m4 @@ -49,6 +49,416 @@ define(`MASK_HIGH_BIT_ON', ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; vector convertation utilities +;; convert vector of one width into vector of other width +;; +;; $1: vector element type +;; $2: vector of the first width +;; $3: vector of the second width + + +define(`convert1to8', ` + $3 = shufflevector <1 x $1> $2, <1 x $1> undef, + <8 x i32> +') + + +define(`convert1to16', ` + $3 = shufflevector <1 x $1> $2, <1 x $1> undef, + <16 x i32> +') + +define(`convert4to8', ` + $3 = shufflevector <4 x $1> $2, <4 x $1> undef, + <8 x i32> +') + +define(`convert4to16', ` + $3 = shufflevector <4 x $1> $2, <4 x $1> undef, + <16 x i32> +') + +define(`convert8to16', ` + $3 = shufflevector <8 x $1> $2, <8 x $1> undef, + <16 x i32> +') + +define(`convert4to32', ` + $3 = shufflevector <4 x $1> $2, <4 x $1> undef, + <32 x i32> +') + +define(`convert8to32', ` + $3 = shufflevector <4 x $1> $2, <4 x $1> undef, + <32 x i32> +') + +define(`convert16to32', ` + $3 = shufflevector <4 x $1> $2, <4 x $1> undef, + <32 x i32> +') + +define(`convert8to1', ` + $3 = shufflevector <8 x $1> $2, <8 x $1> undef, + <1 x i32> +') + + +define(`convert16to1', ` + $3 = shufflevector <16 x $1> $2, <16 x $1> undef, + <1 x i32> +') + +define(`convert8to4', ` + $3 = shufflevector <8 x $1> $2, <8 x $1> undef, + <4 x i32> +') + + +define(`convert16to4', ` + $3 = shufflevector <16 x $1> 
$2, <16 x $1> undef, + <4 x i32> +') + +define(`convert16to8', ` + $3 = shufflevector <16 x $1> $2, <16 x $1> undef, + <8 x i32> +') + +define(`convert32to4', ` + $3 = shufflevector <32 x $1> $2, <32 x $1> undef, + <4 x i32> +') + +define(`convert32to8', ` + $3 = shufflevector <32 x $1> $2, <32 x $1> undef, + <8 x i32> +') + +define(`convert32to16', ` + $3 = shufflevector <32 x $1> $2, <32 x $1> undef, + <16 x i32> +') + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;saturation arithmetic + +define(`saturation_arithmetic', +`ifelse(WIDTH, `4', `saturation_arithmetic_vec4()', + WIDTH, `8', `saturation_arithmetic_vec8()', + WIDTH, `16', `saturation_arithmetic_vec16() ', + `errprint(`ERROR: saturation_arithmetic() macro called with unsupported width = 'WIDTH +) + m4exit(`1')') +') + +;; create vector constant. Used by saturation_arithmetic_novec_universal below. + +define(`const_vector', ` +ifelse(WIDTH, `4', `<$1 $2, $1 $2, $1 $2, $1 $2>', + WIDTH, `8', `<$1 $2, $1 $2, $1 $2, $1 $2, $1 $2, $1 $2, $1 $2, $1 $2>', + WIDTH, `16', `<$1 $2, $1 $2, $1 $2, $1 $2, $1 $2, $1 $2, $1 $2, $1 $2, + $1 $2, $1 $2, $1 $2, $1 $2, $1 $2, $1 $2, $1 $2, $1 $2>', + WIDTH, `32', `<$1 $2, $1 $2, $1 $2, $1 $2, $1 $2, $1 $2, $1 $2, $1 $2, + $1 $2, $1 $2, $1 $2, $1 $2, $1 $2, $1 $2, $1 $2, $1 $2, + $1 $2, $1 $2, $1 $2, $1 $2, $1 $2, $1 $2, $1 $2, $1 $2, + $1 $2, $1 $2, $1 $2, $1 $2, $1 $2, $1 $2, $1 $2, $1 $2>', + WIDTH, `64', `<$1 $2, $1 $2, $1 $2, $1 $2, $1 $2, $1 $2, $1 $2, $1 $2, + $1 $2, $1 $2, $1 $2, $1 $2, $1 $2, $1 $2, $1 $2, $1 $2, + $1 $2, $1 $2, $1 $2, $1 $2, $1 $2, $1 $2, $1 $2, $1 $2, + $1 $2, $1 $2, $1 $2, $1 $2, $1 $2, $1 $2, $1 $2, $1 $2, + $1 $2, $1 $2, $1 $2, $1 $2, $1 $2, $1 $2, $1 $2, $1 $2, + $1 $2, $1 $2, $1 $2, $1 $2, $1 $2, $1 $2, $1 $2, $1 $2, + $1 $2, $1 $2, $1 $2, $1 $2, $1 $2, $1 $2, $1 $2, $1 $2, + $1 $2, $1 $2, $1 $2, $1 $2, $1 $2, $1 $2, $1 $2, $1 $2>', + `<$1 $2>')') + +;; utility function used by saturation_arithmetic_novec 
below. This shouldn't be called by +;; target .ll files directly. +;; $1: {add,sub} (used in constructing function names) + +define(`saturation_arithmetic_novec_universal', ` +define @__p$1s_vi8(, ) { + %v0_i16 = sext %0 to + %v1_i16 = sext %1 to + %res = $1 %v0_i16, %v1_i16 + %over_mask = icmp sgt %res, const_vector(i16, 127) + %over_res = select %over_mask, const_vector(i16, 127), %res + %under_mask = icmp slt %res, const_vector(i16, -128) + %ret_i16 = select %under_mask, const_vector(i16, -128), %over_res + %ret = trunc %ret_i16 to + ret %ret +} + +define @__p$1s_vi16(, ) { + %v0_i32 = sext %0 to + %v1_i32 = sext %1 to + %res = $1 %v0_i32, %v1_i32 + %over_mask = icmp sgt %res, const_vector(i32, 32767) + %over_res = select %over_mask, const_vector(i32, 32767), %res + %under_mask = icmp slt %res, const_vector(i32, -32768) + %ret_i32 = select %under_mask, const_vector(i32, -32768), %over_res + %ret = trunc %ret_i32 to + ret %ret +} + +define @__p$1us_vi8(, ) { + %v0_i16 = zext %0 to + %v1_i16 = zext %1 to + %res = $1 %v0_i16, %v1_i16 + %over_mask = icmp ugt %res, const_vector(i16, 255) + %over_res = select %over_mask, const_vector(i16, 255), %res + %under_mask = icmp slt %res, const_vector(i16, 0) + %ret_i16 = select %under_mask, const_vector(i16, 0), %over_res + %ret = trunc %ret_i16 to + ret %ret +} + +define @__p$1us_vi16(, ) { + %v0_i32 = zext %0 to + %v1_i32 = zext %1 to + %res = $1 %v0_i32, %v1_i32 + %over_mask = icmp ugt %res, const_vector(i32, 65535) + %over_res = select %over_mask, const_vector(i32, 65535), %res + %under_mask = icmp slt %res, const_vector(i32, 0) + %ret_i32 = select %under_mask, const_vector(i32, 0), %over_res + %ret = trunc %ret_i32 to + ret %ret +} +') + +;; implementation for targets which doesn't have h/w instructions + +define(`saturation_arithmetic_novec', ` +saturation_arithmetic_novec_universal(sub) +saturation_arithmetic_novec_universal(add) +') + +;;4-wide vector saturation arithmetic + +define(`saturation_arithmetic_vec4', ` 
+declare <16 x i8> @llvm.x86.sse2.padds.b(<16 x i8>, <16 x i8>) nounwind readnone +define <4 x i8> @__padds_vi8(<4 x i8>, <4 x i8>) { + convert4to16(i8, %0, %v0) + convert4to16(i8, %1, %v1) + %r16 = call <16 x i8> @llvm.x86.sse2.padds.b(<16 x i8> %v0, <16 x i8> %v1) + convert16to4(i8, %r16, %r) + ret <4 x i8> %r +} + +declare <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16>, <8 x i16>) nounwind readnone +define <4 x i16> @__padds_vi16(<4 x i16>, <4 x i16>) { + convert4to8(i16, %0, %v0) + convert4to8(i16, %1, %v1) + %r16 = call <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16> %v0, <8 x i16> %v1) + convert8to4(i16, %r16, %r) + ret <4 x i16> %r +} + +declare <16 x i8> @llvm.x86.sse2.paddus.b(<16 x i8>, <16 x i8>) nounwind readnone +define <4 x i8> @__paddus_vi8(<4 x i8>, <4 x i8>) { + convert4to16(i8, %0, %v0) + convert4to16(i8, %1, %v1) + %r16 = call <16 x i8> @llvm.x86.sse2.paddus.b(<16 x i8> %v0, <16 x i8> %v1) + convert16to4(i8, %r16, %r) + ret <4 x i8> %r +} + +declare <8 x i16> @llvm.x86.sse2.paddus.w(<8 x i16>, <8 x i16>) nounwind readnone +define <4 x i16> @__paddus_vi16(<4 x i16>, <4 x i16>) { + convert4to8(i16, %0, %v0) + convert4to8(i16, %1, %v1) + %r16 = call <8 x i16> @llvm.x86.sse2.paddus.w(<8 x i16> %v0, <8 x i16> %v1) + convert8to4(i16, %r16, %r) + ret <4 x i16> %r +} + +declare <16 x i8> @llvm.x86.sse2.psubs.b(<16 x i8>, <16 x i8>) nounwind readnone +define <4 x i8> @__psubs_vi8(<4 x i8>, <4 x i8>) { + convert4to16(i8, %0, %v0) + convert4to16(i8, %1, %v1) + %r16 = call <16 x i8> @llvm.x86.sse2.psubs.b(<16 x i8> %v0, <16 x i8> %v1) + convert16to4(i8, %r16, %r) + ret <4 x i8> %r +} + +declare <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16>, <8 x i16>) nounwind readnone +define <4 x i16> @__psubs_vi16(<4 x i16>, <4 x i16>) { + convert4to8(i16, %0, %v0) + convert4to8(i16, %1, %v1) + %r16 = call <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16> %v0, <8 x i16> %v1) + convert8to4(i16, %r16, %r) + ret <4 x i16> %r +} + +declare <16 x i8> @llvm.x86.sse2.psubus.b(<16 x i8>, <16 x i8>) 
nounwind readnone +define <4 x i8> @__psubus_vi8(<4 x i8>, <4 x i8>) { + convert4to16(i8, %0, %v0) + convert4to16(i8, %1, %v1) + %r16 = call <16 x i8> @llvm.x86.sse2.psubus.b(<16 x i8> %v0, <16 x i8> %v1) + convert16to4(i8, %r16, %r) + ret <4 x i8> %r +} + +declare <8 x i16> @llvm.x86.sse2.psubus.w(<8 x i16>, <8 x i16>) nounwind readnone +define <4 x i16> @__psubus_vi16(<4 x i16>, <4 x i16>) { + convert4to8(i16, %0, %v0) + convert4to8(i16, %1, %v1) + %r16 = call <8 x i16> @llvm.x86.sse2.psubus.w(<8 x i16> %v0, <8 x i16> %v1) + convert8to4(i16, %r16, %r) + ret <4 x i16> %r +} +') + +;;8-wide vector saturation arithmetic + +define(`saturation_arithmetic_vec8', ` +declare <16 x i8> @llvm.x86.sse2.padds.b(<16 x i8>, <16 x i8>) nounwind readnone +define <8 x i8> @__padds_vi8(<8 x i8>, <8 x i8>) { + convert8to16(i8, %0, %v0) + convert8to16(i8, %1, %v1) + %r16 = call <16 x i8> @llvm.x86.sse2.padds.b(<16 x i8> %v0, <16 x i8> %v1) + convert16to8(i8, %r16, %r) + ret <8 x i8> %r +} + +declare <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16>, <8 x i16>) nounwind readnone +define <8 x i16> @__padds_vi16(<8 x i16> %a0, <8 x i16> %a1) { + %res = call <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16> %a0, <8 x i16> %a1) + ret <8 x i16> %res +} + +declare <16 x i8> @llvm.x86.sse2.paddus.b(<16 x i8>, <16 x i8>) nounwind readnone +define <8 x i8> @__paddus_vi8(<8 x i8>, <8 x i8>) { + convert8to16(i8, %0, %v0) + convert8to16(i8, %1, %v1) + %r16 = call <16 x i8> @llvm.x86.sse2.paddus.b(<16 x i8> %v0, <16 x i8> %v1) + convert16to8(i8, %r16, %r) + ret <8 x i8> %r +} + +declare <8 x i16> @llvm.x86.sse2.paddus.w(<8 x i16>, <8 x i16>) nounwind readnone +define <8 x i16> @__paddus_vi16(<8 x i16> %a0, <8 x i16> %a1) { + %res = call <8 x i16> @llvm.x86.sse2.paddus.w(<8 x i16> %a0, <8 x i16> %a1) + ret <8 x i16> %res +} + +declare <16 x i8> @llvm.x86.sse2.psubs.b(<16 x i8>, <16 x i8>) nounwind readnone +define <8 x i8> @__psubs_vi8(<8 x i8>, <8 x i8>) { + convert8to16(i8, %0, %v0) + convert8to16(i8, %1, 
%v1) + %r16 = call <16 x i8> @llvm.x86.sse2.psubs.b(<16 x i8> %v0, <16 x i8> %v1) + convert16to8(i8, %r16, %r) + ret <8 x i8> %r +} + +declare <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16>, <8 x i16>) nounwind readnone +define <8 x i16> @__psubs_vi16(<8 x i16> %a0, <8 x i16> %a1) { + %res = call <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16> %a0, <8 x i16> %a1) + ret <8 x i16> %res +} + +declare <16 x i8> @llvm.x86.sse2.psubus.b(<16 x i8>, <16 x i8>) nounwind readnone +define <8 x i8> @__psubus_vi8(<8 x i8>, <8 x i8>) { + convert8to16(i8, %0, %v0) + convert8to16(i8, %1, %v1) + %r16 = call <16 x i8> @llvm.x86.sse2.psubus.b(<16 x i8> %v0, <16 x i8> %v1) + convert16to8(i8, %r16, %r) + ret <8 x i8> %r +} + +declare <8 x i16> @llvm.x86.sse2.psubus.w(<8 x i16>, <8 x i16>) nounwind readnone +define <8 x i16> @__psubus_vi16(<8 x i16> %a0, <8 x i16> %a1) { + %res = call <8 x i16> @llvm.x86.sse2.psubus.w(<8 x i16> %a0, <8 x i16> %a1) + ret <8 x i16> %res +} +') + +;;16-wide vector saturation arithmetic + +define(`saturation_arithmetic_vec16', ` +declare <16 x i8> @llvm.x86.sse2.padds.b(<16 x i8>, <16 x i8>) nounwind readnone +define <16 x i8> @__padds_vi8(<16 x i8> %a0, <16 x i8> %a1) { + %res = call <16 x i8> @llvm.x86.sse2.padds.b(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1] + ret <16 x i8> %res +} + +declare <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16>, <8 x i16>) nounwind readnone +define <16 x i16> @__padds_vi16(<16 x i16> %a0, <16 x i16> %a1) { + binary8to16(ret, i16, @llvm.x86.sse2.padds.w, %a0, %a1) + ret <16 x i16> %ret +} + +declare <16 x i8> @llvm.x86.sse2.paddus.b(<16 x i8>, <16 x i8>) nounwind readnone +define <16 x i8> @__paddus_vi8(<16 x i8> %a0, <16 x i8> %a1) { + %res = call <16 x i8> @llvm.x86.sse2.paddus.b(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1] + ret <16 x i8> %res +} + +declare <8 x i16> @llvm.x86.sse2.paddus.w(<8 x i16>, <8 x i16>) nounwind readnone +define <16 x i16> @__paddus_vi16(<16 x i16> %a0, <16 x i16> %a1) { + binary8to16(ret, i16, 
@llvm.x86.sse2.paddus.w, %a0, %a1) + ret <16 x i16> %ret +} + +declare <16 x i8> @llvm.x86.sse2.psubs.b(<16 x i8>, <16 x i8>) nounwind readnone +define <16 x i8> @__psubs_vi8(<16 x i8> %a0, <16 x i8> %a1) { + %res = call <16 x i8> @llvm.x86.sse2.psubs.b(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1] + ret <16 x i8> %res +} + +declare <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16>, <8 x i16>) nounwind readnone +define <16 x i16> @__psubs_vi16(<16 x i16> %a0, <16 x i16> %a1) { + binary8to16(ret, i16, @llvm.x86.sse2.psubs.w, %a0, %a1) + ret <16 x i16> %ret +} + +declare <16 x i8> @llvm.x86.sse2.psubus.b(<16 x i8>, <16 x i8>) nounwind readnone +define <16 x i8> @__psubus_vi8(<16 x i8> %a0, <16 x i8> %a1) { + %res = call <16 x i8> @llvm.x86.sse2.psubus.b(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1] + ret <16 x i8> %res +} + +declare <8 x i16> @llvm.x86.sse2.psubus.w(<8 x i16>, <8 x i16>) nounwind readnone +define <16 x i16> @__psubus_vi16(<16 x i16> %a0, <16 x i16> %a1) { + binary8to16(ret, i16, @llvm.x86.sse2.psubus.w, %a0, %a1) + ret <16 x i16> %ret +} +') + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ;; vector deconstruction utilities ;; split 8-wide vector into 2 4-wide vectors ;; diff --git a/fail_db.txt b/fail_db.txt index 248a9cff..02432603 100644 --- a/fail_db.txt +++ b/fail_db.txt @@ -2,279 +2,11 @@ % The list is unordered and contains information about commonly used platforms / configurations. % Our goas is to maintain this list for Linux, MacOS and Windows with reasonably new compilers. % Note, that it's important which C++ compiler was used. The currently supported C++ compilers are -% clang 3.4 on Linux and clang 3.3 on MacOS and cl (VS2010) on Windows. +% clang 3.4 on Linux and clang 3.3 on MacOS and cl (VS2012) on Windows. % Please also note that it's very important to have correctly built LLVM. 
There are a number of % LLVM bugs in released versions, that we have to workaround by applying patches (see llvm_patches % folder). The recommended way to build LLVM on Unix is to use "alloy.py". % -.\tests\exclusive-scan-add-9.ispc runfail x86 sse2-i32x4 Windows LLVM 3.3 cl -O2 * -.\tests\reduce-equal-10.ispc runfail x86 sse2-i32x4 Windows LLVM 3.3 cl -O2 * -.\tests\reduce-max-uint64.ispc runfail x86 sse2-i32x4 Windows LLVM 3.3 cl -O2 * -.\tests\uint64-max-1.ispc runfail x86 sse2-i32x4 Windows LLVM 3.3 cl -O2 * -.\tests\uint64-max.ispc runfail x86 sse2-i32x4 Windows LLVM 3.3 cl -O2 * -.\tests\uint64-min-1.ispc runfail x86 sse2-i32x4 Windows LLVM 3.3 cl -O2 * -.\tests\uint64-min.ispc runfail x86 sse2-i32x4 Windows LLVM 3.3 cl -O2 * -.\tests\exclusive-scan-add-10.ispc runfail x86 sse2-i32x8 Windows LLVM 3.3 cl -O2 * -.\tests\exclusive-scan-add-9.ispc runfail x86 sse2-i32x8 Windows LLVM 3.3 cl -O2 * -.\tests\reduce-equal-10.ispc runfail x86 sse2-i32x8 Windows LLVM 3.3 cl -O2 * -.\tests\reduce-max-uint64.ispc runfail x86 sse2-i32x8 Windows LLVM 3.3 cl -O2 * -.\tests\uint64-max-1.ispc runfail x86 sse2-i32x8 Windows LLVM 3.3 cl -O2 * -.\tests\uint64-max.ispc runfail x86 sse2-i32x8 Windows LLVM 3.3 cl -O2 * -.\tests\uint64-min-1.ispc runfail x86 sse2-i32x8 Windows LLVM 3.3 cl -O2 * -.\tests\uint64-min.ispc runfail x86 sse2-i32x8 Windows LLVM 3.3 cl -O2 * -.\tests\exclusive-scan-add-10.ispc runfail x86 sse4-i32x4 Windows LLVM 3.3 cl -O2 * -.\tests\exclusive-scan-add-9.ispc runfail x86 sse4-i32x4 Windows LLVM 3.3 cl -O2 * -.\tests\max-uint-1.ispc runfail x86 sse4-i32x4 Windows LLVM 3.3 cl -O2 * -.\tests\max-uint.ispc runfail x86 sse4-i32x4 Windows LLVM 3.3 cl -O2 * -.\tests\min-uint-2.ispc runfail x86 sse4-i32x4 Windows LLVM 3.3 cl -O2 * -.\tests\packed-load-1.ispc runfail x86 sse4-i32x4 Windows LLVM 3.3 cl -O2 * -.\tests\packed-store.ispc runfail x86 sse4-i32x4 Windows LLVM 3.3 cl -O2 * -.\tests\reduce-add-uint-1.ispc runfail x86 sse4-i32x4 Windows LLVM 3.3 cl -O2 * 
-.\tests\reduce-add-uint.ispc runfail x86 sse4-i32x4 Windows LLVM 3.3 cl -O2 * -.\tests\reduce-add-uint64-1.ispc runfail x86 sse4-i32x4 Windows LLVM 3.3 cl -O2 * -.\tests\reduce-add-uint64.ispc runfail x86 sse4-i32x4 Windows LLVM 3.3 cl -O2 * -.\tests\reduce-max-uint.ispc runfail x86 sse4-i32x4 Windows LLVM 3.3 cl -O2 * -.\tests\max-uint-1.ispc runfail x86 sse4-i32x8 Windows LLVM 3.3 cl -O2 * -.\tests\max-uint.ispc runfail x86 sse4-i32x8 Windows LLVM 3.3 cl -O2 * -.\tests\min-uint-2.ispc runfail x86 sse4-i32x8 Windows LLVM 3.3 cl -O2 * -.\tests\packed-load-1.ispc runfail x86 sse4-i32x8 Windows LLVM 3.3 cl -O2 * -.\tests\packed-store.ispc runfail x86 sse4-i32x8 Windows LLVM 3.3 cl -O2 * -.\tests\reduce-add-uint-1.ispc runfail x86 sse4-i32x8 Windows LLVM 3.3 cl -O2 * -.\tests\reduce-add-uint.ispc runfail x86 sse4-i32x8 Windows LLVM 3.3 cl -O2 * -.\tests\reduce-add-uint64-1.ispc runfail x86 sse4-i32x8 Windows LLVM 3.3 cl -O2 * -.\tests\reduce-add-uint64.ispc runfail x86 sse4-i32x8 Windows LLVM 3.3 cl -O2 * -.\tests\reduce-max-uint.ispc runfail x86 sse4-i32x8 Windows LLVM 3.3 cl -O2 * -.\tests\exclusive-scan-add-10.ispc runfail x86 sse4-i16x8 Windows LLVM 3.3 cl -O2 * -.\tests\max-uint-1.ispc runfail x86 sse4-i16x8 Windows LLVM 3.3 cl -O2 * -.\tests\max-uint.ispc runfail x86 sse4-i16x8 Windows LLVM 3.3 cl -O2 * -.\tests\min-uint-2.ispc runfail x86 sse4-i16x8 Windows LLVM 3.3 cl -O2 * -.\tests\packed-load-1.ispc runfail x86 sse4-i16x8 Windows LLVM 3.3 cl -O2 * -.\tests\packed-store.ispc runfail x86 sse4-i16x8 Windows LLVM 3.3 cl -O2 * -.\tests\reduce-add-uint-1.ispc runfail x86 sse4-i16x8 Windows LLVM 3.3 cl -O2 * -.\tests\reduce-add-uint.ispc runfail x86 sse4-i16x8 Windows LLVM 3.3 cl -O2 * -.\tests\reduce-add-uint64-1.ispc runfail x86 sse4-i16x8 Windows LLVM 3.3 cl -O2 * -.\tests\reduce-add-uint64.ispc runfail x86 sse4-i16x8 Windows LLVM 3.3 cl -O2 * -.\tests\reduce-max-uint.ispc runfail x86 sse4-i16x8 Windows LLVM 3.3 cl -O2 * -.\tests\atomics-13.ispc compfail x86 
sse4-i16x8 Windows LLVM 3.3 cl -O2 * -.\tests\exclusive-scan-add-10.ispc runfail x86 sse4-i8x16 Windows LLVM 3.3 cl -O2 * -.\tests\exclusive-scan-add-9.ispc runfail x86 sse4-i8x16 Windows LLVM 3.3 cl -O2 * -.\tests\funcptr-null-4.ispc runfail x86 sse4-i8x16 Windows LLVM 3.3 cl -O2 * -.\tests\funcptr-null-5.ispc runfail x86 sse4-i8x16 Windows LLVM 3.3 cl -O2 * -.\tests\funcptr-null-6.ispc runfail x86 sse4-i8x16 Windows LLVM 3.3 cl -O2 * -.\tests\max-uint-1.ispc runfail x86 sse4-i8x16 Windows LLVM 3.3 cl -O2 * -.\tests\max-uint.ispc runfail x86 sse4-i8x16 Windows LLVM 3.3 cl -O2 * -.\tests\min-uint-2.ispc runfail x86 sse4-i8x16 Windows LLVM 3.3 cl -O2 * -.\tests\packed-load-1.ispc runfail x86 sse4-i8x16 Windows LLVM 3.3 cl -O2 * -.\tests\packed-store.ispc runfail x86 sse4-i8x16 Windows LLVM 3.3 cl -O2 * -.\tests\reduce-add-uint-1.ispc runfail x86 sse4-i8x16 Windows LLVM 3.3 cl -O2 * -.\tests\reduce-add-uint.ispc runfail x86 sse4-i8x16 Windows LLVM 3.3 cl -O2 * -.\tests\reduce-add-uint64-1.ispc runfail x86 sse4-i8x16 Windows LLVM 3.3 cl -O2 * -.\tests\reduce-add-uint64.ispc runfail x86 sse4-i8x16 Windows LLVM 3.3 cl -O2 * -.\tests\reduce-max-uint.ispc runfail x86 sse4-i8x16 Windows LLVM 3.3 cl -O2 * -.\tests\reduce-max-uint64.ispc runfail x86 sse4-i8x16 Windows LLVM 3.3 cl -O2 * -.\tests\reduce-min-uint64.ispc runfail x86 sse4-i8x16 Windows LLVM 3.3 cl -O2 * -.\tests\atomics-13.ispc compfail x86 sse4-i8x16 Windows LLVM 3.3 cl -O2 * -.\tests\exclusive-scan-add-10.ispc runfail x86 avx1-i32x8 Windows LLVM 3.3 cl -O2 * -.\tests\max-uint-1.ispc runfail x86 avx1-i32x8 Windows LLVM 3.3 cl -O2 * -.\tests\max-uint.ispc runfail x86 avx1-i32x8 Windows LLVM 3.3 cl -O2 * -.\tests\min-uint-2.ispc runfail x86 avx1-i32x8 Windows LLVM 3.3 cl -O2 * -.\tests\packed-load-1.ispc runfail x86 avx1-i32x8 Windows LLVM 3.3 cl -O2 * -.\tests\packed-store.ispc runfail x86 avx1-i32x8 Windows LLVM 3.3 cl -O2 * -.\tests\reduce-add-uint-1.ispc runfail x86 avx1-i32x8 Windows LLVM 3.3 cl -O2 * 
-.\tests\reduce-add-uint.ispc runfail x86 avx1-i32x8 Windows LLVM 3.3 cl -O2 * -.\tests\reduce-add-uint64-1.ispc runfail x86 avx1-i32x8 Windows LLVM 3.3 cl -O2 * -.\tests\reduce-add-uint64.ispc runfail x86 avx1-i32x8 Windows LLVM 3.3 cl -O2 * -.\tests\reduce-equal-10.ispc runfail x86 avx1-i32x8 Windows LLVM 3.3 cl -O2 * -.\tests\reduce-max-uint.ispc runfail x86 avx1-i32x8 Windows LLVM 3.3 cl -O2 * -.\tests\reduce-max-uint64.ispc runfail x86 avx1-i32x8 Windows LLVM 3.3 cl -O2 * -.\tests\reduce-min-uint64.ispc runfail x86 avx1-i32x8 Windows LLVM 3.3 cl -O2 * -.\tests\uint64-max-1.ispc runfail x86 avx1-i32x8 Windows LLVM 3.3 cl -O2 * -.\tests\uint64-max.ispc runfail x86 avx1-i32x8 Windows LLVM 3.3 cl -O2 * -.\tests\uint64-min-1.ispc runfail x86 avx1-i32x8 Windows LLVM 3.3 cl -O2 * -.\tests\uint64-min.ispc runfail x86 avx1-i32x8 Windows LLVM 3.3 cl -O2 * -.\tests\switch-10.ispc compfail x86 avx1-i32x8 Windows LLVM 3.3 cl -O2 * -.\tests\switch-11.ispc compfail x86 avx1-i32x8 Windows LLVM 3.3 cl -O2 * -.\tests\switch-12.ispc compfail x86 avx1-i32x8 Windows LLVM 3.3 cl -O2 * -.\tests\switch-8.ispc compfail x86 avx1-i32x8 Windows LLVM 3.3 cl -O2 * -.\tests\switch-9.ispc compfail x86 avx1-i32x8 Windows LLVM 3.3 cl -O2 * -.\tests\exclusive-scan-add-10.ispc runfail x86 avx1-i32x16 Windows LLVM 3.3 cl -O2 * -.\tests\max-uint-1.ispc runfail x86 avx1-i32x16 Windows LLVM 3.3 cl -O2 * -.\tests\max-uint.ispc runfail x86 avx1-i32x16 Windows LLVM 3.3 cl -O2 * -.\tests\min-uint-1.ispc runfail x86 avx1-i32x16 Windows LLVM 3.3 cl -O2 * -.\tests\min-uint-2.ispc runfail x86 avx1-i32x16 Windows LLVM 3.3 cl -O2 * -.\tests\packed-load-1.ispc runfail x86 avx1-i32x16 Windows LLVM 3.3 cl -O2 * -.\tests\packed-store.ispc runfail x86 avx1-i32x16 Windows LLVM 3.3 cl -O2 * -.\tests\reduce-add-uint-1.ispc runfail x86 avx1-i32x16 Windows LLVM 3.3 cl -O2 * -.\tests\reduce-add-uint.ispc runfail x86 avx1-i32x16 Windows LLVM 3.3 cl -O2 * -.\tests\reduce-add-uint64-1.ispc runfail x86 avx1-i32x16 Windows 
LLVM 3.3 cl -O2 * -.\tests\reduce-add-uint64.ispc runfail x86 avx1-i32x16 Windows LLVM 3.3 cl -O2 * -.\tests\reduce-equal-10.ispc runfail x86 avx1-i32x16 Windows LLVM 3.3 cl -O2 * -.\tests\reduce-max-uint.ispc runfail x86 avx1-i32x16 Windows LLVM 3.3 cl -O2 * -.\tests\reduce-max-uint64.ispc runfail x86 avx1-i32x16 Windows LLVM 3.3 cl -O2 * -.\tests\reduce-min-uint.ispc runfail x86 avx1-i32x16 Windows LLVM 3.3 cl -O2 * -.\tests\reduce-min-uint64.ispc runfail x86 avx1-i32x16 Windows LLVM 3.3 cl -O2 * -.\tests\uint64-max-1.ispc runfail x86 avx1-i32x16 Windows LLVM 3.3 cl -O2 * -.\tests\uint64-max.ispc runfail x86 avx1-i32x16 Windows LLVM 3.3 cl -O2 * -.\tests\uint64-min-1.ispc runfail x86 avx1-i32x16 Windows LLVM 3.3 cl -O2 * -.\tests\uint64-min.ispc runfail x86 avx1-i32x16 Windows LLVM 3.3 cl -O2 * -.\tests\switch-10.ispc compfail x86 avx1-i32x16 Windows LLVM 3.3 cl -O2 * -.\tests\switch-11.ispc compfail x86 avx1-i32x16 Windows LLVM 3.3 cl -O2 * -.\tests\switch-12.ispc compfail x86 avx1-i32x16 Windows LLVM 3.3 cl -O2 * -.\tests\switch-8.ispc compfail x86 avx1-i32x16 Windows LLVM 3.3 cl -O2 * -.\tests\switch-9.ispc compfail x86 avx1-i32x16 Windows LLVM 3.3 cl -O2 * -.\tests\exclusive-scan-add-10.ispc runfail x86 avx1.1-i32x8 Windows LLVM 3.3 cl -O2 * -.\tests\max-uint-1.ispc runfail x86 avx1.1-i32x8 Windows LLVM 3.3 cl -O2 * -.\tests\max-uint.ispc runfail x86 avx1.1-i32x8 Windows LLVM 3.3 cl -O2 * -.\tests\min-uint-2.ispc runfail x86 avx1.1-i32x8 Windows LLVM 3.3 cl -O2 * -.\tests\packed-load-1.ispc runfail x86 avx1.1-i32x8 Windows LLVM 3.3 cl -O2 * -.\tests\packed-store.ispc runfail x86 avx1.1-i32x8 Windows LLVM 3.3 cl -O2 * -.\tests\reduce-add-uint-1.ispc runfail x86 avx1.1-i32x8 Windows LLVM 3.3 cl -O2 * -.\tests\reduce-add-uint.ispc runfail x86 avx1.1-i32x8 Windows LLVM 3.3 cl -O2 * -.\tests\reduce-add-uint64-1.ispc runfail x86 avx1.1-i32x8 Windows LLVM 3.3 cl -O2 * -.\tests\reduce-add-uint64.ispc runfail x86 avx1.1-i32x8 Windows LLVM 3.3 cl -O2 * 
-.\tests\reduce-equal-10.ispc runfail x86 avx1.1-i32x8 Windows LLVM 3.3 cl -O2 * -.\tests\reduce-max-uint.ispc runfail x86 avx1.1-i32x8 Windows LLVM 3.3 cl -O2 * -.\tests\reduce-max-uint64.ispc runfail x86 avx1.1-i32x8 Windows LLVM 3.3 cl -O2 * -.\tests\reduce-min-uint64.ispc runfail x86 avx1.1-i32x8 Windows LLVM 3.3 cl -O2 * -.\tests\uint64-max-1.ispc runfail x86 avx1.1-i32x8 Windows LLVM 3.3 cl -O2 * -.\tests\uint64-max.ispc runfail x86 avx1.1-i32x8 Windows LLVM 3.3 cl -O2 * -.\tests\uint64-min-1.ispc runfail x86 avx1.1-i32x8 Windows LLVM 3.3 cl -O2 * -.\tests\uint64-min.ispc runfail x86 avx1.1-i32x8 Windows LLVM 3.3 cl -O2 * -.\tests\switch-10.ispc compfail x86 avx1.1-i32x8 Windows LLVM 3.3 cl -O2 * -.\tests\switch-11.ispc compfail x86 avx1.1-i32x8 Windows LLVM 3.3 cl -O2 * -.\tests\switch-12.ispc compfail x86 avx1.1-i32x8 Windows LLVM 3.3 cl -O2 * -.\tests\switch-8.ispc compfail x86 avx1.1-i32x8 Windows LLVM 3.3 cl -O2 * -.\tests\switch-9.ispc compfail x86 avx1.1-i32x8 Windows LLVM 3.3 cl -O2 * -.\tests\exclusive-scan-add-10.ispc runfail x86 avx1.1-i32x16 Windows LLVM 3.3 cl -O2 * -.\tests\max-uint-1.ispc runfail x86 avx1.1-i32x16 Windows LLVM 3.3 cl -O2 * -.\tests\max-uint.ispc runfail x86 avx1.1-i32x16 Windows LLVM 3.3 cl -O2 * -.\tests\min-uint-1.ispc runfail x86 avx1.1-i32x16 Windows LLVM 3.3 cl -O2 * -.\tests\min-uint-2.ispc runfail x86 avx1.1-i32x16 Windows LLVM 3.3 cl -O2 * -.\tests\packed-load-1.ispc runfail x86 avx1.1-i32x16 Windows LLVM 3.3 cl -O2 * -.\tests\packed-store.ispc runfail x86 avx1.1-i32x16 Windows LLVM 3.3 cl -O2 * -.\tests\reduce-add-uint-1.ispc runfail x86 avx1.1-i32x16 Windows LLVM 3.3 cl -O2 * -.\tests\reduce-add-uint.ispc runfail x86 avx1.1-i32x16 Windows LLVM 3.3 cl -O2 * -.\tests\reduce-add-uint64-1.ispc runfail x86 avx1.1-i32x16 Windows LLVM 3.3 cl -O2 * -.\tests\reduce-add-uint64.ispc runfail x86 avx1.1-i32x16 Windows LLVM 3.3 cl -O2 * -.\tests\reduce-equal-10.ispc runfail x86 avx1.1-i32x16 Windows LLVM 3.3 cl -O2 * 
-.\tests\reduce-max-uint.ispc runfail x86 avx1.1-i32x16 Windows LLVM 3.3 cl -O2 * -.\tests\reduce-max-uint64.ispc runfail x86 avx1.1-i32x16 Windows LLVM 3.3 cl -O2 * -.\tests\reduce-min-uint.ispc runfail x86 avx1.1-i32x16 Windows LLVM 3.3 cl -O2 * -.\tests\reduce-min-uint64.ispc runfail x86 avx1.1-i32x16 Windows LLVM 3.3 cl -O2 * -.\tests\uint64-max-1.ispc runfail x86 avx1.1-i32x16 Windows LLVM 3.3 cl -O2 * -.\tests\uint64-max.ispc runfail x86 avx1.1-i32x16 Windows LLVM 3.3 cl -O2 * -.\tests\uint64-min-1.ispc runfail x86 avx1.1-i32x16 Windows LLVM 3.3 cl -O2 * -.\tests\uint64-min.ispc runfail x86 avx1.1-i32x16 Windows LLVM 3.3 cl -O2 * -.\tests\switch-10.ispc compfail x86 avx1.1-i32x16 Windows LLVM 3.3 cl -O2 * -.\tests\switch-11.ispc compfail x86 avx1.1-i32x16 Windows LLVM 3.3 cl -O2 * -.\tests\switch-12.ispc compfail x86 avx1.1-i32x16 Windows LLVM 3.3 cl -O2 * -.\tests\switch-8.ispc compfail x86 avx1.1-i32x16 Windows LLVM 3.3 cl -O2 * -.\tests\switch-9.ispc compfail x86 avx1.1-i32x16 Windows LLVM 3.3 cl -O2 * -.\tests\atomics-13.ispc compfail x86-64 sse4-i16x8 Windows LLVM 3.3 cl -O2 * -.\tests\funcptr-null-4.ispc runfail x86-64 sse4-i8x16 Windows LLVM 3.3 cl -O2 * -.\tests\funcptr-null-5.ispc runfail x86-64 sse4-i8x16 Windows LLVM 3.3 cl -O2 * -.\tests\funcptr-null-6.ispc runfail x86-64 sse4-i8x16 Windows LLVM 3.3 cl -O2 * -.\tests\atomics-13.ispc compfail x86-64 sse4-i8x16 Windows LLVM 3.3 cl -O2 * -.\tests\switch-10.ispc compfail x86-64 avx1-i32x8 Windows LLVM 3.3 cl -O2 * -.\tests\switch-11.ispc compfail x86-64 avx1-i32x8 Windows LLVM 3.3 cl -O2 * -.\tests\switch-12.ispc compfail x86-64 avx1-i32x8 Windows LLVM 3.3 cl -O2 * -.\tests\switch-8.ispc compfail x86-64 avx1-i32x8 Windows LLVM 3.3 cl -O2 * -.\tests\switch-9.ispc compfail x86-64 avx1-i32x8 Windows LLVM 3.3 cl -O2 * -.\tests\switch-10.ispc compfail x86-64 avx1-i32x16 Windows LLVM 3.3 cl -O2 * -.\tests\switch-11.ispc compfail x86-64 avx1-i32x16 Windows LLVM 3.3 cl -O2 * -.\tests\switch-12.ispc compfail 
x86-64 avx1-i32x16 Windows LLVM 3.3 cl -O2 * -.\tests\switch-8.ispc compfail x86-64 avx1-i32x16 Windows LLVM 3.3 cl -O2 * -.\tests\switch-9.ispc compfail x86-64 avx1-i32x16 Windows LLVM 3.3 cl -O2 * -.\tests\switch-10.ispc compfail x86-64 avx1.1-i32x8 Windows LLVM 3.3 cl -O2 * -.\tests\switch-11.ispc compfail x86-64 avx1.1-i32x8 Windows LLVM 3.3 cl -O2 * -.\tests\switch-12.ispc compfail x86-64 avx1.1-i32x8 Windows LLVM 3.3 cl -O2 * -.\tests\switch-8.ispc compfail x86-64 avx1.1-i32x8 Windows LLVM 3.3 cl -O2 * -.\tests\switch-9.ispc compfail x86-64 avx1.1-i32x8 Windows LLVM 3.3 cl -O2 * -.\tests\switch-10.ispc compfail x86-64 avx1.1-i32x16 Windows LLVM 3.3 cl -O2 * -.\tests\switch-11.ispc compfail x86-64 avx1.1-i32x16 Windows LLVM 3.3 cl -O2 * -.\tests\switch-12.ispc compfail x86-64 avx1.1-i32x16 Windows LLVM 3.3 cl -O2 * -.\tests\switch-8.ispc compfail x86-64 avx1.1-i32x16 Windows LLVM 3.3 cl -O2 * -.\tests\switch-9.ispc compfail x86-64 avx1.1-i32x16 Windows LLVM 3.3 cl -O2 * -.\tests\switch-10.ispc compfail x86-64 avx2-i32x8 Windows LLVM 3.3 cl -O2 * -.\tests\switch-11.ispc compfail x86-64 avx2-i32x8 Windows LLVM 3.3 cl -O2 * -.\tests\switch-12.ispc compfail x86-64 avx2-i32x8 Windows LLVM 3.3 cl -O2 * -.\tests\switch-8.ispc compfail x86-64 avx2-i32x8 Windows LLVM 3.3 cl -O2 * -.\tests\switch-9.ispc compfail x86-64 avx2-i32x8 Windows LLVM 3.3 cl -O2 * -.\tests\test-141.ispc runfail x86-64 avx2-i32x16 Windows LLVM 3.3 cl -O2 * -.\tests\switch-10.ispc compfail x86-64 avx2-i32x16 Windows LLVM 3.3 cl -O2 * -.\tests\switch-11.ispc compfail x86-64 avx2-i32x16 Windows LLVM 3.3 cl -O2 * -.\tests\switch-12.ispc compfail x86-64 avx2-i32x16 Windows LLVM 3.3 cl -O2 * -.\tests\switch-8.ispc compfail x86-64 avx2-i32x16 Windows LLVM 3.3 cl -O2 * -.\tests\switch-9.ispc compfail x86-64 avx2-i32x16 Windows LLVM 3.3 cl -O2 * -.\tests\exclusive-scan-add-10.ispc runfail x86 avx2-i32x8 Windows LLVM 3.3 cl -O2 * -.\tests\exclusive-scan-add-9.ispc runfail x86 avx2-i32x8 Windows LLVM 3.3 cl 
-O2 * -.\tests\max-uint-1.ispc runfail x86 avx2-i32x8 Windows LLVM 3.3 cl -O2 * -.\tests\max-uint.ispc runfail x86 avx2-i32x8 Windows LLVM 3.3 cl -O2 * -.\tests\min-uint-2.ispc runfail x86 avx2-i32x8 Windows LLVM 3.3 cl -O2 * -.\tests\packed-load-1.ispc runfail x86 avx2-i32x8 Windows LLVM 3.3 cl -O2 * -.\tests\packed-store.ispc runfail x86 avx2-i32x8 Windows LLVM 3.3 cl -O2 * -.\tests\reduce-add-uint-1.ispc runfail x86 avx2-i32x8 Windows LLVM 3.3 cl -O2 * -.\tests\reduce-add-uint.ispc runfail x86 avx2-i32x8 Windows LLVM 3.3 cl -O2 * -.\tests\reduce-add-uint64-1.ispc runfail x86 avx2-i32x8 Windows LLVM 3.3 cl -O2 * -.\tests\reduce-add-uint64.ispc runfail x86 avx2-i32x8 Windows LLVM 3.3 cl -O2 * -.\tests\reduce-equal-10.ispc runfail x86 avx2-i32x8 Windows LLVM 3.3 cl -O2 * -.\tests\reduce-max-uint.ispc runfail x86 avx2-i32x8 Windows LLVM 3.3 cl -O2 * -.\tests\reduce-max-uint64.ispc runfail x86 avx2-i32x8 Windows LLVM 3.3 cl -O2 * -.\tests\reduce-min-uint64.ispc runfail x86 avx2-i32x8 Windows LLVM 3.3 cl -O2 * -.\tests\uint64-max-1.ispc runfail x86 avx2-i32x8 Windows LLVM 3.3 cl -O2 * -.\tests\uint64-max.ispc runfail x86 avx2-i32x8 Windows LLVM 3.3 cl -O2 * -.\tests\uint64-min-1.ispc runfail x86 avx2-i32x8 Windows LLVM 3.3 cl -O2 * -.\tests\uint64-min.ispc runfail x86 avx2-i32x8 Windows LLVM 3.3 cl -O2 * -.\tests\switch-10.ispc compfail x86 avx2-i32x8 Windows LLVM 3.3 cl -O2 * -.\tests\switch-11.ispc compfail x86 avx2-i32x8 Windows LLVM 3.3 cl -O2 * -.\tests\switch-12.ispc compfail x86 avx2-i32x8 Windows LLVM 3.3 cl -O2 * -.\tests\switch-8.ispc compfail x86 avx2-i32x8 Windows LLVM 3.3 cl -O2 * -.\tests\switch-9.ispc compfail x86 avx2-i32x8 Windows LLVM 3.3 cl -O2 * -.\tests\exclusive-scan-add-10.ispc runfail x86 avx2-i32x16 Windows LLVM 3.3 cl -O2 * -.\tests\exclusive-scan-add-9.ispc runfail x86 avx2-i32x16 Windows LLVM 3.3 cl -O2 * -.\tests\max-uint-1.ispc runfail x86 avx2-i32x16 Windows LLVM 3.3 cl -O2 * -.\tests\max-uint.ispc runfail x86 avx2-i32x16 Windows LLVM 
3.3 cl -O2 * -.\tests\min-uint-1.ispc runfail x86 avx2-i32x16 Windows LLVM 3.3 cl -O2 * -.\tests\min-uint-2.ispc runfail x86 avx2-i32x16 Windows LLVM 3.3 cl -O2 * -.\tests\packed-load-1.ispc runfail x86 avx2-i32x16 Windows LLVM 3.3 cl -O2 * -.\tests\packed-store.ispc runfail x86 avx2-i32x16 Windows LLVM 3.3 cl -O2 * -.\tests\reduce-add-uint-1.ispc runfail x86 avx2-i32x16 Windows LLVM 3.3 cl -O2 * -.\tests\reduce-add-uint.ispc runfail x86 avx2-i32x16 Windows LLVM 3.3 cl -O2 * -.\tests\reduce-add-uint64-1.ispc runfail x86 avx2-i32x16 Windows LLVM 3.3 cl -O2 * -.\tests\reduce-add-uint64.ispc runfail x86 avx2-i32x16 Windows LLVM 3.3 cl -O2 * -.\tests\reduce-equal-10.ispc runfail x86 avx2-i32x16 Windows LLVM 3.3 cl -O2 * -.\tests\reduce-max-uint.ispc runfail x86 avx2-i32x16 Windows LLVM 3.3 cl -O2 * -.\tests\reduce-max-uint64.ispc runfail x86 avx2-i32x16 Windows LLVM 3.3 cl -O2 * -.\tests\reduce-min-uint.ispc runfail x86 avx2-i32x16 Windows LLVM 3.3 cl -O2 * -.\tests\reduce-min-uint64.ispc runfail x86 avx2-i32x16 Windows LLVM 3.3 cl -O2 * -.\tests\test-141.ispc runfail x86 avx2-i32x16 Windows LLVM 3.3 cl -O2 * -.\tests\uint64-max-1.ispc runfail x86 avx2-i32x16 Windows LLVM 3.3 cl -O2 * -.\tests\uint64-max.ispc runfail x86 avx2-i32x16 Windows LLVM 3.3 cl -O2 * -.\tests\uint64-min-1.ispc runfail x86 avx2-i32x16 Windows LLVM 3.3 cl -O2 * -.\tests\uint64-min.ispc runfail x86 avx2-i32x16 Windows LLVM 3.3 cl -O2 * -.\tests\switch-10.ispc compfail x86 avx2-i32x16 Windows LLVM 3.3 cl -O2 * -.\tests\switch-11.ispc compfail x86 avx2-i32x16 Windows LLVM 3.3 cl -O2 * -.\tests\switch-12.ispc compfail x86 avx2-i32x16 Windows LLVM 3.3 cl -O2 * -.\tests\switch-8.ispc compfail x86 avx2-i32x16 Windows LLVM 3.3 cl -O2 * -.\tests\switch-9.ispc compfail x86 avx2-i32x16 Windows LLVM 3.3 cl -O2 * -.\tests\exclusive-scan-add-10.ispc runfail x86 avx1-i64x4 Windows LLVM 3.3 cl -O2 * -.\tests\exclusive-scan-add-9.ispc runfail x86 avx1-i64x4 Windows LLVM 3.3 cl -O2 * -.\tests\max-uint-1.ispc 
runfail x86 avx1-i64x4 Windows LLVM 3.3 cl -O2 * -.\tests\max-uint.ispc runfail x86 avx1-i64x4 Windows LLVM 3.3 cl -O2 * -.\tests\min-uint-2.ispc runfail x86 avx1-i64x4 Windows LLVM 3.3 cl -O2 * -.\tests\packed-load-1.ispc runfail x86 avx1-i64x4 Windows LLVM 3.3 cl -O2 * -.\tests\packed-store.ispc runfail x86 avx1-i64x4 Windows LLVM 3.3 cl -O2 * -.\tests\reduce-add-uint-1.ispc runfail x86 avx1-i64x4 Windows LLVM 3.3 cl -O2 * -.\tests\reduce-add-uint.ispc runfail x86 avx1-i64x4 Windows LLVM 3.3 cl -O2 * -.\tests\reduce-add-uint64-1.ispc runfail x86 avx1-i64x4 Windows LLVM 3.3 cl -O2 * -.\tests\reduce-add-uint64.ispc runfail x86 avx1-i64x4 Windows LLVM 3.3 cl -O2 * -.\tests\reduce-max-uint.ispc runfail x86 avx1-i64x4 Windows LLVM 3.3 cl -O2 * -.\tests\reduce-min-uint64.ispc runfail x86 avx1-i64x4 Windows LLVM 3.3 cl -O2 * -.\tests\uint64-max-1.ispc runfail x86 avx1-i64x4 Windows LLVM 3.3 cl -O2 * -.\tests\uint64-max.ispc runfail x86 avx1-i64x4 Windows LLVM 3.3 cl -O2 * -.\tests\uint64-min-1.ispc runfail x86 avx1-i64x4 Windows LLVM 3.3 cl -O2 * -.\tests\uint64-min.ispc runfail x86 avx1-i64x4 Windows LLVM 3.3 cl -O2 * -.\tests\reduce-min-uint64.ispc runfail x86 avx1-i64x4 Windows LLVM 3.4 cl -O2 * ./tests/atomics-13.ispc compfail x86 sse4-i16x8 Mac LLVM 3.3 clang++3.3 -O2 * ./tests/atomics-13.ispc compfail x86-64 sse4-i16x8 Mac LLVM 3.3 clang++3.3 -O2 * ./tests/funcptr-null-4.ispc runfail x86 sse4-i8x16 Mac LLVM 3.3 clang++3.3 -O2 * @@ -305,133 +37,6 @@ ./tests/ptr-assign-lhs-math-1.ispc compfail x86-64 generic-16 Mac LLVM 3.4 clang++3.3 -O2 * ./tests/test-141.ispc runfail x86 avx2-i32x16 Mac LLVM 3.4 clang++3.3 -O2 * ./tests/test-141.ispc runfail x86-64 avx2-i32x16 Mac LLVM 3.4 clang++3.3 -O2 * -.\tests\exclusive-scan-add-10.ispc runfail x86 avx1.1-i64x4 Windows LLVM 3.3 cl -O2 * -.\tests\exclusive-scan-add-9.ispc runfail x86 avx1.1-i64x4 Windows LLVM 3.3 cl -O2 * -.\tests\max-uint-1.ispc runfail x86 avx1.1-i64x4 Windows LLVM 3.3 cl -O2 * -.\tests\max-uint.ispc 
runfail x86 avx1.1-i64x4 Windows LLVM 3.3 cl -O2 * -.\tests\min-uint-2.ispc runfail x86 avx1.1-i64x4 Windows LLVM 3.3 cl -O2 * -.\tests\packed-load-1.ispc runfail x86 avx1.1-i64x4 Windows LLVM 3.3 cl -O2 * -.\tests\packed-store.ispc runfail x86 avx1.1-i64x4 Windows LLVM 3.3 cl -O2 * -.\tests\reduce-add-uint-1.ispc runfail x86 avx1.1-i64x4 Windows LLVM 3.3 cl -O2 * -.\tests\reduce-add-uint.ispc runfail x86 avx1.1-i64x4 Windows LLVM 3.3 cl -O2 * -.\tests\reduce-add-uint64-1.ispc runfail x86 avx1.1-i64x4 Windows LLVM 3.3 cl -O2 * -.\tests\reduce-add-uint64.ispc runfail x86 avx1.1-i64x4 Windows LLVM 3.3 cl -O2 * -.\tests\reduce-max-uint.ispc runfail x86 avx1.1-i64x4 Windows LLVM 3.3 cl -O2 * -.\tests\reduce-min-uint64.ispc runfail x86 avx1.1-i64x4 Windows LLVM 3.3 cl -O2 * -.\tests\uint64-max-1.ispc runfail x86 avx1.1-i64x4 Windows LLVM 3.3 cl -O2 * -.\tests\uint64-max.ispc runfail x86 avx1.1-i64x4 Windows LLVM 3.3 cl -O2 * -.\tests\uint64-min-1.ispc runfail x86 avx1.1-i64x4 Windows LLVM 3.3 cl -O2 * -.\tests\uint64-min.ispc runfail x86 avx1.1-i64x4 Windows LLVM 3.3 cl -O2 * -.\tests\exclusive-scan-add-10.ispc runfail x86 avx2-i64x4 Windows LLVM 3.3 cl -O2 * -.\tests\exclusive-scan-add-9.ispc runfail x86 avx2-i64x4 Windows LLVM 3.3 cl -O2 * -.\tests\max-uint-1.ispc runfail x86 avx2-i64x4 Windows LLVM 3.3 cl -O2 * -.\tests\max-uint.ispc runfail x86 avx2-i64x4 Windows LLVM 3.3 cl -O2 * -.\tests\min-uint-2.ispc runfail x86 avx2-i64x4 Windows LLVM 3.3 cl -O2 * -.\tests\packed-load-1.ispc runfail x86 avx2-i64x4 Windows LLVM 3.3 cl -O2 * -.\tests\packed-store.ispc runfail x86 avx2-i64x4 Windows LLVM 3.3 cl -O2 * -.\tests\reduce-add-uint-1.ispc runfail x86 avx2-i64x4 Windows LLVM 3.3 cl -O2 * -.\tests\reduce-add-uint.ispc runfail x86 avx2-i64x4 Windows LLVM 3.3 cl -O2 * -.\tests\reduce-add-uint64-1.ispc runfail x86 avx2-i64x4 Windows LLVM 3.3 cl -O2 * -.\tests\reduce-add-uint64.ispc runfail x86 avx2-i64x4 Windows LLVM 3.3 cl -O2 * -.\tests\reduce-max-uint.ispc runfail x86 
avx2-i64x4 Windows LLVM 3.3 cl -O2 * -.\tests\reduce-min-uint.ispc runfail x86 avx2-i64x4 Windows LLVM 3.3 cl -O2 * -.\tests\reduce-min-uint64.ispc runfail x86 avx2-i64x4 Windows LLVM 3.3 cl -O2 * -.\tests\uint64-max-1.ispc runfail x86 avx2-i64x4 Windows LLVM 3.3 cl -O2 * -.\tests\reduce-min-uint64.ispc runfail x86 avx1.1-i64x4 Windows LLVM 3.4 cl -O2 * -.\tests\reduce-min-uint.ispc runfail x86 avx2-i64x4 Windows LLVM 3.4 cl -O2 * -.\tests\reduce-min-uint64.ispc runfail x86 avx2-i64x4 Windows LLVM 3.4 cl -O2 * -.\tests\funcptr-null-4.ispc runfail x86 sse4-i8x16 Windows LLVM 3.4 cl -O2 * -.\tests\funcptr-null-5.ispc runfail x86 sse4-i8x16 Windows LLVM 3.4 cl -O2 * -.\tests\funcptr-null-6.ispc runfail x86 sse4-i8x16 Windows LLVM 3.4 cl -O2 * -.\tests\reduce-equal-10.ispc runfail x86 avx1-i32x8 Windows LLVM 3.4 cl -O2 * -.\tests\switch-10.ispc compfail x86 avx1-i32x8 Windows LLVM 3.4 cl -O2 * -.\tests\switch-11.ispc compfail x86 avx1-i32x8 Windows LLVM 3.4 cl -O2 * -.\tests\switch-12.ispc compfail x86 avx1-i32x8 Windows LLVM 3.4 cl -O2 * -.\tests\switch-8.ispc compfail x86 avx1-i32x8 Windows LLVM 3.4 cl -O2 * -.\tests\switch-9.ispc compfail x86 avx1-i32x8 Windows LLVM 3.4 cl -O2 * -.\tests\reduce-add-uint-1.ispc runfail x86 avx1-i32x16 Windows LLVM 3.4 cl -O2 * -.\tests\reduce-min-uint64.ispc runfail x86 avx1-i32x16 Windows LLVM 3.4 cl -O2 * -.\tests\switch-10.ispc compfail x86 avx1-i32x16 Windows LLVM 3.4 cl -O2 * -.\tests\switch-11.ispc compfail x86 avx1-i32x16 Windows LLVM 3.4 cl -O2 * -.\tests\switch-12.ispc compfail x86 avx1-i32x16 Windows LLVM 3.4 cl -O2 * -.\tests\switch-8.ispc compfail x86 avx1-i32x16 Windows LLVM 3.4 cl -O2 * -.\tests\switch-9.ispc compfail x86 avx1-i32x16 Windows LLVM 3.4 cl -O2 * -.\tests\reduce-min-uint.ispc runfail x86 avx1-i64x4 Windows LLVM 3.4 cl -O2 * -.\tests\reduce-equal-10.ispc runfail x86 avx1.1-i32x8 Windows LLVM 3.4 cl -O2 * -.\tests\switch-10.ispc compfail x86 avx1.1-i32x8 Windows LLVM 3.4 cl -O2 * -.\tests\switch-11.ispc 
compfail x86 avx1.1-i32x8 Windows LLVM 3.4 cl -O2 * -.\tests\switch-12.ispc compfail x86 avx1.1-i32x8 Windows LLVM 3.4 cl -O2 * -.\tests\switch-8.ispc compfail x86 avx1.1-i32x8 Windows LLVM 3.4 cl -O2 * -.\tests\switch-9.ispc compfail x86 avx1.1-i32x8 Windows LLVM 3.4 cl -O2 * -.\tests\reduce-add-uint-1.ispc runfail x86 avx1.1-i32x16 Windows LLVM 3.4 cl -O2 * -.\tests\reduce-min-uint64.ispc runfail x86 avx1.1-i32x16 Windows LLVM 3.4 cl -O2 * -.\tests\switch-10.ispc compfail x86 avx1.1-i32x16 Windows LLVM 3.4 cl -O2 * -.\tests\switch-11.ispc compfail x86 avx1.1-i32x16 Windows LLVM 3.4 cl -O2 * -.\tests\switch-12.ispc compfail x86 avx1.1-i32x16 Windows LLVM 3.4 cl -O2 * -.\tests\switch-8.ispc compfail x86 avx1.1-i32x16 Windows LLVM 3.4 cl -O2 * -.\tests\switch-9.ispc compfail x86 avx1.1-i32x16 Windows LLVM 3.4 cl -O2 * -.\tests\reduce-min-uint.ispc runfail x86 avx1.1-i64x4 Windows LLVM 3.4 cl -O2 * -.\tests\exclusive-scan-add-9.ispc runfail x86 avx2-i32x8 Windows LLVM 3.4 cl -O2 * -.\tests\reduce-equal-10.ispc runfail x86 avx2-i32x8 Windows LLVM 3.4 cl -O2 * -.\tests\uint64-max.ispc runfail x86 avx2-i32x8 Windows LLVM 3.4 cl -O2 * -.\tests\uint64-min-1.ispc runfail x86 avx2-i32x8 Windows LLVM 3.4 cl -O2 * -.\tests\uint64-min.ispc runfail x86 avx2-i32x8 Windows LLVM 3.4 cl -O2 * -.\tests\switch-10.ispc compfail x86 avx2-i32x8 Windows LLVM 3.4 cl -O2 * -.\tests\switch-11.ispc compfail x86 avx2-i32x8 Windows LLVM 3.4 cl -O2 * -.\tests\switch-12.ispc compfail x86 avx2-i32x8 Windows LLVM 3.4 cl -O2 * -.\tests\switch-8.ispc compfail x86 avx2-i32x8 Windows LLVM 3.4 cl -O2 * -.\tests\switch-9.ispc compfail x86 avx2-i32x8 Windows LLVM 3.4 cl -O2 * -.\tests\exclusive-scan-add-9.ispc runfail x86 avx2-i32x16 Windows LLVM 3.4 cl -O2 * -.\tests\max-uint-1.ispc runfail x86 avx2-i32x16 Windows LLVM 3.4 cl -O2 * -.\tests\min-uint-1.ispc runfail x86 avx2-i32x16 Windows LLVM 3.4 cl -O2 * -.\tests\min-uint-2.ispc runfail x86 avx2-i32x16 Windows LLVM 3.4 cl -O2 * 
-.\tests\packed-load-1.ispc runfail x86 avx2-i32x16 Windows LLVM 3.4 cl -O2 * -.\tests\packed-store.ispc runfail x86 avx2-i32x16 Windows LLVM 3.4 cl -O2 * -.\tests\reduce-add-uint-1.ispc runfail x86 avx2-i32x16 Windows LLVM 3.4 cl -O2 * -.\tests\test-141.ispc runfail x86 avx2-i32x16 Windows LLVM 3.4 cl -O2 * -.\tests\uint64-max.ispc runfail x86 avx2-i32x16 Windows LLVM 3.4 cl -O2 * -.\tests\uint64-min-1.ispc runfail x86 avx2-i32x16 Windows LLVM 3.4 cl -O2 * -.\tests\uint64-min.ispc runfail x86 avx2-i32x16 Windows LLVM 3.4 cl -O2 * -.\tests\switch-10.ispc compfail x86 avx2-i32x16 Windows LLVM 3.4 cl -O2 * -.\tests\switch-11.ispc compfail x86 avx2-i32x16 Windows LLVM 3.4 cl -O2 * -.\tests\switch-12.ispc compfail x86 avx2-i32x16 Windows LLVM 3.4 cl -O2 * -.\tests\switch-8.ispc compfail x86 avx2-i32x16 Windows LLVM 3.4 cl -O2 * -.\tests\switch-9.ispc compfail x86 avx2-i32x16 Windows LLVM 3.4 cl -O2 * -.\tests\funcptr-null-4.ispc runfail x86-64 sse4-i8x16 Windows LLVM 3.4 cl -O2 * -.\tests\funcptr-null-5.ispc runfail x86-64 sse4-i8x16 Windows LLVM 3.4 cl -O2 * -.\tests\funcptr-null-6.ispc runfail x86-64 sse4-i8x16 Windows LLVM 3.4 cl -O2 * -.\tests\switch-10.ispc compfail x86-64 avx1-i32x8 Windows LLVM 3.4 cl -O2 * -.\tests\switch-11.ispc compfail x86-64 avx1-i32x8 Windows LLVM 3.4 cl -O2 * -.\tests\switch-12.ispc compfail x86-64 avx1-i32x8 Windows LLVM 3.4 cl -O2 * -.\tests\switch-8.ispc compfail x86-64 avx1-i32x8 Windows LLVM 3.4 cl -O2 * -.\tests\switch-9.ispc compfail x86-64 avx1-i32x8 Windows LLVM 3.4 cl -O2 * -.\tests\switch-10.ispc compfail x86-64 avx1-i32x16 Windows LLVM 3.4 cl -O2 * -.\tests\switch-11.ispc compfail x86-64 avx1-i32x16 Windows LLVM 3.4 cl -O2 * -.\tests\switch-12.ispc compfail x86-64 avx1-i32x16 Windows LLVM 3.4 cl -O2 * -.\tests\switch-8.ispc compfail x86-64 avx1-i32x16 Windows LLVM 3.4 cl -O2 * -.\tests\switch-9.ispc compfail x86-64 avx1-i32x16 Windows LLVM 3.4 cl -O2 * -.\tests\switch-10.ispc compfail x86-64 avx1.1-i32x8 Windows LLVM 3.4 cl 
-O2 * -.\tests\switch-11.ispc compfail x86-64 avx1.1-i32x8 Windows LLVM 3.4 cl -O2 * -.\tests\switch-12.ispc compfail x86-64 avx1.1-i32x8 Windows LLVM 3.4 cl -O2 * -.\tests\switch-8.ispc compfail x86-64 avx1.1-i32x8 Windows LLVM 3.4 cl -O2 * -.\tests\switch-9.ispc compfail x86-64 avx1.1-i32x8 Windows LLVM 3.4 cl -O2 * -.\tests\switch-10.ispc compfail x86-64 avx1.1-i32x16 Windows LLVM 3.4 cl -O2 * -.\tests\switch-11.ispc compfail x86-64 avx1.1-i32x16 Windows LLVM 3.4 cl -O2 * -.\tests\switch-12.ispc compfail x86-64 avx1.1-i32x16 Windows LLVM 3.4 cl -O2 * -.\tests\switch-8.ispc compfail x86-64 avx1.1-i32x16 Windows LLVM 3.4 cl -O2 * -.\tests\switch-9.ispc compfail x86-64 avx1.1-i32x16 Windows LLVM 3.4 cl -O2 * -.\tests\switch-10.ispc compfail x86-64 avx2-i32x8 Windows LLVM 3.4 cl -O2 * -.\tests\switch-11.ispc compfail x86-64 avx2-i32x8 Windows LLVM 3.4 cl -O2 * -.\tests\switch-12.ispc compfail x86-64 avx2-i32x8 Windows LLVM 3.4 cl -O2 * -.\tests\switch-8.ispc compfail x86-64 avx2-i32x8 Windows LLVM 3.4 cl -O2 * -.\tests\switch-9.ispc compfail x86-64 avx2-i32x8 Windows LLVM 3.4 cl -O2 * -.\tests\test-141.ispc runfail x86-64 avx2-i32x16 Windows LLVM 3.4 cl -O2 * -.\tests\switch-10.ispc compfail x86-64 avx2-i32x16 Windows LLVM 3.4 cl -O2 * -.\tests\switch-11.ispc compfail x86-64 avx2-i32x16 Windows LLVM 3.4 cl -O2 * -.\tests\switch-12.ispc compfail x86-64 avx2-i32x16 Windows LLVM 3.4 cl -O2 * -.\tests\switch-8.ispc compfail x86-64 avx2-i32x16 Windows LLVM 3.4 cl -O2 * -.\tests\switch-9.ispc compfail x86-64 avx2-i32x16 Windows LLVM 3.4 cl -O2 * -.\tests\reduce-equal-10.ispc runfail x86 sse4-i8x16 Windows LLVM 3.3 cl -O2 * ./tests/atomics-13.ispc compfail x86 sse4-i16x8 Linux LLVM 3.3 clang++3.4 -O2 * ./tests/atomics-13.ispc compfail x86-64 sse4-i16x8 Linux LLVM 3.3 clang++3.4 -O2 * ./tests/atomics-13.ispc compfail x86 sse4-i8x16 Linux LLVM 3.3 clang++3.4 -O2 * @@ -580,8 +185,6 @@ ./tests/reduce-equal-5.ispc compfail x86-64 generic-16 Linux LLVM 3.4 clang++3.4 -O0 * 
./tests/reduce-equal-6.ispc compfail x86-64 generic-16 Linux LLVM 3.4 clang++3.4 -O0 * ./tests/reduce-equal-8.ispc compfail x86-64 generic-16 Linux LLVM 3.4 clang++3.4 -O0 * -./tests/reduce-equal.ispc compfail x86-64 avx1-i32x4 Linux LLVM 3.5 clang++3.4 -O2 * -./tests/shuffle2-7.ispc compfail x86-64 avx1-i64x4 Linux LLVM 3.5 clang++3.4 -O2 * ./tests/masked-scatter-struct.ispc runfail x86-64 generic-4 Linux LLVM 3.5 clang++3.4 -O2 * ./tests/exclusive-scan-add-1.ispc compfail x86-64 generic-4 Linux LLVM 3.5 clang++3.4 -O2 * ./tests/exclusive-scan-add-10.ispc compfail x86-64 generic-4 Linux LLVM 3.5 clang++3.4 -O2 * @@ -657,3 +260,475 @@ ./tests/foreach-double-1.ispc runfail x86 avx2-i32x8 Linux LLVM 3.5 clang++3.4 -O2 * ./tests/foreach-double-1.ispc runfail x86 avx2-i32x16 Linux LLVM 3.5 clang++3.4 -O2 * ./tests/foreach-double-1.ispc runfail x86 avx2-i64x4 Linux LLVM 3.5 clang++3.4 -O2 * +./tests/ptr-int-1.ispc runfail x86 avx2-i32x8 Linux LLVM 3.4 clang++3.4 -O2 * +./tests/ptr-int-1.ispc runfail x86 avx2-i32x16 Linux LLVM 3.4 clang++3.4 -O2 * +./tests/ptr-int-1.ispc runfail x86 avx2-i64x4 Linux LLVM 3.4 clang++3.4 -O2 * +./tests/ptr-int-1.ispc runfail x86 avx2-i32x8 Linux LLVM 3.5 clang++3.4 -O2 * +./tests/ptr-int-1.ispc runfail x86 avx2-i32x16 Linux LLVM 3.5 clang++3.4 -O2 * +./tests/ptr-int-1.ispc runfail x86 avx2-i64x4 Linux LLVM 3.5 clang++3.4 -O2 * +.\tests\exclusive-scan-add-9.ispc runfail x86 sse2-i32x4 Windows LLVM 3.3 cl -O2 * +.\tests\reduce-equal-10.ispc runfail x86 sse2-i32x4 Windows LLVM 3.3 cl -O2 * +.\tests\reduce-max-uint64.ispc runfail x86 sse2-i32x4 Windows LLVM 3.3 cl -O2 * +.\tests\uint64-max-1.ispc runfail x86 sse2-i32x4 Windows LLVM 3.3 cl -O2 * +.\tests\uint64-max.ispc runfail x86 sse2-i32x4 Windows LLVM 3.3 cl -O2 * +.\tests\uint64-min-1.ispc runfail x86 sse2-i32x4 Windows LLVM 3.3 cl -O2 * +.\tests\uint64-min.ispc runfail x86 sse2-i32x4 Windows LLVM 3.3 cl -O2 * +.\tests\exclusive-scan-add-9.ispc runfail x86 sse2-i32x4 Windows LLVM 3.3 cl 
-O0 * +.\tests\reduce-equal-10.ispc runfail x86 sse2-i32x4 Windows LLVM 3.3 cl -O0 * +.\tests\reduce-max-uint64.ispc runfail x86 sse2-i32x4 Windows LLVM 3.3 cl -O0 * +.\tests\uint64-max-1.ispc runfail x86 sse2-i32x4 Windows LLVM 3.3 cl -O0 * +.\tests\uint64-max.ispc runfail x86 sse2-i32x4 Windows LLVM 3.3 cl -O0 * +.\tests\uint64-min-1.ispc runfail x86 sse2-i32x4 Windows LLVM 3.3 cl -O0 * +.\tests\uint64-min.ispc runfail x86 sse2-i32x4 Windows LLVM 3.3 cl -O0 * +.\tests\exclusive-scan-add-10.ispc runfail x86 sse2-i32x8 Windows LLVM 3.3 cl -O2 * +.\tests\exclusive-scan-add-9.ispc runfail x86 sse2-i32x8 Windows LLVM 3.3 cl -O2 * +.\tests\reduce-equal-10.ispc runfail x86 sse2-i32x8 Windows LLVM 3.3 cl -O2 * +.\tests\reduce-max-uint64.ispc runfail x86 sse2-i32x8 Windows LLVM 3.3 cl -O2 * +.\tests\uint64-max-1.ispc runfail x86 sse2-i32x8 Windows LLVM 3.3 cl -O2 * +.\tests\uint64-max.ispc runfail x86 sse2-i32x8 Windows LLVM 3.3 cl -O2 * +.\tests\uint64-min-1.ispc runfail x86 sse2-i32x8 Windows LLVM 3.3 cl -O2 * +.\tests\uint64-min.ispc runfail x86 sse2-i32x8 Windows LLVM 3.3 cl -O2 * +.\tests\exclusive-scan-add-10.ispc runfail x86 sse2-i32x8 Windows LLVM 3.3 cl -O0 * +.\tests\exclusive-scan-add-9.ispc runfail x86 sse2-i32x8 Windows LLVM 3.3 cl -O0 * +.\tests\reduce-equal-10.ispc runfail x86 sse2-i32x8 Windows LLVM 3.3 cl -O0 * +.\tests\reduce-max-uint64.ispc runfail x86 sse2-i32x8 Windows LLVM 3.3 cl -O0 * +.\tests\uint64-max-1.ispc runfail x86 sse2-i32x8 Windows LLVM 3.3 cl -O0 * +.\tests\uint64-max.ispc runfail x86 sse2-i32x8 Windows LLVM 3.3 cl -O0 * +.\tests\uint64-min-1.ispc runfail x86 sse2-i32x8 Windows LLVM 3.3 cl -O0 * +.\tests\uint64-min.ispc runfail x86 sse2-i32x8 Windows LLVM 3.3 cl -O0 * +.\tests\exclusive-scan-add-10.ispc runfail x86 sse4-i32x4 Windows LLVM 3.3 cl -O2 * +.\tests\exclusive-scan-add-9.ispc runfail x86 sse4-i32x4 Windows LLVM 3.3 cl -O2 * +.\tests\max-uint-1.ispc runfail x86 sse4-i32x4 Windows LLVM 3.3 cl -O2 * +.\tests\max-uint.ispc runfail 
x86 sse4-i32x4 Windows LLVM 3.3 cl -O2 * +.\tests\min-uint-2.ispc runfail x86 sse4-i32x4 Windows LLVM 3.3 cl -O2 * +.\tests\packed-load-1.ispc runfail x86 sse4-i32x4 Windows LLVM 3.3 cl -O2 * +.\tests\packed-store.ispc runfail x86 sse4-i32x4 Windows LLVM 3.3 cl -O2 * +.\tests\packed-store2.ispc runfail x86 sse4-i32x4 Windows LLVM 3.3 cl -O2 * +.\tests\reduce-add-uint-1.ispc runfail x86 sse4-i32x4 Windows LLVM 3.3 cl -O2 * +.\tests\reduce-add-uint.ispc runfail x86 sse4-i32x4 Windows LLVM 3.3 cl -O2 * +.\tests\reduce-add-uint64-1.ispc runfail x86 sse4-i32x4 Windows LLVM 3.3 cl -O2 * +.\tests\reduce-add-uint64.ispc runfail x86 sse4-i32x4 Windows LLVM 3.3 cl -O2 * +.\tests\reduce-max-uint.ispc runfail x86 sse4-i32x4 Windows LLVM 3.3 cl -O2 * +.\tests\atomics-10.ispc runfail x86 sse4-i32x4 Windows LLVM 3.3 cl -O0 * +.\tests\atomics-varyingptr-1.ispc runfail x86 sse4-i32x4 Windows LLVM 3.3 cl -O0 * +.\tests\local-atomics-10.ispc runfail x86 sse4-i32x4 Windows LLVM 3.3 cl -O0 * +.\tests\local-atomics-varyingptr-1.ispc runfail x86 sse4-i32x4 Windows LLVM 3.3 cl -O0 * +.\tests\max-uint-1.ispc runfail x86 sse4-i32x4 Windows LLVM 3.3 cl -O0 * +.\tests\max-uint.ispc runfail x86 sse4-i32x4 Windows LLVM 3.3 cl -O0 * +.\tests\min-uint-2.ispc runfail x86 sse4-i32x4 Windows LLVM 3.3 cl -O0 * +.\tests\packed-load-1.ispc runfail x86 sse4-i32x4 Windows LLVM 3.3 cl -O0 * +.\tests\packed-store.ispc runfail x86 sse4-i32x4 Windows LLVM 3.3 cl -O0 * +.\tests\packed-store2.ispc runfail x86 sse4-i32x4 Windows LLVM 3.3 cl -O0 * +.\tests\reduce-add-uint-1.ispc runfail x86 sse4-i32x4 Windows LLVM 3.3 cl -O0 * +.\tests\reduce-add-uint.ispc runfail x86 sse4-i32x4 Windows LLVM 3.3 cl -O0 * +.\tests\reduce-add-uint64-1.ispc runfail x86 sse4-i32x4 Windows LLVM 3.3 cl -O0 * +.\tests\reduce-add-uint64.ispc runfail x86 sse4-i32x4 Windows LLVM 3.3 cl -O0 * +.\tests\reduce-max-uint.ispc runfail x86 sse4-i32x4 Windows LLVM 3.3 cl -O0 * +.\tests\exclusive-scan-add-10.ispc runfail x86 sse4-i32x8 Windows 
LLVM 3.3 cl -O2 * +.\tests\max-uint-1.ispc runfail x86 sse4-i32x8 Windows LLVM 3.3 cl -O2 * +.\tests\max-uint.ispc runfail x86 sse4-i32x8 Windows LLVM 3.3 cl -O2 * +.\tests\min-uint-2.ispc runfail x86 sse4-i32x8 Windows LLVM 3.3 cl -O2 * +.\tests\packed-load-1.ispc runfail x86 sse4-i32x8 Windows LLVM 3.3 cl -O2 * +.\tests\packed-store.ispc runfail x86 sse4-i32x8 Windows LLVM 3.3 cl -O2 * +.\tests\packed-store2.ispc runfail x86 sse4-i32x8 Windows LLVM 3.3 cl -O2 * +.\tests\reduce-add-uint-1.ispc runfail x86 sse4-i32x8 Windows LLVM 3.3 cl -O2 * +.\tests\reduce-add-uint.ispc runfail x86 sse4-i32x8 Windows LLVM 3.3 cl -O2 * +.\tests\reduce-add-uint64-1.ispc runfail x86 sse4-i32x8 Windows LLVM 3.3 cl -O2 * +.\tests\reduce-add-uint64.ispc runfail x86 sse4-i32x8 Windows LLVM 3.3 cl -O2 * +.\tests\reduce-max-uint.ispc runfail x86 sse4-i32x8 Windows LLVM 3.3 cl -O2 * +.\tests\atomics-10.ispc runfail x86 sse4-i32x8 Windows LLVM 3.3 cl -O0 * +.\tests\atomics-varyingptr-1.ispc runfail x86 sse4-i32x8 Windows LLVM 3.3 cl -O0 * +.\tests\local-atomics-10.ispc runfail x86 sse4-i32x8 Windows LLVM 3.3 cl -O0 * +.\tests\local-atomics-varyingptr-1.ispc runfail x86 sse4-i32x8 Windows LLVM 3.3 cl -O0 * +.\tests\max-uint-1.ispc runfail x86 sse4-i32x8 Windows LLVM 3.3 cl -O0 * +.\tests\max-uint.ispc runfail x86 sse4-i32x8 Windows LLVM 3.3 cl -O0 * +.\tests\min-uint-2.ispc runfail x86 sse4-i32x8 Windows LLVM 3.3 cl -O0 * +.\tests\packed-load-1.ispc runfail x86 sse4-i32x8 Windows LLVM 3.3 cl -O0 * +.\tests\packed-store.ispc runfail x86 sse4-i32x8 Windows LLVM 3.3 cl -O0 * +.\tests\packed-store2.ispc runfail x86 sse4-i32x8 Windows LLVM 3.3 cl -O0 * +.\tests\reduce-add-uint-1.ispc runfail x86 sse4-i32x8 Windows LLVM 3.3 cl -O0 * +.\tests\reduce-add-uint.ispc runfail x86 sse4-i32x8 Windows LLVM 3.3 cl -O0 * +.\tests\reduce-add-uint64-1.ispc runfail x86 sse4-i32x8 Windows LLVM 3.3 cl -O0 * +.\tests\reduce-add-uint64.ispc runfail x86 sse4-i32x8 Windows LLVM 3.3 cl -O0 * 
+.\tests\reduce-max-uint.ispc runfail x86 sse4-i32x8 Windows LLVM 3.3 cl -O0 * +.\tests\exclusive-scan-add-10.ispc runfail x86 sse4-i16x8 Windows LLVM 3.3 cl -O2 * +.\tests\max-uint-1.ispc runfail x86 sse4-i16x8 Windows LLVM 3.3 cl -O2 * +.\tests\max-uint.ispc runfail x86 sse4-i16x8 Windows LLVM 3.3 cl -O2 * +.\tests\min-uint-2.ispc runfail x86 sse4-i16x8 Windows LLVM 3.3 cl -O2 * +.\tests\packed-load-1.ispc runfail x86 sse4-i16x8 Windows LLVM 3.3 cl -O2 * +.\tests\packed-store.ispc runfail x86 sse4-i16x8 Windows LLVM 3.3 cl -O2 * +.\tests\packed-store2.ispc runfail x86 sse4-i16x8 Windows LLVM 3.3 cl -O2 * +.\tests\reduce-add-uint-1.ispc runfail x86 sse4-i16x8 Windows LLVM 3.3 cl -O2 * +.\tests\reduce-add-uint.ispc runfail x86 sse4-i16x8 Windows LLVM 3.3 cl -O2 * +.\tests\reduce-add-uint64-1.ispc runfail x86 sse4-i16x8 Windows LLVM 3.3 cl -O2 * +.\tests\reduce-add-uint64.ispc runfail x86 sse4-i16x8 Windows LLVM 3.3 cl -O2 * +.\tests\reduce-max-uint.ispc runfail x86 sse4-i16x8 Windows LLVM 3.3 cl -O2 * +.\tests\atomics-13.ispc compfail x86 sse4-i16x8 Windows LLVM 3.3 cl -O2 * +.\tests\atomics-10.ispc runfail x86 sse4-i16x8 Windows LLVM 3.3 cl -O0 * +.\tests\atomics-varyingptr-1.ispc runfail x86 sse4-i16x8 Windows LLVM 3.3 cl -O0 * +.\tests\local-atomics-10.ispc runfail x86 sse4-i16x8 Windows LLVM 3.3 cl -O0 * +.\tests\local-atomics-varyingptr-1.ispc runfail x86 sse4-i16x8 Windows LLVM 3.3 cl -O0 * +.\tests\max-uint-1.ispc runfail x86 sse4-i16x8 Windows LLVM 3.3 cl -O0 * +.\tests\max-uint.ispc runfail x86 sse4-i16x8 Windows LLVM 3.3 cl -O0 * +.\tests\min-uint-2.ispc runfail x86 sse4-i16x8 Windows LLVM 3.3 cl -O0 * +.\tests\packed-load-1.ispc runfail x86 sse4-i16x8 Windows LLVM 3.3 cl -O0 * +.\tests\packed-store.ispc runfail x86 sse4-i16x8 Windows LLVM 3.3 cl -O0 * +.\tests\packed-store2.ispc runfail x86 sse4-i16x8 Windows LLVM 3.3 cl -O0 * +.\tests\reduce-add-uint-1.ispc runfail x86 sse4-i16x8 Windows LLVM 3.3 cl -O0 * +.\tests\reduce-add-uint.ispc runfail x86 
sse4-i16x8 Windows LLVM 3.3 cl -O0 * +.\tests\reduce-add-uint64-1.ispc runfail x86 sse4-i16x8 Windows LLVM 3.3 cl -O0 * +.\tests\reduce-add-uint64.ispc runfail x86 sse4-i16x8 Windows LLVM 3.3 cl -O0 * +.\tests\reduce-max-uint.ispc runfail x86 sse4-i16x8 Windows LLVM 3.3 cl -O0 * +.\tests\exclusive-scan-add-10.ispc runfail x86 sse4-i8x16 Windows LLVM 3.3 cl -O2 * +.\tests\exclusive-scan-add-9.ispc runfail x86 sse4-i8x16 Windows LLVM 3.3 cl -O2 * +.\tests\max-uint-1.ispc runfail x86 sse4-i8x16 Windows LLVM 3.3 cl -O2 * +.\tests\max-uint.ispc runfail x86 sse4-i8x16 Windows LLVM 3.3 cl -O2 * +.\tests\min-uint-2.ispc runfail x86 sse4-i8x16 Windows LLVM 3.3 cl -O2 * +.\tests\packed-load-1.ispc runfail x86 sse4-i8x16 Windows LLVM 3.3 cl -O2 * +.\tests\packed-store.ispc runfail x86 sse4-i8x16 Windows LLVM 3.3 cl -O2 * +.\tests\packed-store2.ispc runfail x86 sse4-i8x16 Windows LLVM 3.3 cl -O2 * +.\tests\reduce-add-uint-1.ispc runfail x86 sse4-i8x16 Windows LLVM 3.3 cl -O2 * +.\tests\reduce-add-uint.ispc runfail x86 sse4-i8x16 Windows LLVM 3.3 cl -O2 * +.\tests\reduce-add-uint64-1.ispc runfail x86 sse4-i8x16 Windows LLVM 3.3 cl -O2 * +.\tests\reduce-add-uint64.ispc runfail x86 sse4-i8x16 Windows LLVM 3.3 cl -O2 * +.\tests\reduce-equal-10.ispc runfail x86 sse4-i8x16 Windows LLVM 3.3 cl -O2 * +.\tests\reduce-max-uint.ispc runfail x86 sse4-i8x16 Windows LLVM 3.3 cl -O2 * +.\tests\reduce-min-uint64.ispc runfail x86 sse4-i8x16 Windows LLVM 3.3 cl -O2 * +.\tests\atomics-13.ispc compfail x86 sse4-i8x16 Windows LLVM 3.3 cl -O2 * +.\tests\atomics-10.ispc runfail x86 sse4-i8x16 Windows LLVM 3.3 cl -O0 * +.\tests\atomics-varyingptr-1.ispc runfail x86 sse4-i8x16 Windows LLVM 3.3 cl -O0 * +.\tests\local-atomics-10.ispc runfail x86 sse4-i8x16 Windows LLVM 3.3 cl -O0 * +.\tests\local-atomics-varyingptr-1.ispc runfail x86 sse4-i8x16 Windows LLVM 3.3 cl -O0 * +.\tests\max-uint-1.ispc runfail x86 sse4-i8x16 Windows LLVM 3.3 cl -O0 * +.\tests\max-uint.ispc runfail x86 sse4-i8x16 Windows LLVM 
3.3 cl -O0 * +.\tests\min-uint-2.ispc runfail x86 sse4-i8x16 Windows LLVM 3.3 cl -O0 * +.\tests\packed-load-1.ispc runfail x86 sse4-i8x16 Windows LLVM 3.3 cl -O0 * +.\tests\packed-store.ispc runfail x86 sse4-i8x16 Windows LLVM 3.3 cl -O0 * +.\tests\packed-store2.ispc runfail x86 sse4-i8x16 Windows LLVM 3.3 cl -O0 * +.\tests\reduce-add-uint-1.ispc runfail x86 sse4-i8x16 Windows LLVM 3.3 cl -O0 * +.\tests\reduce-add-uint.ispc runfail x86 sse4-i8x16 Windows LLVM 3.3 cl -O0 * +.\tests\reduce-add-uint64-1.ispc runfail x86 sse4-i8x16 Windows LLVM 3.3 cl -O0 * +.\tests\reduce-add-uint64.ispc runfail x86 sse4-i8x16 Windows LLVM 3.3 cl -O0 * +.\tests\reduce-max-uint.ispc runfail x86 sse4-i8x16 Windows LLVM 3.3 cl -O0 * +.\tests\exclusive-scan-add-10.ispc runfail x86 avx1-i32x4 Windows LLVM 3.3 cl -O2 * +.\tests\exclusive-scan-add-9.ispc runfail x86 avx1-i32x4 Windows LLVM 3.3 cl -O2 * +.\tests\max-uint-1.ispc runfail x86 avx1-i32x4 Windows LLVM 3.3 cl -O2 * +.\tests\max-uint.ispc runfail x86 avx1-i32x4 Windows LLVM 3.3 cl -O2 * +.\tests\min-uint-2.ispc runfail x86 avx1-i32x4 Windows LLVM 3.3 cl -O2 * +.\tests\packed-load-1.ispc runfail x86 avx1-i32x4 Windows LLVM 3.3 cl -O2 * +.\tests\packed-store.ispc runfail x86 avx1-i32x4 Windows LLVM 3.3 cl -O2 * +.\tests\packed-store2.ispc runfail x86 avx1-i32x4 Windows LLVM 3.3 cl -O2 * +.\tests\reduce-add-uint-1.ispc runfail x86 avx1-i32x4 Windows LLVM 3.3 cl -O2 * +.\tests\reduce-add-uint.ispc runfail x86 avx1-i32x4 Windows LLVM 3.3 cl -O2 * +.\tests\reduce-add-uint64-1.ispc runfail x86 avx1-i32x4 Windows LLVM 3.3 cl -O2 * +.\tests\reduce-add-uint64.ispc runfail x86 avx1-i32x4 Windows LLVM 3.3 cl -O2 * +.\tests\reduce-max-uint.ispc runfail x86 avx1-i32x4 Windows LLVM 3.3 cl -O2 * +.\tests\uint64-max-1.ispc runfail x86 avx1-i32x4 Windows LLVM 3.3 cl -O2 * +.\tests\uint64-max.ispc runfail x86 avx1-i32x4 Windows LLVM 3.3 cl -O2 * +.\tests\uint64-min-1.ispc runfail x86 avx1-i32x4 Windows LLVM 3.3 cl -O2 * +.\tests\uint64-min.ispc 
runfail x86 avx1-i32x4 Windows LLVM 3.3 cl -O2 * +.\tests\atomics-10.ispc runfail x86 avx1-i32x4 Windows LLVM 3.3 cl -O0 * +.\tests\atomics-varyingptr-1.ispc runfail x86 avx1-i32x4 Windows LLVM 3.3 cl -O0 * +.\tests\local-atomics-10.ispc runfail x86 avx1-i32x4 Windows LLVM 3.3 cl -O0 * +.\tests\local-atomics-varyingptr-1.ispc runfail x86 avx1-i32x4 Windows LLVM 3.3 cl -O0 * +.\tests\max-uint-1.ispc runfail x86 avx1-i32x4 Windows LLVM 3.3 cl -O0 * +.\tests\max-uint.ispc runfail x86 avx1-i32x4 Windows LLVM 3.3 cl -O0 * +.\tests\min-uint-2.ispc runfail x86 avx1-i32x4 Windows LLVM 3.3 cl -O0 * +.\tests\packed-load-1.ispc runfail x86 avx1-i32x4 Windows LLVM 3.3 cl -O0 * +.\tests\packed-store.ispc runfail x86 avx1-i32x4 Windows LLVM 3.3 cl -O0 * +.\tests\packed-store2.ispc runfail x86 avx1-i32x4 Windows LLVM 3.3 cl -O0 * +.\tests\reduce-add-uint-1.ispc runfail x86 avx1-i32x4 Windows LLVM 3.3 cl -O0 * +.\tests\reduce-add-uint.ispc runfail x86 avx1-i32x4 Windows LLVM 3.3 cl -O0 * +.\tests\reduce-add-uint64-1.ispc runfail x86 avx1-i32x4 Windows LLVM 3.3 cl -O0 * +.\tests\reduce-add-uint64.ispc runfail x86 avx1-i32x4 Windows LLVM 3.3 cl -O0 * +.\tests\reduce-max-uint.ispc runfail x86 avx1-i32x4 Windows LLVM 3.3 cl -O0 * +.\tests\exclusive-scan-add-10.ispc runfail x86 avx1-i32x8 Windows LLVM 3.3 cl -O2 * +.\tests\max-uint-1.ispc runfail x86 avx1-i32x8 Windows LLVM 3.3 cl -O2 * +.\tests\max-uint.ispc runfail x86 avx1-i32x8 Windows LLVM 3.3 cl -O2 * +.\tests\min-uint-2.ispc runfail x86 avx1-i32x8 Windows LLVM 3.3 cl -O2 * +.\tests\packed-load-1.ispc runfail x86 avx1-i32x8 Windows LLVM 3.3 cl -O2 * +.\tests\packed-store.ispc runfail x86 avx1-i32x8 Windows LLVM 3.3 cl -O2 * +.\tests\packed-store2.ispc runfail x86 avx1-i32x8 Windows LLVM 3.3 cl -O2 * +.\tests\reduce-add-uint-1.ispc runfail x86 avx1-i32x8 Windows LLVM 3.3 cl -O2 * +.\tests\reduce-add-uint.ispc runfail x86 avx1-i32x8 Windows LLVM 3.3 cl -O2 * +.\tests\reduce-add-uint64-1.ispc runfail x86 avx1-i32x8 Windows LLVM 3.3 
cl -O2 * +.\tests\reduce-add-uint64.ispc runfail x86 avx1-i32x8 Windows LLVM 3.3 cl -O2 * +.\tests\reduce-max-uint.ispc runfail x86 avx1-i32x8 Windows LLVM 3.3 cl -O2 * +.\tests\reduce-max-uint64.ispc runfail x86 avx1-i32x8 Windows LLVM 3.3 cl -O2 * +.\tests\uint64-max-1.ispc runfail x86 avx1-i32x8 Windows LLVM 3.3 cl -O2 * +.\tests\uint64-max.ispc runfail x86 avx1-i32x8 Windows LLVM 3.3 cl -O2 * +.\tests\uint64-min-1.ispc runfail x86 avx1-i32x8 Windows LLVM 3.3 cl -O2 * +.\tests\uint64-min.ispc runfail x86 avx1-i32x8 Windows LLVM 3.3 cl -O2 * +.\tests\atomics-10.ispc runfail x86 avx1-i32x8 Windows LLVM 3.3 cl -O0 * +.\tests\atomics-varyingptr-1.ispc runfail x86 avx1-i32x8 Windows LLVM 3.3 cl -O0 * +.\tests\local-atomics-10.ispc runfail x86 avx1-i32x8 Windows LLVM 3.3 cl -O0 * +.\tests\local-atomics-varyingptr-1.ispc runfail x86 avx1-i32x8 Windows LLVM 3.3 cl -O0 * +.\tests\max-uint-1.ispc runfail x86 avx1-i32x8 Windows LLVM 3.3 cl -O0 * +.\tests\max-uint.ispc runfail x86 avx1-i32x8 Windows LLVM 3.3 cl -O0 * +.\tests\min-uint-2.ispc runfail x86 avx1-i32x8 Windows LLVM 3.3 cl -O0 * +.\tests\packed-load-1.ispc runfail x86 avx1-i32x8 Windows LLVM 3.3 cl -O0 * +.\tests\packed-store.ispc runfail x86 avx1-i32x8 Windows LLVM 3.3 cl -O0 * +.\tests\packed-store2.ispc runfail x86 avx1-i32x8 Windows LLVM 3.3 cl -O0 * +.\tests\reduce-add-uint-1.ispc runfail x86 avx1-i32x8 Windows LLVM 3.3 cl -O0 * +.\tests\reduce-add-uint.ispc runfail x86 avx1-i32x8 Windows LLVM 3.3 cl -O0 * +.\tests\reduce-add-uint64-1.ispc runfail x86 avx1-i32x8 Windows LLVM 3.3 cl -O0 * +.\tests\reduce-add-uint64.ispc runfail x86 avx1-i32x8 Windows LLVM 3.3 cl -O0 * +.\tests\reduce-max-uint.ispc runfail x86 avx1-i32x8 Windows LLVM 3.3 cl -O0 * +.\tests\exclusive-scan-add-10.ispc runfail x86 avx1-i32x16 Windows LLVM 3.3 cl -O2 * +.\tests\max-uint-1.ispc runfail x86 avx1-i32x16 Windows LLVM 3.3 cl -O2 * +.\tests\max-uint.ispc runfail x86 avx1-i32x16 Windows LLVM 3.3 cl -O2 * +.\tests\min-uint-2.ispc runfail 
x86 avx1-i32x16 Windows LLVM 3.3 cl -O2 * +.\tests\packed-load-1.ispc runfail x86 avx1-i32x16 Windows LLVM 3.3 cl -O2 * +.\tests\packed-store.ispc runfail x86 avx1-i32x16 Windows LLVM 3.3 cl -O2 * +.\tests\packed-store2.ispc runfail x86 avx1-i32x16 Windows LLVM 3.3 cl -O2 * +.\tests\reduce-add-uint-1.ispc runfail x86 avx1-i32x16 Windows LLVM 3.3 cl -O2 * +.\tests\reduce-add-uint.ispc runfail x86 avx1-i32x16 Windows LLVM 3.3 cl -O2 * +.\tests\reduce-add-uint64-1.ispc runfail x86 avx1-i32x16 Windows LLVM 3.3 cl -O2 * +.\tests\reduce-add-uint64.ispc runfail x86 avx1-i32x16 Windows LLVM 3.3 cl -O2 * +.\tests\reduce-max-uint.ispc runfail x86 avx1-i32x16 Windows LLVM 3.3 cl -O2 * +.\tests\reduce-max-uint64.ispc runfail x86 avx1-i32x16 Windows LLVM 3.3 cl -O2 * +.\tests\reduce-min-uint64.ispc runfail x86 avx1-i32x16 Windows LLVM 3.3 cl -O2 * +.\tests\uint64-max-1.ispc runfail x86 avx1-i32x16 Windows LLVM 3.3 cl -O2 * +.\tests\uint64-max.ispc runfail x86 avx1-i32x16 Windows LLVM 3.3 cl -O2 * +.\tests\uint64-min-1.ispc runfail x86 avx1-i32x16 Windows LLVM 3.3 cl -O2 * +.\tests\uint64-min.ispc runfail x86 avx1-i32x16 Windows LLVM 3.3 cl -O2 * +.\tests\atomics-10.ispc runfail x86 avx1-i32x16 Windows LLVM 3.3 cl -O0 * +.\tests\atomics-varyingptr-1.ispc runfail x86 avx1-i32x16 Windows LLVM 3.3 cl -O0 * +.\tests\local-atomics-10.ispc runfail x86 avx1-i32x16 Windows LLVM 3.3 cl -O0 * +.\tests\local-atomics-varyingptr-1.ispc runfail x86 avx1-i32x16 Windows LLVM 3.3 cl -O0 * +.\tests\max-uint-1.ispc runfail x86 avx1-i32x16 Windows LLVM 3.3 cl -O0 * +.\tests\max-uint.ispc runfail x86 avx1-i32x16 Windows LLVM 3.3 cl -O0 * +.\tests\min-uint-2.ispc runfail x86 avx1-i32x16 Windows LLVM 3.3 cl -O0 * +.\tests\packed-load-1.ispc runfail x86 avx1-i32x16 Windows LLVM 3.3 cl -O0 * +.\tests\packed-store.ispc runfail x86 avx1-i32x16 Windows LLVM 3.3 cl -O0 * +.\tests\packed-store2.ispc runfail x86 avx1-i32x16 Windows LLVM 3.3 cl -O0 * +.\tests\reduce-add-uint-1.ispc runfail x86 avx1-i32x16 
Windows LLVM 3.3 cl -O0 * +.\tests\reduce-add-uint.ispc runfail x86 avx1-i32x16 Windows LLVM 3.3 cl -O0 * +.\tests\reduce-add-uint64-1.ispc runfail x86 avx1-i32x16 Windows LLVM 3.3 cl -O0 * +.\tests\reduce-add-uint64.ispc runfail x86 avx1-i32x16 Windows LLVM 3.3 cl -O0 * +.\tests\reduce-max-uint.ispc runfail x86 avx1-i32x16 Windows LLVM 3.3 cl -O0 * +.\tests\exclusive-scan-add-10.ispc runfail x86 avx1-i64x4 Windows LLVM 3.3 cl -O2 * +.\tests\exclusive-scan-add-9.ispc runfail x86 avx1-i64x4 Windows LLVM 3.3 cl -O2 * +.\tests\max-uint-1.ispc runfail x86 avx1-i64x4 Windows LLVM 3.3 cl -O2 * +.\tests\max-uint.ispc runfail x86 avx1-i64x4 Windows LLVM 3.3 cl -O2 * +.\tests\min-uint-2.ispc runfail x86 avx1-i64x4 Windows LLVM 3.3 cl -O2 * +.\tests\packed-load-1.ispc runfail x86 avx1-i64x4 Windows LLVM 3.3 cl -O2 * +.\tests\packed-store.ispc runfail x86 avx1-i64x4 Windows LLVM 3.3 cl -O2 * +.\tests\packed-store2.ispc runfail x86 avx1-i64x4 Windows LLVM 3.3 cl -O2 * +.\tests\reduce-add-uint-1.ispc runfail x86 avx1-i64x4 Windows LLVM 3.3 cl -O2 * +.\tests\reduce-add-uint.ispc runfail x86 avx1-i64x4 Windows LLVM 3.3 cl -O2 * +.\tests\reduce-add-uint64-1.ispc runfail x86 avx1-i64x4 Windows LLVM 3.3 cl -O2 * +.\tests\reduce-add-uint64.ispc runfail x86 avx1-i64x4 Windows LLVM 3.3 cl -O2 * +.\tests\reduce-max-uint.ispc runfail x86 avx1-i64x4 Windows LLVM 3.3 cl -O2 * +.\tests\uint64-max-1.ispc runfail x86 avx1-i64x4 Windows LLVM 3.3 cl -O2 * +.\tests\uint64-max.ispc runfail x86 avx1-i64x4 Windows LLVM 3.3 cl -O2 * +.\tests\uint64-min-1.ispc runfail x86 avx1-i64x4 Windows LLVM 3.3 cl -O2 * +.\tests\uint64-min.ispc runfail x86 avx1-i64x4 Windows LLVM 3.3 cl -O2 * +.\tests\atomics-10.ispc runfail x86 avx1-i64x4 Windows LLVM 3.3 cl -O0 * +.\tests\atomics-varyingptr-1.ispc runfail x86 avx1-i64x4 Windows LLVM 3.3 cl -O0 * +.\tests\local-atomics-10.ispc runfail x86 avx1-i64x4 Windows LLVM 3.3 cl -O0 * +.\tests\local-atomics-varyingptr-1.ispc runfail x86 avx1-i64x4 Windows LLVM 3.3 cl -O0 
* +.\tests\max-uint-1.ispc runfail x86 avx1-i64x4 Windows LLVM 3.3 cl -O0 * +.\tests\max-uint.ispc runfail x86 avx1-i64x4 Windows LLVM 3.3 cl -O0 * +.\tests\min-uint-2.ispc runfail x86 avx1-i64x4 Windows LLVM 3.3 cl -O0 * +.\tests\packed-load-1.ispc runfail x86 avx1-i64x4 Windows LLVM 3.3 cl -O0 * +.\tests\packed-store.ispc runfail x86 avx1-i64x4 Windows LLVM 3.3 cl -O0 * +.\tests\packed-store2.ispc runfail x86 avx1-i64x4 Windows LLVM 3.3 cl -O0 * +.\tests\reduce-add-uint-1.ispc runfail x86 avx1-i64x4 Windows LLVM 3.3 cl -O0 * +.\tests\reduce-add-uint.ispc runfail x86 avx1-i64x4 Windows LLVM 3.3 cl -O0 * +.\tests\reduce-add-uint64-1.ispc runfail x86 avx1-i64x4 Windows LLVM 3.3 cl -O0 * +.\tests\reduce-add-uint64.ispc runfail x86 avx1-i64x4 Windows LLVM 3.3 cl -O0 * +.\tests\reduce-max-uint.ispc runfail x86 avx1-i64x4 Windows LLVM 3.3 cl -O0 * +.\tests\exclusive-scan-add-10.ispc runfail x86 avx1.1-i32x8 Windows LLVM 3.3 cl -O2 * +.\tests\max-uint-1.ispc runfail x86 avx1.1-i32x8 Windows LLVM 3.3 cl -O2 * +.\tests\max-uint.ispc runfail x86 avx1.1-i32x8 Windows LLVM 3.3 cl -O2 * +.\tests\min-uint-2.ispc runfail x86 avx1.1-i32x8 Windows LLVM 3.3 cl -O2 * +.\tests\packed-load-1.ispc runfail x86 avx1.1-i32x8 Windows LLVM 3.3 cl -O2 * +.\tests\packed-store.ispc runfail x86 avx1.1-i32x8 Windows LLVM 3.3 cl -O2 * +.\tests\packed-store2.ispc runfail x86 avx1.1-i32x8 Windows LLVM 3.3 cl -O2 * +.\tests\reduce-add-uint-1.ispc runfail x86 avx1.1-i32x8 Windows LLVM 3.3 cl -O2 * +.\tests\reduce-add-uint.ispc runfail x86 avx1.1-i32x8 Windows LLVM 3.3 cl -O2 * +.\tests\reduce-add-uint64-1.ispc runfail x86 avx1.1-i32x8 Windows LLVM 3.3 cl -O2 * +.\tests\reduce-add-uint64.ispc runfail x86 avx1.1-i32x8 Windows LLVM 3.3 cl -O2 * +.\tests\reduce-max-uint.ispc runfail x86 avx1.1-i32x8 Windows LLVM 3.3 cl -O2 * +.\tests\reduce-max-uint64.ispc runfail x86 avx1.1-i32x8 Windows LLVM 3.3 cl -O2 * +.\tests\uint64-max-1.ispc runfail x86 avx1.1-i32x8 Windows LLVM 3.3 cl -O2 * 
+.\tests\uint64-max.ispc runfail x86 avx1.1-i32x8 Windows LLVM 3.3 cl -O2 * +.\tests\uint64-min-1.ispc runfail x86 avx1.1-i32x8 Windows LLVM 3.3 cl -O2 * +.\tests\uint64-min.ispc runfail x86 avx1.1-i32x8 Windows LLVM 3.3 cl -O2 * +.\tests\atomics-10.ispc runfail x86 avx1.1-i32x8 Windows LLVM 3.3 cl -O0 * +.\tests\atomics-varyingptr-1.ispc runfail x86 avx1.1-i32x8 Windows LLVM 3.3 cl -O0 * +.\tests\local-atomics-10.ispc runfail x86 avx1.1-i32x8 Windows LLVM 3.3 cl -O0 * +.\tests\local-atomics-varyingptr-1.ispc runfail x86 avx1.1-i32x8 Windows LLVM 3.3 cl -O0 * +.\tests\max-uint-1.ispc runfail x86 avx1.1-i32x8 Windows LLVM 3.3 cl -O0 * +.\tests\max-uint.ispc runfail x86 avx1.1-i32x8 Windows LLVM 3.3 cl -O0 * +.\tests\min-uint-2.ispc runfail x86 avx1.1-i32x8 Windows LLVM 3.3 cl -O0 * +.\tests\packed-load-1.ispc runfail x86 avx1.1-i32x8 Windows LLVM 3.3 cl -O0 * +.\tests\packed-store.ispc runfail x86 avx1.1-i32x8 Windows LLVM 3.3 cl -O0 * +.\tests\packed-store2.ispc runfail x86 avx1.1-i32x8 Windows LLVM 3.3 cl -O0 * +.\tests\reduce-add-uint-1.ispc runfail x86 avx1.1-i32x8 Windows LLVM 3.3 cl -O0 * +.\tests\reduce-add-uint.ispc runfail x86 avx1.1-i32x8 Windows LLVM 3.3 cl -O0 * +.\tests\reduce-add-uint64-1.ispc runfail x86 avx1.1-i32x8 Windows LLVM 3.3 cl -O0 * +.\tests\reduce-add-uint64.ispc runfail x86 avx1.1-i32x8 Windows LLVM 3.3 cl -O0 * +.\tests\reduce-max-uint.ispc runfail x86 avx1.1-i32x8 Windows LLVM 3.3 cl -O0 * +.\tests\exclusive-scan-add-10.ispc runfail x86 avx1.1-i32x16 Windows LLVM 3.3 cl -O2 * +.\tests\max-uint-1.ispc runfail x86 avx1.1-i32x16 Windows LLVM 3.3 cl -O2 * +.\tests\max-uint.ispc runfail x86 avx1.1-i32x16 Windows LLVM 3.3 cl -O2 * +.\tests\min-uint-2.ispc runfail x86 avx1.1-i32x16 Windows LLVM 3.3 cl -O2 * +.\tests\packed-load-1.ispc runfail x86 avx1.1-i32x16 Windows LLVM 3.3 cl -O2 * +.\tests\packed-store.ispc runfail x86 avx1.1-i32x16 Windows LLVM 3.3 cl -O2 * +.\tests\packed-store2.ispc runfail x86 avx1.1-i32x16 Windows LLVM 3.3 cl -O2 * 
+.\tests\reduce-add-uint-1.ispc runfail x86 avx1.1-i32x16 Windows LLVM 3.3 cl -O2 * +.\tests\reduce-add-uint.ispc runfail x86 avx1.1-i32x16 Windows LLVM 3.3 cl -O2 * +.\tests\reduce-add-uint64-1.ispc runfail x86 avx1.1-i32x16 Windows LLVM 3.3 cl -O2 * +.\tests\reduce-add-uint64.ispc runfail x86 avx1.1-i32x16 Windows LLVM 3.3 cl -O2 * +.\tests\reduce-max-uint.ispc runfail x86 avx1.1-i32x16 Windows LLVM 3.3 cl -O2 * +.\tests\reduce-max-uint64.ispc runfail x86 avx1.1-i32x16 Windows LLVM 3.3 cl -O2 * +.\tests\reduce-min-uint64.ispc runfail x86 avx1.1-i32x16 Windows LLVM 3.3 cl -O2 * +.\tests\uint64-max-1.ispc runfail x86 avx1.1-i32x16 Windows LLVM 3.3 cl -O2 * +.\tests\uint64-max.ispc runfail x86 avx1.1-i32x16 Windows LLVM 3.3 cl -O2 * +.\tests\uint64-min-1.ispc runfail x86 avx1.1-i32x16 Windows LLVM 3.3 cl -O2 * +.\tests\uint64-min.ispc runfail x86 avx1.1-i32x16 Windows LLVM 3.3 cl -O2 * +.\tests\atomics-10.ispc runfail x86 avx1.1-i32x16 Windows LLVM 3.3 cl -O0 * +.\tests\atomics-varyingptr-1.ispc runfail x86 avx1.1-i32x16 Windows LLVM 3.3 cl -O0 * +.\tests\local-atomics-10.ispc runfail x86 avx1.1-i32x16 Windows LLVM 3.3 cl -O0 * +.\tests\local-atomics-varyingptr-1.ispc runfail x86 avx1.1-i32x16 Windows LLVM 3.3 cl -O0 * +.\tests\max-uint-1.ispc runfail x86 avx1.1-i32x16 Windows LLVM 3.3 cl -O0 * +.\tests\max-uint.ispc runfail x86 avx1.1-i32x16 Windows LLVM 3.3 cl -O0 * +.\tests\min-uint-2.ispc runfail x86 avx1.1-i32x16 Windows LLVM 3.3 cl -O0 * +.\tests\packed-load-1.ispc runfail x86 avx1.1-i32x16 Windows LLVM 3.3 cl -O0 * +.\tests\packed-store.ispc runfail x86 avx1.1-i32x16 Windows LLVM 3.3 cl -O0 * +.\tests\packed-store2.ispc runfail x86 avx1.1-i32x16 Windows LLVM 3.3 cl -O0 * +.\tests\reduce-add-uint-1.ispc runfail x86 avx1.1-i32x16 Windows LLVM 3.3 cl -O0 * +.\tests\reduce-add-uint.ispc runfail x86 avx1.1-i32x16 Windows LLVM 3.3 cl -O0 * +.\tests\reduce-add-uint64-1.ispc runfail x86 avx1.1-i32x16 Windows LLVM 3.3 cl -O0 * +.\tests\reduce-add-uint64.ispc runfail 
x86 avx1.1-i32x16 Windows LLVM 3.3 cl -O0 * +.\tests\reduce-max-uint.ispc runfail x86 avx1.1-i32x16 Windows LLVM 3.3 cl -O0 * +.\tests\exclusive-scan-add-10.ispc runfail x86 avx1.1-i64x4 Windows LLVM 3.3 cl -O2 * +.\tests\exclusive-scan-add-9.ispc runfail x86 avx1.1-i64x4 Windows LLVM 3.3 cl -O2 * +.\tests\max-uint-1.ispc runfail x86 avx1.1-i64x4 Windows LLVM 3.3 cl -O2 * +.\tests\max-uint.ispc runfail x86 avx1.1-i64x4 Windows LLVM 3.3 cl -O2 * +.\tests\min-uint-2.ispc runfail x86 avx1.1-i64x4 Windows LLVM 3.3 cl -O2 * +.\tests\packed-load-1.ispc runfail x86 avx1.1-i64x4 Windows LLVM 3.3 cl -O2 * +.\tests\packed-store.ispc runfail x86 avx1.1-i64x4 Windows LLVM 3.3 cl -O2 * +.\tests\packed-store2.ispc runfail x86 avx1.1-i64x4 Windows LLVM 3.3 cl -O2 * +.\tests\reduce-add-uint-1.ispc runfail x86 avx1.1-i64x4 Windows LLVM 3.3 cl -O2 * +.\tests\reduce-add-uint.ispc runfail x86 avx1.1-i64x4 Windows LLVM 3.3 cl -O2 * +.\tests\reduce-add-uint64-1.ispc runfail x86 avx1.1-i64x4 Windows LLVM 3.3 cl -O2 * +.\tests\reduce-add-uint64.ispc runfail x86 avx1.1-i64x4 Windows LLVM 3.3 cl -O2 * +.\tests\reduce-max-uint.ispc runfail x86 avx1.1-i64x4 Windows LLVM 3.3 cl -O2 * +.\tests\uint64-max-1.ispc runfail x86 avx1.1-i64x4 Windows LLVM 3.3 cl -O2 * +.\tests\uint64-max.ispc runfail x86 avx1.1-i64x4 Windows LLVM 3.3 cl -O2 * +.\tests\uint64-min-1.ispc runfail x86 avx1.1-i64x4 Windows LLVM 3.3 cl -O2 * +.\tests\uint64-min.ispc runfail x86 avx1.1-i64x4 Windows LLVM 3.3 cl -O2 * +.\tests\atomics-10.ispc runfail x86 avx1.1-i64x4 Windows LLVM 3.3 cl -O0 * +.\tests\atomics-varyingptr-1.ispc runfail x86 avx1.1-i64x4 Windows LLVM 3.3 cl -O0 * +.\tests\local-atomics-10.ispc runfail x86 avx1.1-i64x4 Windows LLVM 3.3 cl -O0 * +.\tests\local-atomics-varyingptr-1.ispc runfail x86 avx1.1-i64x4 Windows LLVM 3.3 cl -O0 * +.\tests\max-uint-1.ispc runfail x86 avx1.1-i64x4 Windows LLVM 3.3 cl -O0 * +.\tests\max-uint.ispc runfail x86 avx1.1-i64x4 Windows LLVM 3.3 cl -O0 * +.\tests\min-uint-2.ispc 
runfail x86 avx1.1-i64x4 Windows LLVM 3.3 cl -O0 * +.\tests\packed-load-1.ispc runfail x86 avx1.1-i64x4 Windows LLVM 3.3 cl -O0 * +.\tests\packed-store.ispc runfail x86 avx1.1-i64x4 Windows LLVM 3.3 cl -O0 * +.\tests\packed-store2.ispc runfail x86 avx1.1-i64x4 Windows LLVM 3.3 cl -O0 * +.\tests\reduce-add-uint-1.ispc runfail x86 avx1.1-i64x4 Windows LLVM 3.3 cl -O0 * +.\tests\reduce-add-uint.ispc runfail x86 avx1.1-i64x4 Windows LLVM 3.3 cl -O0 * +.\tests\reduce-add-uint64-1.ispc runfail x86 avx1.1-i64x4 Windows LLVM 3.3 cl -O0 * +.\tests\reduce-add-uint64.ispc runfail x86 avx1.1-i64x4 Windows LLVM 3.3 cl -O0 * +.\tests\reduce-max-uint.ispc runfail x86 avx1.1-i64x4 Windows LLVM 3.3 cl -O0 * +.\tests\exclusive-scan-add-10.ispc runfail x86 avx2-i32x8 Windows LLVM 3.3 cl -O2 * +.\tests\max-uint-1.ispc runfail x86 avx2-i32x8 Windows LLVM 3.3 cl -O2 * +.\tests\max-uint.ispc runfail x86 avx2-i32x8 Windows LLVM 3.3 cl -O2 * +.\tests\min-uint-2.ispc runfail x86 avx2-i32x8 Windows LLVM 3.3 cl -O2 * +.\tests\packed-load-1.ispc runfail x86 avx2-i32x8 Windows LLVM 3.3 cl -O2 * +.\tests\packed-store.ispc runfail x86 avx2-i32x8 Windows LLVM 3.3 cl -O2 * +.\tests\packed-store2.ispc runfail x86 avx2-i32x8 Windows LLVM 3.3 cl -O2 * +.\tests\reduce-add-uint-1.ispc runfail x86 avx2-i32x8 Windows LLVM 3.3 cl -O2 * +.\tests\reduce-add-uint.ispc runfail x86 avx2-i32x8 Windows LLVM 3.3 cl -O2 * +.\tests\reduce-add-uint64-1.ispc runfail x86 avx2-i32x8 Windows LLVM 3.3 cl -O2 * +.\tests\reduce-add-uint64.ispc runfail x86 avx2-i32x8 Windows LLVM 3.3 cl -O2 * +.\tests\reduce-max-uint.ispc runfail x86 avx2-i32x8 Windows LLVM 3.3 cl -O2 * +.\tests\reduce-max-uint64.ispc runfail x86 avx2-i32x8 Windows LLVM 3.3 cl -O2 * +.\tests\uint64-max-1.ispc runfail x86 avx2-i32x8 Windows LLVM 3.3 cl -O2 * +.\tests\atomics-10.ispc runfail x86 avx2-i32x8 Windows LLVM 3.3 cl -O0 * +.\tests\atomics-varyingptr-1.ispc runfail x86 avx2-i32x8 Windows LLVM 3.3 cl -O0 * +.\tests\local-atomics-10.ispc runfail x86 
avx2-i32x8 Windows LLVM 3.3 cl -O0 * +.\tests\local-atomics-varyingptr-1.ispc runfail x86 avx2-i32x8 Windows LLVM 3.3 cl -O0 * +.\tests\max-uint-1.ispc runfail x86 avx2-i32x8 Windows LLVM 3.3 cl -O0 * +.\tests\max-uint.ispc runfail x86 avx2-i32x8 Windows LLVM 3.3 cl -O0 * +.\tests\min-uint-2.ispc runfail x86 avx2-i32x8 Windows LLVM 3.3 cl -O0 * +.\tests\packed-load-1.ispc runfail x86 avx2-i32x8 Windows LLVM 3.3 cl -O0 * +.\tests\packed-store.ispc runfail x86 avx2-i32x8 Windows LLVM 3.3 cl -O0 * +.\tests\packed-store2.ispc runfail x86 avx2-i32x8 Windows LLVM 3.3 cl -O0 * +.\tests\reduce-add-uint-1.ispc runfail x86 avx2-i32x8 Windows LLVM 3.3 cl -O0 * +.\tests\reduce-add-uint.ispc runfail x86 avx2-i32x8 Windows LLVM 3.3 cl -O0 * +.\tests\reduce-add-uint64-1.ispc runfail x86 avx2-i32x8 Windows LLVM 3.3 cl -O0 * +.\tests\reduce-add-uint64.ispc runfail x86 avx2-i32x8 Windows LLVM 3.3 cl -O0 * +.\tests\reduce-max-uint.ispc runfail x86 avx2-i32x8 Windows LLVM 3.3 cl -O0 * +.\tests\exclusive-scan-add-10.ispc runfail x86 avx2-i32x16 Windows LLVM 3.3 cl -O2 * +.\tests\max-uint-1.ispc runfail x86 avx2-i32x16 Windows LLVM 3.3 cl -O2 * +.\tests\max-uint.ispc runfail x86 avx2-i32x16 Windows LLVM 3.3 cl -O2 * +.\tests\min-uint-2.ispc runfail x86 avx2-i32x16 Windows LLVM 3.3 cl -O2 * +.\tests\packed-load-1.ispc runfail x86 avx2-i32x16 Windows LLVM 3.3 cl -O2 * +.\tests\packed-store.ispc runfail x86 avx2-i32x16 Windows LLVM 3.3 cl -O2 * +.\tests\packed-store2.ispc runfail x86 avx2-i32x16 Windows LLVM 3.3 cl -O2 * +.\tests\reduce-add-uint-1.ispc runfail x86 avx2-i32x16 Windows LLVM 3.3 cl -O2 * +.\tests\reduce-add-uint.ispc runfail x86 avx2-i32x16 Windows LLVM 3.3 cl -O2 * +.\tests\reduce-add-uint64-1.ispc runfail x86 avx2-i32x16 Windows LLVM 3.3 cl -O2 * +.\tests\reduce-add-uint64.ispc runfail x86 avx2-i32x16 Windows LLVM 3.3 cl -O2 * +.\tests\reduce-max-uint.ispc runfail x86 avx2-i32x16 Windows LLVM 3.3 cl -O2 * +.\tests\reduce-max-uint64.ispc runfail x86 avx2-i32x16 Windows LLVM 
3.3 cl -O2 * +.\tests\reduce-min-uint64.ispc runfail x86 avx2-i32x16 Windows LLVM 3.3 cl -O2 * +.\tests\uint64-max-1.ispc runfail x86 avx2-i32x16 Windows LLVM 3.3 cl -O2 * +.\tests\atomics-10.ispc runfail x86 avx2-i32x16 Windows LLVM 3.3 cl -O0 * +.\tests\atomics-varyingptr-1.ispc runfail x86 avx2-i32x16 Windows LLVM 3.3 cl -O0 * +.\tests\local-atomics-10.ispc runfail x86 avx2-i32x16 Windows LLVM 3.3 cl -O0 * +.\tests\local-atomics-varyingptr-1.ispc runfail x86 avx2-i32x16 Windows LLVM 3.3 cl -O0 * +.\tests\max-uint-1.ispc runfail x86 avx2-i32x16 Windows LLVM 3.3 cl -O0 * +.\tests\max-uint.ispc runfail x86 avx2-i32x16 Windows LLVM 3.3 cl -O0 * +.\tests\min-uint-2.ispc runfail x86 avx2-i32x16 Windows LLVM 3.3 cl -O0 * +.\tests\packed-load-1.ispc runfail x86 avx2-i32x16 Windows LLVM 3.3 cl -O0 * +.\tests\packed-store.ispc runfail x86 avx2-i32x16 Windows LLVM 3.3 cl -O0 * +.\tests\packed-store2.ispc runfail x86 avx2-i32x16 Windows LLVM 3.3 cl -O0 * +.\tests\reduce-add-uint-1.ispc runfail x86 avx2-i32x16 Windows LLVM 3.3 cl -O0 * +.\tests\reduce-add-uint.ispc runfail x86 avx2-i32x16 Windows LLVM 3.3 cl -O0 * +.\tests\reduce-add-uint64-1.ispc runfail x86 avx2-i32x16 Windows LLVM 3.3 cl -O0 * +.\tests\reduce-add-uint64.ispc runfail x86 avx2-i32x16 Windows LLVM 3.3 cl -O0 * +.\tests\reduce-max-uint.ispc runfail x86 avx2-i32x16 Windows LLVM 3.3 cl -O0 * +.\tests\exclusive-scan-add-10.ispc runfail x86 avx2-i64x4 Windows LLVM 3.3 cl -O2 * +.\tests\exclusive-scan-add-9.ispc runfail x86 avx2-i64x4 Windows LLVM 3.3 cl -O2 * +.\tests\max-uint-1.ispc runfail x86 avx2-i64x4 Windows LLVM 3.3 cl -O2 * +.\tests\max-uint.ispc runfail x86 avx2-i64x4 Windows LLVM 3.3 cl -O2 * +.\tests\min-uint-2.ispc runfail x86 avx2-i64x4 Windows LLVM 3.3 cl -O2 * +.\tests\packed-load-1.ispc runfail x86 avx2-i64x4 Windows LLVM 3.3 cl -O2 * +.\tests\packed-store.ispc runfail x86 avx2-i64x4 Windows LLVM 3.3 cl -O2 * +.\tests\packed-store2.ispc runfail x86 avx2-i64x4 Windows LLVM 3.3 cl -O2 * 
+.\tests\reduce-add-uint-1.ispc runfail x86 avx2-i64x4 Windows LLVM 3.3 cl -O2 * +.\tests\reduce-add-uint.ispc runfail x86 avx2-i64x4 Windows LLVM 3.3 cl -O2 * +.\tests\reduce-add-uint64-1.ispc runfail x86 avx2-i64x4 Windows LLVM 3.3 cl -O2 * +.\tests\reduce-add-uint64.ispc runfail x86 avx2-i64x4 Windows LLVM 3.3 cl -O2 * +.\tests\reduce-max-uint.ispc runfail x86 avx2-i64x4 Windows LLVM 3.3 cl -O2 * +.\tests\uint64-max-1.ispc runfail x86 avx2-i64x4 Windows LLVM 3.3 cl -O2 * +.\tests\atomics-10.ispc runfail x86 avx2-i64x4 Windows LLVM 3.3 cl -O0 * +.\tests\atomics-varyingptr-1.ispc runfail x86 avx2-i64x4 Windows LLVM 3.3 cl -O0 * +.\tests\local-atomics-10.ispc runfail x86 avx2-i64x4 Windows LLVM 3.3 cl -O0 * +.\tests\local-atomics-varyingptr-1.ispc runfail x86 avx2-i64x4 Windows LLVM 3.3 cl -O0 * +.\tests\max-uint-1.ispc runfail x86 avx2-i64x4 Windows LLVM 3.3 cl -O0 * +.\tests\max-uint.ispc runfail x86 avx2-i64x4 Windows LLVM 3.3 cl -O0 * +.\tests\min-uint-2.ispc runfail x86 avx2-i64x4 Windows LLVM 3.3 cl -O0 * +.\tests\packed-load-1.ispc runfail x86 avx2-i64x4 Windows LLVM 3.3 cl -O0 * +.\tests\packed-store.ispc runfail x86 avx2-i64x4 Windows LLVM 3.3 cl -O0 * +.\tests\packed-store2.ispc runfail x86 avx2-i64x4 Windows LLVM 3.3 cl -O0 * +.\tests\reduce-add-uint-1.ispc runfail x86 avx2-i64x4 Windows LLVM 3.3 cl -O0 * +.\tests\reduce-add-uint.ispc runfail x86 avx2-i64x4 Windows LLVM 3.3 cl -O0 * +.\tests\reduce-add-uint64-1.ispc runfail x86 avx2-i64x4 Windows LLVM 3.3 cl -O0 * +.\tests\reduce-add-uint64.ispc runfail x86 avx2-i64x4 Windows LLVM 3.3 cl -O0 * +.\tests\reduce-max-uint.ispc runfail x86 avx2-i64x4 Windows LLVM 3.3 cl -O0 * +.\tests\atomics-13.ispc compfail x86-64 sse4-i16x8 Windows LLVM 3.3 cl -O2 * +.\tests\atomics-13.ispc compfail x86-64 sse4-i8x16 Windows LLVM 3.3 cl -O2 * +.\tests\ptr-int-1.ispc runfail x86 avx2-i32x8 Windows LLVM 3.4 cl -O2 * +.\tests\ptr-int-1.ispc runfail x86 avx2-i32x16 Windows LLVM 3.4 cl -O2 * +.\tests\ptr-int-1.ispc runfail x86 
avx2-i64x4 Windows LLVM 3.4 cl -O2 * +.\tests\ptr-int-1.ispc runfail x86 avx2-i32x8 Windows LLVM 3.5 cl -O2 * +.\tests\ptr-int-1.ispc runfail x86 avx2-i32x16 Windows LLVM 3.5 cl -O2 * +.\tests\ptr-int-1.ispc runfail x86 avx2-i64x4 Windows LLVM 3.5 cl -O2 * diff --git a/ispc.vcxproj b/ispc.vcxproj index 8aee2988..8fa9be70 100755 --- a/ispc.vcxproj +++ b/ispc.vcxproj @@ -365,12 +365,14 @@ Application true Unicode + v110 Application false false Unicode + v110 @@ -429,4 +431,4 @@ - + \ No newline at end of file diff --git a/llvm_patches/3_4_r201126-alias-gather.patch b/llvm_patches/3_4_r201126-alias-gather.patch new file mode 100644 index 00000000..9cebc3f6 --- /dev/null +++ b/llvm_patches/3_4_r201126-alias-gather.patch @@ -0,0 +1,148 @@ +Index: include/llvm/IR/IntrinsicsX86.td +=================================================================== +--- include/llvm/IR/IntrinsicsX86.td (revision 201125) ++++ include/llvm/IR/IntrinsicsX86.td (revision 201126) +@@ -1814,68 +1814,68 @@ + def int_x86_avx2_gather_d_pd : GCCBuiltin<"__builtin_ia32_gatherd_pd">, + Intrinsic<[llvm_v2f64_ty], + [llvm_v2f64_ty, llvm_ptr_ty, llvm_v4i32_ty, llvm_v2f64_ty, llvm_i8_ty], +- [IntrReadMem]>; ++ [IntrReadArgMem]>; + def int_x86_avx2_gather_d_pd_256 : GCCBuiltin<"__builtin_ia32_gatherd_pd256">, + Intrinsic<[llvm_v4f64_ty], + [llvm_v4f64_ty, llvm_ptr_ty, llvm_v4i32_ty, llvm_v4f64_ty, llvm_i8_ty], +- [IntrReadMem]>; ++ [IntrReadArgMem]>; + def int_x86_avx2_gather_q_pd : GCCBuiltin<"__builtin_ia32_gatherq_pd">, + Intrinsic<[llvm_v2f64_ty], + [llvm_v2f64_ty, llvm_ptr_ty, llvm_v2i64_ty, llvm_v2f64_ty, llvm_i8_ty], +- [IntrReadMem]>; ++ [IntrReadArgMem]>; + def int_x86_avx2_gather_q_pd_256 : GCCBuiltin<"__builtin_ia32_gatherq_pd256">, + Intrinsic<[llvm_v4f64_ty], + [llvm_v4f64_ty, llvm_ptr_ty, llvm_v4i64_ty, llvm_v4f64_ty, llvm_i8_ty], +- [IntrReadMem]>; ++ [IntrReadArgMem]>; + def int_x86_avx2_gather_d_ps : GCCBuiltin<"__builtin_ia32_gatherd_ps">, + Intrinsic<[llvm_v4f32_ty], + [llvm_v4f32_ty, 
llvm_ptr_ty, llvm_v4i32_ty, llvm_v4f32_ty, llvm_i8_ty], +- [IntrReadMem]>; ++ [IntrReadArgMem]>; + def int_x86_avx2_gather_d_ps_256 : GCCBuiltin<"__builtin_ia32_gatherd_ps256">, + Intrinsic<[llvm_v8f32_ty], + [llvm_v8f32_ty, llvm_ptr_ty, llvm_v8i32_ty, llvm_v8f32_ty, llvm_i8_ty], +- [IntrReadMem]>; ++ [IntrReadArgMem]>; + def int_x86_avx2_gather_q_ps : GCCBuiltin<"__builtin_ia32_gatherq_ps">, + Intrinsic<[llvm_v4f32_ty], + [llvm_v4f32_ty, llvm_ptr_ty, llvm_v2i64_ty, llvm_v4f32_ty, llvm_i8_ty], +- [IntrReadMem]>; ++ [IntrReadArgMem]>; + def int_x86_avx2_gather_q_ps_256 : GCCBuiltin<"__builtin_ia32_gatherq_ps256">, + Intrinsic<[llvm_v4f32_ty], + [llvm_v4f32_ty, llvm_ptr_ty, llvm_v4i64_ty, llvm_v4f32_ty, llvm_i8_ty], +- [IntrReadMem]>; ++ [IntrReadArgMem]>; + + def int_x86_avx2_gather_d_q : GCCBuiltin<"__builtin_ia32_gatherd_q">, + Intrinsic<[llvm_v2i64_ty], + [llvm_v2i64_ty, llvm_ptr_ty, llvm_v4i32_ty, llvm_v2i64_ty, llvm_i8_ty], +- [IntrReadMem]>; ++ [IntrReadArgMem]>; + def int_x86_avx2_gather_d_q_256 : GCCBuiltin<"__builtin_ia32_gatherd_q256">, + Intrinsic<[llvm_v4i64_ty], + [llvm_v4i64_ty, llvm_ptr_ty, llvm_v4i32_ty, llvm_v4i64_ty, llvm_i8_ty], +- [IntrReadMem]>; ++ [IntrReadArgMem]>; + def int_x86_avx2_gather_q_q : GCCBuiltin<"__builtin_ia32_gatherq_q">, + Intrinsic<[llvm_v2i64_ty], + [llvm_v2i64_ty, llvm_ptr_ty, llvm_v2i64_ty, llvm_v2i64_ty, llvm_i8_ty], +- [IntrReadMem]>; ++ [IntrReadArgMem]>; + def int_x86_avx2_gather_q_q_256 : GCCBuiltin<"__builtin_ia32_gatherq_q256">, + Intrinsic<[llvm_v4i64_ty], + [llvm_v4i64_ty, llvm_ptr_ty, llvm_v4i64_ty, llvm_v4i64_ty, llvm_i8_ty], +- [IntrReadMem]>; ++ [IntrReadArgMem]>; + def int_x86_avx2_gather_d_d : GCCBuiltin<"__builtin_ia32_gatherd_d">, + Intrinsic<[llvm_v4i32_ty], + [llvm_v4i32_ty, llvm_ptr_ty, llvm_v4i32_ty, llvm_v4i32_ty, llvm_i8_ty], +- [IntrReadMem]>; ++ [IntrReadArgMem]>; + def int_x86_avx2_gather_d_d_256 : GCCBuiltin<"__builtin_ia32_gatherd_d256">, + Intrinsic<[llvm_v8i32_ty], + [llvm_v8i32_ty, llvm_ptr_ty, 
llvm_v8i32_ty, llvm_v8i32_ty, llvm_i8_ty], +- [IntrReadMem]>; ++ [IntrReadArgMem]>; + def int_x86_avx2_gather_q_d : GCCBuiltin<"__builtin_ia32_gatherq_d">, + Intrinsic<[llvm_v4i32_ty], + [llvm_v4i32_ty, llvm_ptr_ty, llvm_v2i64_ty, llvm_v4i32_ty, llvm_i8_ty], +- [IntrReadMem]>; ++ [IntrReadArgMem]>; + def int_x86_avx2_gather_q_d_256 : GCCBuiltin<"__builtin_ia32_gatherq_d256">, + Intrinsic<[llvm_v4i32_ty], + [llvm_v4i32_ty, llvm_ptr_ty, llvm_v4i64_ty, llvm_v4i32_ty, llvm_i8_ty], +- [IntrReadMem]>; ++ [IntrReadArgMem]>; + } + + // Misc. +@@ -2974,28 +2974,28 @@ + def int_x86_avx512_gather_dpd_mask_512 : GCCBuiltin<"__builtin_ia32_mask_gatherdpd512">, + Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, llvm_i8_ty, + llvm_v8i32_ty, llvm_ptr_ty, llvm_i32_ty], +- [IntrReadMem]>; ++ [IntrReadArgMem]>; + def int_x86_avx512_gather_dps_mask_512 : GCCBuiltin<"__builtin_ia32_mask_gatherdps512">, + Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty, llvm_i16_ty, + llvm_v16i32_ty, llvm_ptr_ty, llvm_i32_ty], +- [IntrReadMem]>; ++ [IntrReadArgMem]>; + def int_x86_avx512_gather_qpd_mask_512 : GCCBuiltin<"__builtin_ia32_mask_gatherqpd512">, + Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, llvm_i8_ty, + llvm_v8i64_ty, llvm_ptr_ty, llvm_i32_ty], +- [IntrReadMem]>; ++ [IntrReadArgMem]>; + def int_x86_avx512_gather_qps_mask_512 : GCCBuiltin<"__builtin_ia32_mask_gatherqps512">, + Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_i8_ty, + llvm_v8i64_ty, llvm_ptr_ty, llvm_i32_ty], +- [IntrReadMem]>; ++ [IntrReadArgMem]>; + + def int_x86_avx512_gather_dpd_512 : GCCBuiltin<"__builtin_ia32_gatherdpd512">, + Intrinsic<[llvm_v8f64_ty], [llvm_v8i32_ty, llvm_ptr_ty, + llvm_i32_ty], +- [IntrReadMem]>; ++ [IntrReadArgMem]>; + def int_x86_avx512_gather_dps_512 : GCCBuiltin<"__builtin_ia32_gatherdps512">, + Intrinsic<[llvm_v16f32_ty], [llvm_v16i32_ty, llvm_ptr_ty, + llvm_i32_ty], +- [IntrReadMem]>; ++ [IntrReadArgMem]>; + def int_x86_avx512_gather_qpd_512 : GCCBuiltin<"__builtin_ia32_gatherqpd512">, + 
Intrinsic<[llvm_v8f64_ty], [llvm_v8i64_ty, llvm_ptr_ty, + llvm_i32_ty], +@@ -3003,12 +3003,12 @@ + def int_x86_avx512_gather_qps_512 : GCCBuiltin<"__builtin_ia32_gatherqps512">, + Intrinsic<[llvm_v8f32_ty], [llvm_v8i64_ty, llvm_ptr_ty, + llvm_i32_ty], +- [IntrReadMem]>; ++ [IntrReadArgMem]>; + + def int_x86_avx512_gather_dpq_mask_512 : GCCBuiltin<"__builtin_ia32_mask_gatherdpq512">, + Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty, llvm_i8_ty, + llvm_v8i32_ty, llvm_ptr_ty, llvm_i32_ty], +- [IntrReadMem]>; ++ [IntrReadArgMem]>; + def int_x86_avx512_gather_dpi_mask_512 : GCCBuiltin<"__builtin_ia32_mask_gatherdpi512">, + Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty, llvm_i16_ty, + llvm_v16i32_ty, llvm_ptr_ty, llvm_i32_ty], +@@ -3020,7 +3020,7 @@ + def int_x86_avx512_gather_qpi_mask_512 : GCCBuiltin<"__builtin_ia32_mask_gatherqpi512">, + Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_i8_ty, + llvm_v8i64_ty, llvm_ptr_ty, llvm_i32_ty], +- [IntrReadMem]>; ++ [IntrReadArgMem]>; + + def int_x86_avx512_gather_dpq_512 : GCCBuiltin<"__builtin_ia32_gatherdpq512">, + Intrinsic<[llvm_v8i64_ty], [llvm_v8i32_ty, llvm_ptr_ty, diff --git a/module.cpp b/module.cpp index 25db0fed..1a19e10c 100644 --- a/module.cpp +++ b/module.cpp @@ -2048,6 +2048,9 @@ void Module::execPreprocessor(const char *infilename, llvm::raw_string_ostream *ostream) const { clang::CompilerInstance inst; +#if defined(LLVM_3_5) + inst.createVirtualFileSystem(); +#endif inst.createFileManager(); llvm::raw_fd_ostream stderrRaw(2, false); diff --git a/opt.cpp b/opt.cpp index a54805db..4f945c4b 100644 --- a/opt.cpp +++ b/opt.cpp @@ -884,24 +884,29 @@ IntrinsicsOpt::IntrinsicsOpt() // All of the mask instructions we may encounter. Note that even if // compiling for AVX, we may still encounter the regular 4-wide SSE // MOVMSK instruction. 
- llvm::Function *ssei8Movmsk = - llvm::Intrinsic::getDeclaration(m->module, llvm::Intrinsic::x86_sse2_pmovmskb_128); - maskInstructions.push_back(ssei8Movmsk); - llvm::Function *sseFloatMovmsk = - llvm::Intrinsic::getDeclaration(m->module, llvm::Intrinsic::x86_sse_movmsk_ps); - maskInstructions.push_back(sseFloatMovmsk); - maskInstructions.push_back(m->module->getFunction("__movmsk")); - llvm::Function *avxFloatMovmsk = - llvm::Intrinsic::getDeclaration(m->module, llvm::Intrinsic::x86_avx_movmsk_ps_256); - Assert(avxFloatMovmsk != NULL); - maskInstructions.push_back(avxFloatMovmsk); + if (llvm::Function *ssei8Movmsk = + m->module->getFunction(llvm::Intrinsic::getName(llvm::Intrinsic::x86_sse2_pmovmskb_128))) { + maskInstructions.push_back(ssei8Movmsk); + } + if (llvm::Function *sseFloatMovmsk = + m->module->getFunction(llvm::Intrinsic::getName(llvm::Intrinsic::x86_sse_movmsk_ps))) { + maskInstructions.push_back(sseFloatMovmsk); + } + if (llvm::Function *__movmsk = + m->module->getFunction("__movmsk")) { + maskInstructions.push_back(__movmsk); + } + if (llvm::Function *avxFloatMovmsk = + m->module->getFunction(llvm::Intrinsic::getName(llvm::Intrinsic::x86_avx_movmsk_ps_256))) { + maskInstructions.push_back(avxFloatMovmsk); + } // And all of the blend instructions blendInstructions.push_back(BlendInstruction( - llvm::Intrinsic::getDeclaration(m->module, llvm::Intrinsic::x86_sse41_blendvps), + m->module->getFunction(llvm::Intrinsic::getName(llvm::Intrinsic::x86_sse41_blendvps)), 0xf, 0, 1, 2)); blendInstructions.push_back(BlendInstruction( - llvm::Intrinsic::getDeclaration(m->module, llvm::Intrinsic::x86_avx_blendv_ps_256), + m->module->getFunction(llvm::Intrinsic::getName(llvm::Intrinsic::x86_avx_blendv_ps_256)), 0xff, 0, 1, 2)); } @@ -933,15 +938,13 @@ IntrinsicsOpt::runOnBasicBlock(llvm::BasicBlock &bb) { DEBUG_START_PASS("IntrinsicsOpt"); llvm::Function *avxMaskedLoad32 = - llvm::Intrinsic::getDeclaration(m->module, llvm::Intrinsic::x86_avx_maskload_ps_256); + 
m->module->getFunction(llvm::Intrinsic::getName(llvm::Intrinsic::x86_avx_maskload_ps_256)); llvm::Function *avxMaskedLoad64 = - llvm::Intrinsic::getDeclaration(m->module, llvm::Intrinsic::x86_avx_maskload_pd_256); + m->module->getFunction(llvm::Intrinsic::getName(llvm::Intrinsic::x86_avx_maskload_pd_256)); llvm::Function *avxMaskedStore32 = - llvm::Intrinsic::getDeclaration(m->module, llvm::Intrinsic::x86_avx_maskstore_ps_256); + m->module->getFunction(llvm::Intrinsic::getName(llvm::Intrinsic::x86_avx_maskstore_ps_256)); llvm::Function *avxMaskedStore64 = - llvm::Intrinsic::getDeclaration(m->module, llvm::Intrinsic::x86_avx_maskstore_pd_256); - Assert(avxMaskedLoad32 != NULL && avxMaskedStore32 != NULL); - Assert(avxMaskedLoad64 != NULL && avxMaskedStore64 != NULL); + m->module->getFunction(llvm::Intrinsic::getName(llvm::Intrinsic::x86_avx_maskstore_pd_256)); bool modifiedAny = false; restart: @@ -1108,20 +1111,24 @@ IntrinsicsOpt::runOnBasicBlock(llvm::BasicBlock &bb) { bool IntrinsicsOpt::matchesMaskInstruction(llvm::Function *function) { - for (unsigned int i = 0; i < maskInstructions.size(); ++i) + for (unsigned int i = 0; i < maskInstructions.size(); ++i) { if (maskInstructions[i].function != NULL && - function == maskInstructions[i].function) + function == maskInstructions[i].function) { return true; + } + } return false; } IntrinsicsOpt::BlendInstruction * IntrinsicsOpt::matchingBlendInstruction(llvm::Function *function) { - for (unsigned int i = 0; i < blendInstructions.size(); ++i) + for (unsigned int i = 0; i < blendInstructions.size(); ++i) { if (blendInstructions[i].function != NULL && - function == blendInstructions[i].function) + function == blendInstructions[i].function) { return &blendInstructions[i]; + } + } return NULL; } diff --git a/perf.py b/perf.py index 65895335..30189040 100755 --- a/perf.py +++ b/perf.py @@ -533,7 +533,7 @@ def perf(options1, args): A = print_answer(answer, target_number) if options.ref != "": print_debug("\n\nREFERENCE 
COMPILER:\n", s, perf_log) - B = print_answer(answer_ref) + B = print_answer(answer_ref, target_number) # print perf report compare(A,B) diff --git a/stdlib.ispc b/stdlib.ispc index de0e32ed..a23dbbfa 100644 --- a/stdlib.ispc +++ b/stdlib.ispc @@ -57,8 +57,6 @@ #error Unknown value of ISPC_MASK_BITS #endif - - /////////////////////////////////////////////////////////////////////////// // CUDA Specific primitives // @@ -82,6 +80,44 @@ __declspec(safe,cost0) static inline uniform int __taskCount1() { return __task_ __declspec(safe,cost0) static inline uniform int __taskCount2() { return __task_count2(); } __declspec(safe,cost0) static inline uniform int __taskCount () { return __task_count (); } +/* Limits of integral types. */ +#ifndef INT8_MAX +#define INT8_MAX (127) +#endif +#ifndef INT16_MAX +#define INT16_MAX (32767) +#endif +#ifndef INT32_MAX +#define INT32_MAX (2147483647) +#endif +#ifndef INT64_MAX +#define INT64_MAX (9223372036854775807) +#endif +#ifndef UINT8_MAX +#define UINT8_MAX (255) +#endif +#ifndef UINT16_MAX +#define UINT16_MAX (65535) +#endif +#ifndef UINT32_MAX +#define UINT32_MAX (4294967295) +#endif +#ifndef UINT64_MAX +#define UINT64_MAX (18446744073709551615) +#endif +#ifndef INT8_MIN +#define INT8_MIN (-INT8_MAX - 1) +#endif +#ifndef INT16_MIN +#define INT16_MIN (-INT16_MAX - 1) +#endif +#ifndef INT32_MIN +#define INT32_MIN (-INT32_MAX - 1) +#endif +#ifndef INT64_MIN +#define INT64_MIN (-INT64_MAX - 1) +#endif + /////////////////////////////////////////////////////////////////////////// // Low level primitives @@ -4414,6 +4450,108 @@ static inline void fastmath() { __fastmath(); } +/////////////////////////////////////////////////////////////////////////// +// saturation arithmetic + +static inline uniform int8 saturating_add(uniform int8 a, uniform int8 b) { + uniform unsigned int8 a_unsig = a, b_unsig = b; + uniform unsigned int8 result = a_unsig + b_unsig; + a_unsig = (a_unsig >> 7) + INT8_MAX; + if ((uniform int8) ((a_unsig ^ b_unsig) | 
~(b_unsig ^ result)) >= 0) + result = a_unsig; + return result; +} + +static inline varying int8 saturating_add(varying int8 a, varying int8 b) { + return __padds_vi8(a, b); +} + +static inline uniform int16 saturating_add(uniform int16 a, uniform int16 b) { + uniform unsigned int16 a_unsig = a, b_unsig = b; + uniform unsigned int16 result = a_unsig + b_unsig; + a_unsig = (a_unsig >> 15) + INT16_MAX; + if ((uniform int16) ((a_unsig ^ b_unsig) | ~(b_unsig ^ result)) >= 0) + result = a_unsig; + return result; +} + +static inline varying int16 saturating_add(varying int16 a, varying int16 b) { + return __padds_vi16(a, b); +} + +static inline uniform unsigned int8 saturating_add(uniform unsigned int8 a, + uniform unsigned int8 b) { + uniform unsigned int8 result = a + b; + result |= (-(uniform int8)(result < a)); + return result; +} + +static inline varying unsigned int8 saturating_add(varying unsigned int8 a, + varying unsigned int8 b) { + return __paddus_vi8(a, b); +} + +static inline uniform unsigned int16 saturating_add(uniform unsigned int16 a, + uniform unsigned int16 b) { + uniform unsigned int16 result = a + b; + result |= (-(uniform int16)(result < a)); + return result; +} + +static inline varying unsigned int16 saturating_add(varying unsigned int16 a, + varying unsigned int16 b) { + return __paddus_vi16(a, b); +} + +static inline uniform int8 saturating_sub(uniform int8 a, uniform int8 b) { + uniform unsigned int8 a_unsig = a, b_unsig = b; + uniform unsigned int8 result = a_unsig - b_unsig; + a_unsig = (a_unsig >> 7) + INT8_MAX; + if ((uniform int8) ((a_unsig ^ b_unsig) & (a_unsig ^ result)) < 0) + result = a_unsig; + return result; +} + +static inline varying int8 saturating_sub(varying int8 a, varying int8 b) { + return __psubs_vi8(a, b); +} + +static inline uniform int16 saturating_sub(uniform int16 a, uniform int16 b) { + uniform unsigned int16 a_unsig = a, b_unsig = b; + uniform unsigned int16 result = a_unsig - b_unsig; + a_unsig = (a_unsig >> 15) + 
INT16_MAX; + if ((uniform int16) ((a_unsig ^ b_unsig) & (a_unsig ^ result)) < 0) + result = a_unsig; + return result; +} + +static inline varying int16 saturating_sub(varying int16 a, varying int16 b) { + return __psubs_vi16(a, b); +} + +static inline uniform unsigned int8 saturating_sub(uniform unsigned int8 a, + uniform unsigned int8 b) { + uniform unsigned int8 result = a - b; + result &= (-(uniform int8)(result <= a)); + return result; +} + +static inline varying unsigned int8 saturating_sub(varying unsigned int8 a, + varying unsigned int8 b) { + return __psubus_vi8(a, b); +} + +static inline uniform unsigned int16 saturating_sub(uniform unsigned int16 a, + uniform unsigned int16 b) { + uniform unsigned int16 result = a - b; + result &= (-(uniform int16)(result <= a)); + return result; +} + +static inline varying unsigned int16 saturating_sub(varying unsigned int16 a, + varying unsigned int16 b) { + return __psubus_vi16(a, b); +} /////////////////////////////////////////////////////////////////////////// // rdrand diff --git a/tests/padds_i16.ispc b/tests/padds_i16.ispc new file mode 100644 index 00000000..c763dd37 --- /dev/null +++ b/tests/padds_i16.ispc @@ -0,0 +1,27 @@ + +export uniform int width() { return programCount; } + +export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) { + uniform int16 a_max = 32767, a_min = -32768; // max and min signed int16 + if (programIndex % 3 == 0) { + RET[programIndex] = saturating_add(a_max, b); + } + else if (programIndex % 3 == 1) { + RET[programIndex] = saturating_add(a_min, -b); + } + else { + RET[programIndex] = saturating_add(a_min, b); + } +} + +export void result(uniform float RET[]) { + if (programIndex % 3 == 0) { + RET[programIndex] = (uniform int16) 32767; + } + else if (programIndex % 3 == 1) { + RET[programIndex] = (uniform int16) -32768; + } + else { + RET[programIndex] = (uniform int16) -32763; + } +} diff --git a/tests/padds_i8.ispc b/tests/padds_i8.ispc new file mode 100644 index 
00000000..7d272828 --- /dev/null +++ b/tests/padds_i8.ispc @@ -0,0 +1,27 @@ + +export uniform int width() { return programCount; } + +export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) { + uniform int8 a_max = 127, a_min = -128; // max and min signed int8 + if (programIndex % 3 == 0) { + RET[programIndex] = saturating_add(a_max, b); + } + else if (programIndex % 3 == 1) { + RET[programIndex] = saturating_add(a_min, -b); + } + else { + RET[programIndex] = saturating_add(a_min, b); + } +} + +export void result(uniform float RET[]) { + if (programIndex % 3 == 0) { + RET[programIndex] = (uniform int8) 127; + } + else if (programIndex % 3 == 1) { + RET[programIndex] = (uniform int8) -128; + } + else { + RET[programIndex] = (uniform int8) -123; + } +} diff --git a/tests/padds_vi16.ispc b/tests/padds_vi16.ispc new file mode 100644 index 00000000..5834a47a --- /dev/null +++ b/tests/padds_vi16.ispc @@ -0,0 +1,27 @@ + +export uniform int width() { return programCount; } + +export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) { + varying int16 a_max = 32767, a_min = -32768; // max and min signed int16 + if (programIndex % 3 == 0) { + RET[programIndex] = saturating_add(a_max, b); + } + else if (programIndex % 3 == 1) { + RET[programIndex] = saturating_add(a_min, -b); + } + else { + RET[programIndex] = saturating_add(a_min, b); + } +} + +export void result(uniform float RET[]) { + if (programIndex % 3 == 0) { + RET[programIndex] = (varying int16) 32767; + } + else if (programIndex % 3 == 1) { + RET[programIndex] = (varying int16) -32768; + } + else { + RET[programIndex] = (varying int16) -32763; + } +} diff --git a/tests/padds_vi8.ispc b/tests/padds_vi8.ispc new file mode 100644 index 00000000..0aca03d4 --- /dev/null +++ b/tests/padds_vi8.ispc @@ -0,0 +1,27 @@ + +export uniform int width() { return programCount; } + +export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) { + varying int8 a_max = 127, a_min 
= -128; // max and min signed int8 + if (programIndex % 3 == 0) { + RET[programIndex] = saturating_add(a_max, b); + } + else if (programIndex % 3 == 1) { + RET[programIndex] = saturating_add(a_min, -b); + } + else { + RET[programIndex] = saturating_add(a_min, b); + } +} + +export void result(uniform float RET[]) { + if (programIndex % 3 == 0) { + RET[programIndex] = (varying int8) 127; + } + else if (programIndex % 3 == 1) { + RET[programIndex] = (varying int8) -128; + } + else { + RET[programIndex] = (varying int8) -123; + } +} diff --git a/tests/paddus_i16.ispc b/tests/paddus_i16.ispc new file mode 100644 index 00000000..2032f161 --- /dev/null +++ b/tests/paddus_i16.ispc @@ -0,0 +1,21 @@ + +export uniform int width() { return programCount; } + +export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) { + uniform unsigned int16 a_max = 65535, a_min = 0; // max and min unsigned int16 + if (programIndex % 2 == 0) { + RET[programIndex] = saturating_add(a_max, b); + } + else { + RET[programIndex] = saturating_add(a_min, b); + } +} + +export void result(uniform float RET[]) { + if (programIndex % 2 == 0) { + RET[programIndex] = (uniform unsigned int16) 65535; + } + else { + RET[programIndex] = (uniform unsigned int16) 5; + } +} diff --git a/tests/paddus_i8.ispc b/tests/paddus_i8.ispc new file mode 100644 index 00000000..97436a86 --- /dev/null +++ b/tests/paddus_i8.ispc @@ -0,0 +1,21 @@ + +export uniform int width() { return programCount; } + +export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) { + uniform unsigned int8 a_max = 255, a_min = 0; // max and min unsigned int8 + if (programIndex % 2 == 0) { + RET[programIndex] = saturating_add(a_max, b); + } + else { + RET[programIndex] = saturating_add(a_min, b); + } +} + +export void result(uniform float RET[]) { + if (programIndex % 2 == 0) { + RET[programIndex] = (uniform unsigned int8) 255; + } + else { + RET[programIndex] = (uniform unsigned int8) 5; + } +} diff --git 
a/tests/paddus_vi16.ispc b/tests/paddus_vi16.ispc new file mode 100644 index 00000000..d8bfa000 --- /dev/null +++ b/tests/paddus_vi16.ispc @@ -0,0 +1,21 @@ + +export uniform int width() { return programCount; } + +export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) { + varying unsigned int16 a_max = 65535, a_min = 0; // max and min unsigned int16 + if (programIndex % 2 == 0) { + RET[programIndex] = saturating_add(a_max, b); + } + else { + RET[programIndex] = saturating_add(a_min, b); + } +} + +export void result(uniform float RET[]) { + if (programIndex % 2 == 0) { + RET[programIndex] = (varying unsigned int16) 65535; + } + else { + RET[programIndex] = (varying unsigned int16) 5; + } +} diff --git a/tests/paddus_vi8.ispc b/tests/paddus_vi8.ispc new file mode 100644 index 00000000..59baa6fb --- /dev/null +++ b/tests/paddus_vi8.ispc @@ -0,0 +1,22 @@ + +export uniform int width() { return programCount; } + +export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) { + varying unsigned int8 a_max = 255, a_min = 0; // max and min unsigned int8 + if (programIndex % 2 == 0) { + RET[programIndex] = saturating_add(a_max, b); + } + else { + RET[programIndex] = saturating_add(a_min, b); + } +} + +export void result(uniform float RET[]) { + if (programIndex % 2 == 0) { + RET[programIndex] = (varying unsigned int8) 255; + } + else { + RET[programIndex] = (varying unsigned int8) 5; + } +} + diff --git a/tests/psubs_i16.ispc b/tests/psubs_i16.ispc new file mode 100644 index 00000000..4f27b3b4 --- /dev/null +++ b/tests/psubs_i16.ispc @@ -0,0 +1,27 @@ + +export uniform int width() { return programCount; } + +export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) { + uniform int16 a_max = 32767, a_min = -32768; // max and min signed int16 + if (programIndex % 3 == 0) { + RET[programIndex] = saturating_sub(a_min, b); + } + else if (programIndex % 3 == 1) { + RET[programIndex] = saturating_sub(a_max, -b); + } + else { + 
RET[programIndex] = saturating_sub(a_max, b); + } +} + +export void result(uniform float RET[]) { + if (programIndex % 3 == 0) { + RET[programIndex] = (uniform int16) -32768; + } + else if (programIndex % 3 == 1) { + RET[programIndex] = (uniform int16) 32767; + } + else { + RET[programIndex] = (uniform int16) 32762; + } +} diff --git a/tests/psubs_i8.ispc b/tests/psubs_i8.ispc new file mode 100644 index 00000000..e04867bd --- /dev/null +++ b/tests/psubs_i8.ispc @@ -0,0 +1,27 @@ + +export uniform int width() { return programCount; } + +export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) { + uniform int8 a_max = 127, a_min = -128; // max and min signed int8 + if (programIndex % 3 == 0) { + RET[programIndex] = saturating_sub(a_min, b); + } + else if (programIndex % 3 == 1) { + RET[programIndex] = saturating_sub(a_max, -b); + } + else { + RET[programIndex] = saturating_sub(a_max, b); + } +} + +export void result(uniform float RET[]) { + if (programIndex % 3 == 0) { + RET[programIndex] = (uniform int8) -128; + } + else if (programIndex % 3 == 1) { + RET[programIndex] = (uniform int8) 127; + } + else { + RET[programIndex] = (uniform int8) 122; + } +} diff --git a/tests/psubs_vi16.ispc b/tests/psubs_vi16.ispc new file mode 100644 index 00000000..df130115 --- /dev/null +++ b/tests/psubs_vi16.ispc @@ -0,0 +1,27 @@ + +export uniform int width() { return programCount; } + +export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) { + varying int16 a_max = 32767, a_min = -32768; // max and min signed int16 + if (programIndex % 3 == 0) { + RET[programIndex] = saturating_sub(a_min, b); + } + else if (programIndex % 3 == 1) { + RET[programIndex] = saturating_sub(a_max, -b); + } + else { + RET[programIndex] = saturating_sub(a_max, b); + } +} + +export void result(uniform float RET[]) { + if (programIndex % 3 == 0) { + RET[programIndex] = (varying int16) -32768; + } + else if (programIndex % 3 == 1) { + RET[programIndex] = (varying int16) 
32767; + } + else { + RET[programIndex] = (varying int16) 32762; + } +} diff --git a/tests/psubs_vi8.ispc b/tests/psubs_vi8.ispc new file mode 100644 index 00000000..d7e9ff89 --- /dev/null +++ b/tests/psubs_vi8.ispc @@ -0,0 +1,27 @@ + +export uniform int width() { return programCount; } + +export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) { + varying int8 a_max = 127, a_min = -128; // max and min signed int8 + if (programIndex % 3 == 0) { + RET[programIndex] = saturating_sub(a_min, b); + } + else if (programIndex % 3 == 1) { + RET[programIndex] = saturating_sub(a_max, -b); + } + else { + RET[programIndex] = saturating_sub(a_max, b); + } +} + +export void result(uniform float RET[]) { + if (programIndex % 3 == 0) { + RET[programIndex] = (varying int8) -128; + } + else if (programIndex % 3 == 1) { + RET[programIndex] = (varying int8) 127; + } + else { + RET[programIndex] = (varying int8) 122; + } +} diff --git a/tests/psubus_i16.ispc b/tests/psubus_i16.ispc new file mode 100644 index 00000000..f9ae3568 --- /dev/null +++ b/tests/psubus_i16.ispc @@ -0,0 +1,21 @@ + +export uniform int width() { return programCount; } + +export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) { + uniform unsigned int16 a_max = 65535, a_min = 0; // max and min unsigned int16 + if (programIndex % 2 == 0) { + RET[programIndex] = saturating_sub(a_min, b); + } + else { + RET[programIndex] = saturating_sub(a_max, b); + } +} + +export void result(uniform float RET[]) { + if (programIndex % 2 == 0) { + RET[programIndex] = (uniform unsigned int16) 0; + } + else { + RET[programIndex] = (uniform unsigned int16) 65530; + } +} diff --git a/tests/psubus_i8.ispc b/tests/psubus_i8.ispc new file mode 100644 index 00000000..e6f30b2a --- /dev/null +++ b/tests/psubus_i8.ispc @@ -0,0 +1,21 @@ + +export uniform int width() { return programCount; } + +export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) { + uniform unsigned int8 a_max = 
255, a_min = 0; // max and min unsigned int8 + if (programIndex % 2 == 0) { + RET[programIndex] = saturating_sub(a_min, b); + } + else { + RET[programIndex] = saturating_sub(a_max, b); + } +} + +export void result(uniform float RET[]) { + if (programIndex % 2 == 0) { + RET[programIndex] = (uniform unsigned int8) 0; + } + else { + RET[programIndex] = (uniform unsigned int8) 250; + } +} diff --git a/tests/psubus_vi16.ispc b/tests/psubus_vi16.ispc new file mode 100644 index 00000000..0974cc5e --- /dev/null +++ b/tests/psubus_vi16.ispc @@ -0,0 +1,21 @@ + +export uniform int width() { return programCount; } + +export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) { + varying unsigned int16 a_max = 65535, a_min = 0; // max and min unsigned int16 + if (programIndex % 2 == 0) { + RET[programIndex] = saturating_sub(a_min, b); + } + else { + RET[programIndex] = saturating_sub(a_max, b); + } +} + +export void result(uniform float RET[]) { + if (programIndex % 2 == 0) { + RET[programIndex] = (varying unsigned int16) 0; + } + else { + RET[programIndex] = (varying unsigned int16) 65530; + } +} diff --git a/tests/psubus_vi8.ispc b/tests/psubus_vi8.ispc new file mode 100644 index 00000000..f7ad65d3 --- /dev/null +++ b/tests/psubus_vi8.ispc @@ -0,0 +1,21 @@ + +export uniform int width() { return programCount; } + +export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) { + varying unsigned int8 a_max = 255, a_min = 0; // max and min unsigned int8 + if (programIndex % 2 == 0) { + RET[programIndex] = saturating_sub(a_min, b); + } + else { + RET[programIndex] = saturating_sub(a_max, b); + } +} + +export void result(uniform float RET[]) { + if (programIndex % 2 == 0) { + RET[programIndex] = (varying unsigned int8) 0; + } + else { + RET[programIndex] = (varying unsigned int8) 250; + } +}