diff --git a/alloy.py b/alloy.py index 19497b35..cdfc9127 100755 --- a/alloy.py +++ b/alloy.py @@ -212,10 +212,10 @@ def check_targets(): answer = answer + ["avx1-i32x8", "avx1-i32x16", "avx1-i64x4"] if AVX11 == False and "rdrand" in f_lines[i]: AVX11 = True; - answer = answer + ["avx1.1-i32x8", "avx1.1-i32x16"] + answer = answer + ["avx1.1-i32x8", "avx1.1-i32x16", "avx1.1-i64x4"] if AVX2 == False and "avx2" in f_lines[i]: AVX2 = True; - answer = answer + ["avx2-i32x8", "avx2-i32x16"] + answer = answer + ["avx2-i32x8", "avx2-i32x16", "avx2-i64x4"] if current_OS == "MacOS": f_lines = take_lines("sysctl machdep.cpu.features", "first") if "SSE2" in f_lines: @@ -229,10 +229,10 @@ def check_targets(): answer = answer + ["avx1-i32x8", "avx1-i32x16", "avx1-i64x4"] if "RDRAND" in f_lines: AVX11 = True; - answer = answer + ["avx1.1-i32x8", "avx1.1-i32x16"] + answer = answer + ["avx1.1-i32x8", "avx1.1-i32x16", "avx1.1-i64x4"] if "AVX2.0" in f_lines: AVX2 = True; - answer = answer + ["avx2-i32x8", "avx2-i32x16"] + answer = answer + ["avx2-i32x8", "avx2-i32x16", "avx2-i64x4"] answer = answer + ["generic-4", "generic-16", "generic-8", "generic-1", "generic-32", "generic-64"] # now check what targets we have with the help of SDE @@ -257,9 +257,9 @@ def check_targets(): if AVX == False and "snb" in f_lines[i]: answer_sde = answer_sde + [["-snb", "avx1-i32x8"], ["-snb", "avx1-i32x16"], ["-snb", "avx1-i64x4"]] if AVX11 == False and "ivb" in f_lines[i]: - answer_sde = answer_sde + [["-ivb", "avx1.1-i32x8"], ["-ivb", "avx1.1-i32x16"]] + answer_sde = answer_sde + [["-ivb", "avx1.1-i32x8"], ["-ivb", "avx1.1-i32x16"], ["-ivb", "avx1.1-i64x4"]] if AVX2 == False and "hsw" in f_lines[i]: - answer_sde = answer_sde + [["-hsw", "avx2-i32x8"], ["-hsw", "avx2-i32x16"]] + answer_sde = answer_sde + [["-hsw", "avx2-i32x8"], ["-hsw", "avx2-i32x16"], ["-hsw", "avx2-i64x4"]] return [answer, answer_sde] def build_ispc(version_LLVM, make): diff --git a/fail_db.txt b/fail_db.txt index f1aaaab2..9c43c7f0 100644 --- a/fail_db.txt +++ b/fail_db.txt @@ -1025,3 +1025,38 @@ ./tests/reduce-equal.ispc compfail x86-64 avx2-i32x8 Mac LLVM 3.4 clang++3.3 -O2 * ./tests/test-141.ispc runfail x86 avx2-i32x16 Mac LLVM 3.4 clang++3.3 -O2 * ./tests/test-141.ispc runfail x86-64 avx2-i32x16 Mac LLVM 3.4 clang++3.3 -O2 * +.\tests\exclusive-scan-add-10.ispc runfail x86 avx1.1-i64x4 Windows LLVM 3.3 cl -O2 * +.\tests\exclusive-scan-add-9.ispc runfail x86 avx1.1-i64x4 Windows LLVM 3.3 cl -O2 * +.\tests\max-uint-1.ispc runfail x86 avx1.1-i64x4 Windows LLVM 3.3 cl -O2 * +.\tests\max-uint.ispc runfail x86 avx1.1-i64x4 Windows LLVM 3.3 cl -O2 * +.\tests\min-uint-2.ispc runfail x86 avx1.1-i64x4 Windows LLVM 3.3 cl -O2 * +.\tests\packed-load-1.ispc runfail x86 avx1.1-i64x4 Windows LLVM 3.3 cl -O2 * +.\tests\packed-store.ispc runfail x86 avx1.1-i64x4 Windows LLVM 3.3 cl -O2 * +.\tests\reduce-add-uint-1.ispc runfail x86 avx1.1-i64x4 Windows LLVM 3.3 cl -O2 * +.\tests\reduce-add-uint.ispc runfail x86 avx1.1-i64x4 Windows LLVM 3.3 cl -O2 * +.\tests\reduce-add-uint64-1.ispc runfail x86 avx1.1-i64x4 Windows LLVM 3.3 cl -O2 * +.\tests\reduce-add-uint64.ispc runfail x86 avx1.1-i64x4 Windows LLVM 3.3 cl -O2 * +.\tests\reduce-max-uint.ispc runfail x86 avx1.1-i64x4 Windows LLVM 3.3 cl -O2 * +.\tests\reduce-min-uint64.ispc runfail x86 avx1.1-i64x4 Windows LLVM 3.3 cl -O2 * +.\tests\uint64-max-1.ispc runfail x86 avx1.1-i64x4 Windows LLVM 3.3 cl -O2 * +.\tests\uint64-max.ispc runfail x86 avx1.1-i64x4 Windows LLVM 3.3 cl -O2 * +.\tests\uint64-min-1.ispc runfail x86 avx1.1-i64x4 Windows LLVM 3.3 cl -O2 * +.\tests\uint64-min.ispc runfail x86 avx1.1-i64x4 Windows LLVM 3.3 cl -O2 * +.\tests\exclusive-scan-add-10.ispc runfail x86 avx2-i64x4 Windows LLVM 3.3 cl -O2 * +.\tests\exclusive-scan-add-9.ispc runfail x86 avx2-i64x4 Windows LLVM 3.3 cl -O2 * +.\tests\max-uint-1.ispc runfail x86 avx2-i64x4 Windows LLVM 3.3 cl -O2 * +.\tests\max-uint.ispc runfail x86 avx2-i64x4 Windows LLVM 3.3 cl -O2 * +.\tests\min-uint-2.ispc runfail x86 avx2-i64x4 Windows LLVM 3.3 cl -O2 * +.\tests\packed-load-1.ispc runfail x86 avx2-i64x4 Windows LLVM 3.3 cl -O2 * +.\tests\packed-store.ispc runfail x86 avx2-i64x4 Windows LLVM 3.3 cl -O2 * +.\tests\reduce-add-uint-1.ispc runfail x86 avx2-i64x4 Windows LLVM 3.3 cl -O2 * +.\tests\reduce-add-uint.ispc runfail x86 avx2-i64x4 Windows LLVM 3.3 cl -O2 * +.\tests\reduce-add-uint64-1.ispc runfail x86 avx2-i64x4 Windows LLVM 3.3 cl -O2 * +.\tests\reduce-add-uint64.ispc runfail x86 avx2-i64x4 Windows LLVM 3.3 cl -O2 * +.\tests\reduce-max-uint.ispc runfail x86 avx2-i64x4 Windows LLVM 3.3 cl -O2 * +.\tests\reduce-min-uint.ispc runfail x86 avx2-i64x4 Windows LLVM 3.3 cl -O2 * +.\tests\reduce-min-uint64.ispc runfail x86 avx2-i64x4 Windows LLVM 3.3 cl -O2 * +.\tests\uint64-max-1.ispc runfail x86 avx2-i64x4 Windows LLVM 3.3 cl -O2 * +.\tests\reduce-min-uint64.ispc runfail x86 avx1.1-i64x4 Windows LLVM 3.4 cl -O2 * +.\tests\reduce-min-uint.ispc runfail x86 avx2-i64x4 Windows LLVM 3.4 cl -O2 * +.\tests\reduce-min-uint64.ispc runfail x86 avx2-i64x4 Windows LLVM 3.4 cl -O2 * diff --git a/ispc.vcxproj b/ispc.vcxproj index 58fa5b08..b9a3b6c5 100755 --- a/ispc.vcxproj +++ b/ispc.vcxproj @@ -28,10 +28,14 @@ + + + + @@ -323,6 +327,24 @@ Building gen-bitcode-avx11-x2-64bit.cpp + + + Document + m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS -DRUNTIME=32 builtins/target-avx11-i64x4.ll | python bitcode2cpp.py builtins\target-avx11-i64x4.ll 32bit > $(Configuration)/gen-bitcode-avx11-i64x4-32bit.cpp + $(Configuration)/gen-bitcode-avx11-i64x4-32bit.cpp + builtins\util.m4;builtins\svml.m4;builtins\target-avx-common.ll;builtins\target-avx.ll;builtins\target-avx1-i64x4base.ll + Building gen-bitcode-avx11-i64x4-32bit.cpp + + + + + Document + m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS -DRUNTIME=64 builtins/target-avx11-i64x4.ll | python bitcode2cpp.py builtins\target-avx11-i64x4.ll 64bit > $(Configuration)/gen-bitcode-avx11-i64x4-64bit.cpp + $(Configuration)/gen-bitcode-avx11-i64x4-64bit.cpp + builtins\util.m4;builtins\svml.m4;builtins\target-avx-common.ll;builtins\target-avx.ll;builtins\target-avx1-i64x4base.ll + Building gen-bitcode-avx11-i64x4-64bit.cpp + + Document @@ -359,6 +381,24 @@ Building gen-bitcode-avx2-x2-64bit.cpp + + + Document + m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS -DRUNTIME=32 builtins/target-avx2-i64x4.ll | python bitcode2cpp.py builtins\target-avx2-i64x4.ll 32bit > $(Configuration)/gen-bitcode-avx2-i64x4-32bit.cpp + $(Configuration)/gen-bitcode-avx2-i64x4-32bit.cpp + builtins\util.m4;builtins\svml.m4;builtins\target-avx-common.ll;builtins\target-avx.ll;builtins\target-avx1-i64x4base.ll + Building gen-bitcode-avx2-i64x4-32bit.cpp + + + + + Document + m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS -DRUNTIME=64 builtins/target-avx2-i64x4.ll | python bitcode2cpp.py builtins\target-avx2-i64x4.ll 64bit > $(Configuration)/gen-bitcode-avx2-i64x4-64bit.cpp + $(Configuration)/gen-bitcode-avx2-i64x4-64bit.cpp + builtins\util.m4;builtins\svml.m4;builtins\target-avx-common.ll;builtins\target-avx.ll;builtins\target-avx1-i64x4base.ll + Building gen-bitcode-avx2-i64x4-64bit.cpp + + Document diff --git a/run_tests.py b/run_tests.py index b5391e1f..40851a40 100755 --- a/run_tests.py +++ b/run_tests.py @@ -454,8 +454,9 @@ def verify(): check = [["g++", "clang++", "cl"],["-O0", "-O2"],["x86","x86-64"], ["Linux","Windows","Mac"],["LLVM 3.1","LLVM 3.2","LLVM 3.3","LLVM head"], ["sse2-i32x4", "sse2-i32x8", "sse4-i32x4", "sse4-i32x8", "sse4-i16x8", - "sse4-i8x16", "avx1-i32x8", "avx1-i32x16", "avx1-i64x4", "avx1.1-i32x8", "avx1.1-i32x16", - "avx2-i32x8", "avx2-i32x16", "generic-1", "generic-4", "generic-8", + "sse4-i8x16", "avx1-i32x8", "avx1-i32x16", "avx1-i64x4", "avx1.1-i32x8", + "avx1.1-i32x16", "avx1.1-i64x4", "avx2-i32x8", "avx2-i32x16", "avx2-i64x4", + "generic-1", "generic-4", "generic-8", "generic-16", "generic-32", "generic-64"]] for i in range (0,len(f_lines)): if f_lines[i][0] == "%":