diff --git a/alloy.py b/alloy.py
index 19497b35..cdfc9127 100755
--- a/alloy.py
+++ b/alloy.py
@@ -212,10 +212,10 @@ def check_targets():
answer = answer + ["avx1-i32x8", "avx1-i32x16", "avx1-i64x4"]
if AVX11 == False and "rdrand" in f_lines[i]:
AVX11 = True;
- answer = answer + ["avx1.1-i32x8", "avx1.1-i32x16"]
+ answer = answer + ["avx1.1-i32x8", "avx1.1-i32x16", "avx1.1-i64x4"]
if AVX2 == False and "avx2" in f_lines[i]:
AVX2 = True;
- answer = answer + ["avx2-i32x8", "avx2-i32x16"]
+ answer = answer + ["avx2-i32x8", "avx2-i32x16", "avx2-i64x4"]
if current_OS == "MacOS":
f_lines = take_lines("sysctl machdep.cpu.features", "first")
if "SSE2" in f_lines:
@@ -229,10 +229,10 @@ def check_targets():
answer = answer + ["avx1-i32x8", "avx1-i32x16", "avx1-i64x4"]
if "RDRAND" in f_lines:
AVX11 = True;
- answer = answer + ["avx1.1-i32x8", "avx1.1-i32x16"]
+ answer = answer + ["avx1.1-i32x8", "avx1.1-i32x16", "avx1.1-i64x4"]
if "AVX2.0" in f_lines:
AVX2 = True;
- answer = answer + ["avx2-i32x8", "avx2-i32x16"]
+ answer = answer + ["avx2-i32x8", "avx2-i32x16", "avx2-i64x4"]
answer = answer + ["generic-4", "generic-16", "generic-8", "generic-1", "generic-32", "generic-64"]
# now check what targets we have with the help of SDE
@@ -257,9 +257,9 @@ def check_targets():
if AVX == False and "snb" in f_lines[i]:
answer_sde = answer_sde + [["-snb", "avx1-i32x8"], ["-snb", "avx1-i32x16"], ["-snb", "avx1-i64x4"]]
if AVX11 == False and "ivb" in f_lines[i]:
- answer_sde = answer_sde + [["-ivb", "avx1.1-i32x8"], ["-ivb", "avx1.1-i32x16"]]
+ answer_sde = answer_sde + [["-ivb", "avx1.1-i32x8"], ["-ivb", "avx1.1-i32x16"], ["-ivb", "avx1.1-i64x4"]]
if AVX2 == False and "hsw" in f_lines[i]:
- answer_sde = answer_sde + [["-hsw", "avx2-i32x8"], ["-hsw", "avx2-i32x16"]]
+ answer_sde = answer_sde + [["-hsw", "avx2-i32x8"], ["-hsw", "avx2-i32x16"], ["-hsw", "avx2-i64x4"]]
return [answer, answer_sde]
def build_ispc(version_LLVM, make):
diff --git a/fail_db.txt b/fail_db.txt
index f1aaaab2..9c43c7f0 100644
--- a/fail_db.txt
+++ b/fail_db.txt
@@ -1025,3 +1025,38 @@
./tests/reduce-equal.ispc compfail x86-64 avx2-i32x8 Mac LLVM 3.4 clang++3.3 -O2 *
./tests/test-141.ispc runfail x86 avx2-i32x16 Mac LLVM 3.4 clang++3.3 -O2 *
./tests/test-141.ispc runfail x86-64 avx2-i32x16 Mac LLVM 3.4 clang++3.3 -O2 *
+.\tests\exclusive-scan-add-10.ispc runfail x86 avx1.1-i64x4 Windows LLVM 3.3 cl -O2 *
+.\tests\exclusive-scan-add-9.ispc runfail x86 avx1.1-i64x4 Windows LLVM 3.3 cl -O2 *
+.\tests\max-uint-1.ispc runfail x86 avx1.1-i64x4 Windows LLVM 3.3 cl -O2 *
+.\tests\max-uint.ispc runfail x86 avx1.1-i64x4 Windows LLVM 3.3 cl -O2 *
+.\tests\min-uint-2.ispc runfail x86 avx1.1-i64x4 Windows LLVM 3.3 cl -O2 *
+.\tests\packed-load-1.ispc runfail x86 avx1.1-i64x4 Windows LLVM 3.3 cl -O2 *
+.\tests\packed-store.ispc runfail x86 avx1.1-i64x4 Windows LLVM 3.3 cl -O2 *
+.\tests\reduce-add-uint-1.ispc runfail x86 avx1.1-i64x4 Windows LLVM 3.3 cl -O2 *
+.\tests\reduce-add-uint.ispc runfail x86 avx1.1-i64x4 Windows LLVM 3.3 cl -O2 *
+.\tests\reduce-add-uint64-1.ispc runfail x86 avx1.1-i64x4 Windows LLVM 3.3 cl -O2 *
+.\tests\reduce-add-uint64.ispc runfail x86 avx1.1-i64x4 Windows LLVM 3.3 cl -O2 *
+.\tests\reduce-max-uint.ispc runfail x86 avx1.1-i64x4 Windows LLVM 3.3 cl -O2 *
+.\tests\reduce-min-uint64.ispc runfail x86 avx1.1-i64x4 Windows LLVM 3.3 cl -O2 *
+.\tests\uint64-max-1.ispc runfail x86 avx1.1-i64x4 Windows LLVM 3.3 cl -O2 *
+.\tests\uint64-max.ispc runfail x86 avx1.1-i64x4 Windows LLVM 3.3 cl -O2 *
+.\tests\uint64-min-1.ispc runfail x86 avx1.1-i64x4 Windows LLVM 3.3 cl -O2 *
+.\tests\uint64-min.ispc runfail x86 avx1.1-i64x4 Windows LLVM 3.3 cl -O2 *
+.\tests\exclusive-scan-add-10.ispc runfail x86 avx2-i64x4 Windows LLVM 3.3 cl -O2 *
+.\tests\exclusive-scan-add-9.ispc runfail x86 avx2-i64x4 Windows LLVM 3.3 cl -O2 *
+.\tests\max-uint-1.ispc runfail x86 avx2-i64x4 Windows LLVM 3.3 cl -O2 *
+.\tests\max-uint.ispc runfail x86 avx2-i64x4 Windows LLVM 3.3 cl -O2 *
+.\tests\min-uint-2.ispc runfail x86 avx2-i64x4 Windows LLVM 3.3 cl -O2 *
+.\tests\packed-load-1.ispc runfail x86 avx2-i64x4 Windows LLVM 3.3 cl -O2 *
+.\tests\packed-store.ispc runfail x86 avx2-i64x4 Windows LLVM 3.3 cl -O2 *
+.\tests\reduce-add-uint-1.ispc runfail x86 avx2-i64x4 Windows LLVM 3.3 cl -O2 *
+.\tests\reduce-add-uint.ispc runfail x86 avx2-i64x4 Windows LLVM 3.3 cl -O2 *
+.\tests\reduce-add-uint64-1.ispc runfail x86 avx2-i64x4 Windows LLVM 3.3 cl -O2 *
+.\tests\reduce-add-uint64.ispc runfail x86 avx2-i64x4 Windows LLVM 3.3 cl -O2 *
+.\tests\reduce-max-uint.ispc runfail x86 avx2-i64x4 Windows LLVM 3.3 cl -O2 *
+.\tests\reduce-min-uint.ispc runfail x86 avx2-i64x4 Windows LLVM 3.3 cl -O2 *
+.\tests\reduce-min-uint64.ispc runfail x86 avx2-i64x4 Windows LLVM 3.3 cl -O2 *
+.\tests\uint64-max-1.ispc runfail x86 avx2-i64x4 Windows LLVM 3.3 cl -O2 *
+.\tests\reduce-min-uint64.ispc runfail x86 avx1.1-i64x4 Windows LLVM 3.4 cl -O2 *
+.\tests\reduce-min-uint.ispc runfail x86 avx2-i64x4 Windows LLVM 3.4 cl -O2 *
+.\tests\reduce-min-uint64.ispc runfail x86 avx2-i64x4 Windows LLVM 3.4 cl -O2 *
diff --git a/ispc.vcxproj b/ispc.vcxproj
index 58fa5b08..b9a3b6c5 100755
--- a/ispc.vcxproj
+++ b/ispc.vcxproj
@@ -28,10 +28,14 @@
+
+
+
+
@@ -323,6 +327,24 @@
Building gen-bitcode-avx11-x2-64bit.cpp
+
+
+ Document
+ m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS -DRUNTIME=32 builtins/target-avx11-i64x4.ll | python bitcode2cpp.py builtins\target-avx11-i64x4.ll 32bit > $(Configuration)/gen-bitcode-avx11-i64x4-32bit.cpp
+ $(Configuration)/gen-bitcode-avx11-i64x4-32bit.cpp
+ builtins\util.m4;builtins\svml.m4;builtins\target-avx-common.ll;builtins\target-avx.ll;builtins\target-avx1-i64x4base.ll
+ Building gen-bitcode-avx11-i64x4-32bit.cpp
+
+
+
+
+ Document
+ m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS -DRUNTIME=64 builtins/target-avx11-i64x4.ll | python bitcode2cpp.py builtins\target-avx11-i64x4.ll 64bit > $(Configuration)/gen-bitcode-avx11-i64x4-64bit.cpp
+ $(Configuration)/gen-bitcode-avx11-i64x4-64bit.cpp
+ builtins\util.m4;builtins\svml.m4;builtins\target-avx-common.ll;builtins\target-avx.ll;builtins\target-avx1-i64x4base.ll
+ Building gen-bitcode-avx11-i64x4-64bit.cpp
+
+
Document
@@ -359,6 +381,24 @@
Building gen-bitcode-avx2-x2-64bit.cpp
+
+
+ Document
+ m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS -DRUNTIME=32 builtins/target-avx2-i64x4.ll | python bitcode2cpp.py builtins\target-avx2-i64x4.ll 32bit > $(Configuration)/gen-bitcode-avx2-i64x4-32bit.cpp
+ $(Configuration)/gen-bitcode-avx2-i64x4-32bit.cpp
+ builtins\util.m4;builtins\svml.m4;builtins\target-avx-common.ll;builtins\target-avx.ll;builtins\target-avx1-i64x4base.ll
+ Building gen-bitcode-avx2-i64x4-32bit.cpp
+
+
+
+
+ Document
+ m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS -DRUNTIME=64 builtins/target-avx2-i64x4.ll | python bitcode2cpp.py builtins\target-avx2-i64x4.ll 64bit > $(Configuration)/gen-bitcode-avx2-i64x4-64bit.cpp
+ $(Configuration)/gen-bitcode-avx2-i64x4-64bit.cpp
+ builtins\util.m4;builtins\svml.m4;builtins\target-avx-common.ll;builtins\target-avx.ll;builtins\target-avx1-i64x4base.ll
+ Building gen-bitcode-avx2-i64x4-64bit.cpp
+
+
Document
diff --git a/run_tests.py b/run_tests.py
index b5391e1f..40851a40 100755
--- a/run_tests.py
+++ b/run_tests.py
@@ -454,8 +454,9 @@ def verify():
check = [["g++", "clang++", "cl"],["-O0", "-O2"],["x86","x86-64"],
["Linux","Windows","Mac"],["LLVM 3.1","LLVM 3.2","LLVM 3.3","LLVM head"],
["sse2-i32x4", "sse2-i32x8", "sse4-i32x4", "sse4-i32x8", "sse4-i16x8",
- "sse4-i8x16", "avx1-i32x8", "avx1-i32x16", "avx1-i64x4", "avx1.1-i32x8", "avx1.1-i32x16",
- "avx2-i32x8", "avx2-i32x16", "generic-1", "generic-4", "generic-8",
+ "sse4-i8x16", "avx1-i32x8", "avx1-i32x16", "avx1-i64x4", "avx1.1-i32x8",
+ "avx1.1-i32x16", "avx1.1-i64x4", "avx2-i32x8", "avx2-i32x16", "avx2-i64x4",
+ "generic-1", "generic-4", "generic-8",
"generic-16", "generic-32", "generic-64"]]
for i in range (0,len(f_lines)):
if f_lines[i][0] == "%":