Merge pull request #637 from dbabokin/egaburov-avx2-i64x4
Windows support and testing update for avx1.1-i64x4 and avx2-i64x4 targets.
This commit is contained in:
12
alloy.py
12
alloy.py
@@ -212,10 +212,10 @@ def check_targets():
|
||||
answer = answer + ["avx1-i32x8", "avx1-i32x16", "avx1-i64x4"]
|
||||
if AVX11 == False and "rdrand" in f_lines[i]:
|
||||
AVX11 = True;
|
||||
answer = answer + ["avx1.1-i32x8", "avx1.1-i32x16"]
|
||||
answer = answer + ["avx1.1-i32x8", "avx1.1-i32x16", "avx1.1-i64x4"]
|
||||
if AVX2 == False and "avx2" in f_lines[i]:
|
||||
AVX2 = True;
|
||||
answer = answer + ["avx2-i32x8", "avx2-i32x16"]
|
||||
answer = answer + ["avx2-i32x8", "avx2-i32x16", "avx2-i64x4"]
|
||||
if current_OS == "MacOS":
|
||||
f_lines = take_lines("sysctl machdep.cpu.features", "first")
|
||||
if "SSE2" in f_lines:
|
||||
@@ -229,10 +229,10 @@ def check_targets():
|
||||
answer = answer + ["avx1-i32x8", "avx1-i32x16", "avx1-i64x4"]
|
||||
if "RDRAND" in f_lines:
|
||||
AVX11 = True;
|
||||
answer = answer + ["avx1.1-i32x8", "avx1.1-i32x16"]
|
||||
answer = answer + ["avx1.1-i32x8", "avx1.1-i32x16", "avx1.1-i64x4"]
|
||||
if "AVX2.0" in f_lines:
|
||||
AVX2 = True;
|
||||
answer = answer + ["avx2-i32x8", "avx2-i32x16"]
|
||||
answer = answer + ["avx2-i32x8", "avx2-i32x16", "avx2-i64x4"]
|
||||
|
||||
answer = answer + ["generic-4", "generic-16", "generic-8", "generic-1", "generic-32", "generic-64"]
|
||||
# now check what targets we have with the help of SDE
|
||||
@@ -257,9 +257,9 @@ def check_targets():
|
||||
if AVX == False and "snb" in f_lines[i]:
|
||||
answer_sde = answer_sde + [["-snb", "avx1-i32x8"], ["-snb", "avx1-i32x16"], ["-snb", "avx1-i64x4"]]
|
||||
if AVX11 == False and "ivb" in f_lines[i]:
|
||||
answer_sde = answer_sde + [["-ivb", "avx1.1-i32x8"], ["-ivb", "avx1.1-i32x16"]]
|
||||
answer_sde = answer_sde + [["-ivb", "avx1.1-i32x8"], ["-ivb", "avx1.1-i32x16"], ["-ivb", "avx1.1-i64x4"]]
|
||||
if AVX2 == False and "hsw" in f_lines[i]:
|
||||
answer_sde = answer_sde + [["-hsw", "avx2-i32x8"], ["-hsw", "avx2-i32x16"]]
|
||||
answer_sde = answer_sde + [["-hsw", "avx2-i32x8"], ["-hsw", "avx2-i32x16"], ["-hsw", "avx2-i64x4"]]
|
||||
return [answer, answer_sde]
|
||||
|
||||
def build_ispc(version_LLVM, make):
|
||||
|
||||
35
fail_db.txt
35
fail_db.txt
@@ -1025,3 +1025,38 @@
|
||||
./tests/reduce-equal.ispc compfail x86-64 avx2-i32x8 Mac LLVM 3.4 clang++3.3 -O2 *
|
||||
./tests/test-141.ispc runfail x86 avx2-i32x16 Mac LLVM 3.4 clang++3.3 -O2 *
|
||||
./tests/test-141.ispc runfail x86-64 avx2-i32x16 Mac LLVM 3.4 clang++3.3 -O2 *
|
||||
.\tests\exclusive-scan-add-10.ispc runfail x86 avx1.1-i64x4 Windows LLVM 3.3 cl -O2 *
|
||||
.\tests\exclusive-scan-add-9.ispc runfail x86 avx1.1-i64x4 Windows LLVM 3.3 cl -O2 *
|
||||
.\tests\max-uint-1.ispc runfail x86 avx1.1-i64x4 Windows LLVM 3.3 cl -O2 *
|
||||
.\tests\max-uint.ispc runfail x86 avx1.1-i64x4 Windows LLVM 3.3 cl -O2 *
|
||||
.\tests\min-uint-2.ispc runfail x86 avx1.1-i64x4 Windows LLVM 3.3 cl -O2 *
|
||||
.\tests\packed-load-1.ispc runfail x86 avx1.1-i64x4 Windows LLVM 3.3 cl -O2 *
|
||||
.\tests\packed-store.ispc runfail x86 avx1.1-i64x4 Windows LLVM 3.3 cl -O2 *
|
||||
.\tests\reduce-add-uint-1.ispc runfail x86 avx1.1-i64x4 Windows LLVM 3.3 cl -O2 *
|
||||
.\tests\reduce-add-uint.ispc runfail x86 avx1.1-i64x4 Windows LLVM 3.3 cl -O2 *
|
||||
.\tests\reduce-add-uint64-1.ispc runfail x86 avx1.1-i64x4 Windows LLVM 3.3 cl -O2 *
|
||||
.\tests\reduce-add-uint64.ispc runfail x86 avx1.1-i64x4 Windows LLVM 3.3 cl -O2 *
|
||||
.\tests\reduce-max-uint.ispc runfail x86 avx1.1-i64x4 Windows LLVM 3.3 cl -O2 *
|
||||
.\tests\reduce-min-uint64.ispc runfail x86 avx1.1-i64x4 Windows LLVM 3.3 cl -O2 *
|
||||
.\tests\uint64-max-1.ispc runfail x86 avx1.1-i64x4 Windows LLVM 3.3 cl -O2 *
|
||||
.\tests\uint64-max.ispc runfail x86 avx1.1-i64x4 Windows LLVM 3.3 cl -O2 *
|
||||
.\tests\uint64-min-1.ispc runfail x86 avx1.1-i64x4 Windows LLVM 3.3 cl -O2 *
|
||||
.\tests\uint64-min.ispc runfail x86 avx1.1-i64x4 Windows LLVM 3.3 cl -O2 *
|
||||
.\tests\exclusive-scan-add-10.ispc runfail x86 avx2-i64x4 Windows LLVM 3.3 cl -O2 *
|
||||
.\tests\exclusive-scan-add-9.ispc runfail x86 avx2-i64x4 Windows LLVM 3.3 cl -O2 *
|
||||
.\tests\max-uint-1.ispc runfail x86 avx2-i64x4 Windows LLVM 3.3 cl -O2 *
|
||||
.\tests\max-uint.ispc runfail x86 avx2-i64x4 Windows LLVM 3.3 cl -O2 *
|
||||
.\tests\min-uint-2.ispc runfail x86 avx2-i64x4 Windows LLVM 3.3 cl -O2 *
|
||||
.\tests\packed-load-1.ispc runfail x86 avx2-i64x4 Windows LLVM 3.3 cl -O2 *
|
||||
.\tests\packed-store.ispc runfail x86 avx2-i64x4 Windows LLVM 3.3 cl -O2 *
|
||||
.\tests\reduce-add-uint-1.ispc runfail x86 avx2-i64x4 Windows LLVM 3.3 cl -O2 *
|
||||
.\tests\reduce-add-uint.ispc runfail x86 avx2-i64x4 Windows LLVM 3.3 cl -O2 *
|
||||
.\tests\reduce-add-uint64-1.ispc runfail x86 avx2-i64x4 Windows LLVM 3.3 cl -O2 *
|
||||
.\tests\reduce-add-uint64.ispc runfail x86 avx2-i64x4 Windows LLVM 3.3 cl -O2 *
|
||||
.\tests\reduce-max-uint.ispc runfail x86 avx2-i64x4 Windows LLVM 3.3 cl -O2 *
|
||||
.\tests\reduce-min-uint.ispc runfail x86 avx2-i64x4 Windows LLVM 3.3 cl -O2 *
|
||||
.\tests\reduce-min-uint64.ispc runfail x86 avx2-i64x4 Windows LLVM 3.3 cl -O2 *
|
||||
.\tests\uint64-max-1.ispc runfail x86 avx2-i64x4 Windows LLVM 3.3 cl -O2 *
|
||||
.\tests\reduce-min-uint64.ispc runfail x86 avx1.1-i64x4 Windows LLVM 3.4 cl -O2 *
|
||||
.\tests\reduce-min-uint.ispc runfail x86 avx2-i64x4 Windows LLVM 3.4 cl -O2 *
|
||||
.\tests\reduce-min-uint64.ispc runfail x86 avx2-i64x4 Windows LLVM 3.4 cl -O2 *
|
||||
|
||||
40
ispc.vcxproj
40
ispc.vcxproj
@@ -28,10 +28,14 @@
|
||||
<ClCompile Include="$(Configuration)\gen-bitcode-avx11-64bit.cpp" />
|
||||
<ClCompile Include="$(Configuration)\gen-bitcode-avx11-x2-32bit.cpp" />
|
||||
<ClCompile Include="$(Configuration)\gen-bitcode-avx11-x2-64bit.cpp" />
|
||||
<ClCompile Include="$(Configuration)\gen-bitcode-avx11-i64x4-32bit.cpp" />
|
||||
<ClCompile Include="$(Configuration)\gen-bitcode-avx11-i64x4-64bit.cpp" />
|
||||
<ClCompile Include="$(Configuration)\gen-bitcode-avx2-32bit.cpp" />
|
||||
<ClCompile Include="$(Configuration)\gen-bitcode-avx2-64bit.cpp" />
|
||||
<ClCompile Include="$(Configuration)\gen-bitcode-avx2-x2-32bit.cpp" />
|
||||
<ClCompile Include="$(Configuration)\gen-bitcode-avx2-x2-64bit.cpp" />
|
||||
<ClCompile Include="$(Configuration)\gen-bitcode-avx2-i64x4-32bit.cpp" />
|
||||
<ClCompile Include="$(Configuration)\gen-bitcode-avx2-i64x4-64bit.cpp" />
|
||||
<ClCompile Include="$(Configuration)\gen-bitcode-c-32.cpp" />
|
||||
<ClCompile Include="$(Configuration)\gen-bitcode-c-64.cpp" />
|
||||
<ClCompile Include="$(Configuration)\gen-bitcode-dispatch.cpp" />
|
||||
@@ -323,6 +327,24 @@
|
||||
<Message>Building gen-bitcode-avx11-x2-64bit.cpp</Message>
|
||||
</CustomBuild>
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<CustomBuild Include="builtins\target-avx11-i64x4.ll">
|
||||
<FileType>Document</FileType>
|
||||
<Command>m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS -DRUNTIME=32 builtins/target-avx11-i64x4.ll | python bitcode2cpp.py builtins\target-avx11-i64x4.ll 32bit > $(Configuration)/gen-bitcode-avx11-i64x4-32bit.cpp</Command>
|
||||
<Outputs>$(Configuration)/gen-bitcode-avx11-i64x4-32bit.cpp</Outputs>
|
||||
<AdditionalInputs>builtins\util.m4;builtins\svml.m4;builtins\target-avx-common.ll;builtins\target-avx.ll;builtins\target-avx1-i64x4base.ll</AdditionalInputs>
|
||||
<Message>Building gen-bitcode-avx11-i64x4-32bit.cpp</Message>
|
||||
</CustomBuild>
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<CustomBuild Include="builtins\target-avx11-i64x4.ll">
|
||||
<FileType>Document</FileType>
|
||||
<Command>m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS -DRUNTIME=64 builtins/target-avx11-i64x4.ll | python bitcode2cpp.py builtins\target-avx11-i64x4.ll 64bit > $(Configuration)/gen-bitcode-avx11-i64x4-64bit.cpp</Command>
|
||||
<Outputs>$(Configuration)/gen-bitcode-avx11-i64x4-64bit.cpp</Outputs>
|
||||
<AdditionalInputs>builtins\util.m4;builtins\svml.m4;builtins\target-avx-common.ll;builtins\target-avx.ll;builtins\target-avx1-i64x4base.ll</AdditionalInputs>
|
||||
<Message>Building gen-bitcode-avx11-i64x4-64bit.cpp</Message>
|
||||
</CustomBuild>
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<CustomBuild Include="builtins\target-avx2.ll">
|
||||
<FileType>Document</FileType>
|
||||
@@ -359,6 +381,24 @@
|
||||
<Message>Building gen-bitcode-avx2-x2-64bit.cpp</Message>
|
||||
</CustomBuild>
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<CustomBuild Include="builtins\target-avx2-i64x4.ll">
|
||||
<FileType>Document</FileType>
|
||||
<Command>m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS -DRUNTIME=32 builtins/target-avx2-i64x4.ll | python bitcode2cpp.py builtins\target-avx2-i64x4.ll 32bit > $(Configuration)/gen-bitcode-avx2-i64x4-32bit.cpp</Command>
|
||||
<Outputs>$(Configuration)/gen-bitcode-avx2-i64x4-32bit.cpp</Outputs>
|
||||
<AdditionalInputs>builtins\util.m4;builtins\svml.m4;builtins\target-avx-common.ll;builtins\target-avx.ll;builtins\target-avx1-i64x4base.ll</AdditionalInputs>
|
||||
<Message>Building gen-bitcode-avx2-i64x4-32bit.cpp</Message>
|
||||
</CustomBuild>
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<CustomBuild Include="builtins\target-avx2-i64x4.ll">
|
||||
<FileType>Document</FileType>
|
||||
<Command>m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS -DRUNTIME=64 builtins/target-avx2-i64x4.ll | python bitcode2cpp.py builtins\target-avx2-i64x4.ll 64bit > $(Configuration)/gen-bitcode-avx2-i64x4-64bit.cpp</Command>
|
||||
<Outputs>$(Configuration)/gen-bitcode-avx2-i64x4-64bit.cpp</Outputs>
|
||||
<AdditionalInputs>builtins\util.m4;builtins\svml.m4;builtins\target-avx-common.ll;builtins\target-avx.ll;builtins\target-avx1-i64x4base.ll</AdditionalInputs>
|
||||
<Message>Building gen-bitcode-avx2-i64x4-64bit.cpp</Message>
|
||||
</CustomBuild>
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<CustomBuild Include="builtins\target-generic-1.ll">
|
||||
<FileType>Document</FileType>
|
||||
|
||||
@@ -454,8 +454,9 @@ def verify():
|
||||
check = [["g++", "clang++", "cl"],["-O0", "-O2"],["x86","x86-64"],
|
||||
["Linux","Windows","Mac"],["LLVM 3.1","LLVM 3.2","LLVM 3.3","LLVM head"],
|
||||
["sse2-i32x4", "sse2-i32x8", "sse4-i32x4", "sse4-i32x8", "sse4-i16x8",
|
||||
"sse4-i8x16", "avx1-i32x8", "avx1-i32x16", "avx1-i64x4", "avx1.1-i32x8", "avx1.1-i32x16",
|
||||
"avx2-i32x8", "avx2-i32x16", "generic-1", "generic-4", "generic-8",
|
||||
"sse4-i8x16", "avx1-i32x8", "avx1-i32x16", "avx1-i64x4", "avx1.1-i32x8",
|
||||
"avx1.1-i32x16", "avx1.1-i64x4", "avx2-i32x8", "avx2-i32x16", "avx2-i64x4",
|
||||
"generic-1", "generic-4", "generic-8",
|
||||
"generic-16", "generic-32", "generic-64"]]
|
||||
for i in range (0,len(f_lines)):
|
||||
if f_lines[i][0] == "%":
|
||||
|
||||
Reference in New Issue
Block a user