Merge pull request #637 from dbabokin/egaburov-avx2-i64x4
Windows support and testing update for avx1.1-i64x4 and avx2-i64x4 targets.
This commit is contained in:
12
alloy.py
12
alloy.py
@@ -212,10 +212,10 @@ def check_targets():
|
|||||||
answer = answer + ["avx1-i32x8", "avx1-i32x16", "avx1-i64x4"]
|
answer = answer + ["avx1-i32x8", "avx1-i32x16", "avx1-i64x4"]
|
||||||
if AVX11 == False and "rdrand" in f_lines[i]:
|
if AVX11 == False and "rdrand" in f_lines[i]:
|
||||||
AVX11 = True;
|
AVX11 = True;
|
||||||
answer = answer + ["avx1.1-i32x8", "avx1.1-i32x16"]
|
answer = answer + ["avx1.1-i32x8", "avx1.1-i32x16", "avx1.1-i64x4"]
|
||||||
if AVX2 == False and "avx2" in f_lines[i]:
|
if AVX2 == False and "avx2" in f_lines[i]:
|
||||||
AVX2 = True;
|
AVX2 = True;
|
||||||
answer = answer + ["avx2-i32x8", "avx2-i32x16"]
|
answer = answer + ["avx2-i32x8", "avx2-i32x16", "avx2-i64x4"]
|
||||||
if current_OS == "MacOS":
|
if current_OS == "MacOS":
|
||||||
f_lines = take_lines("sysctl machdep.cpu.features", "first")
|
f_lines = take_lines("sysctl machdep.cpu.features", "first")
|
||||||
if "SSE2" in f_lines:
|
if "SSE2" in f_lines:
|
||||||
@@ -229,10 +229,10 @@ def check_targets():
|
|||||||
answer = answer + ["avx1-i32x8", "avx1-i32x16", "avx1-i64x4"]
|
answer = answer + ["avx1-i32x8", "avx1-i32x16", "avx1-i64x4"]
|
||||||
if "RDRAND" in f_lines:
|
if "RDRAND" in f_lines:
|
||||||
AVX11 = True;
|
AVX11 = True;
|
||||||
answer = answer + ["avx1.1-i32x8", "avx1.1-i32x16"]
|
answer = answer + ["avx1.1-i32x8", "avx1.1-i32x16", "avx1.1-i64x4"]
|
||||||
if "AVX2.0" in f_lines:
|
if "AVX2.0" in f_lines:
|
||||||
AVX2 = True;
|
AVX2 = True;
|
||||||
answer = answer + ["avx2-i32x8", "avx2-i32x16"]
|
answer = answer + ["avx2-i32x8", "avx2-i32x16", "avx2-i64x4"]
|
||||||
|
|
||||||
answer = answer + ["generic-4", "generic-16", "generic-8", "generic-1", "generic-32", "generic-64"]
|
answer = answer + ["generic-4", "generic-16", "generic-8", "generic-1", "generic-32", "generic-64"]
|
||||||
# now check what targets we have with the help of SDE
|
# now check what targets we have with the help of SDE
|
||||||
@@ -257,9 +257,9 @@ def check_targets():
|
|||||||
if AVX == False and "snb" in f_lines[i]:
|
if AVX == False and "snb" in f_lines[i]:
|
||||||
answer_sde = answer_sde + [["-snb", "avx1-i32x8"], ["-snb", "avx1-i32x16"], ["-snb", "avx1-i64x4"]]
|
answer_sde = answer_sde + [["-snb", "avx1-i32x8"], ["-snb", "avx1-i32x16"], ["-snb", "avx1-i64x4"]]
|
||||||
if AVX11 == False and "ivb" in f_lines[i]:
|
if AVX11 == False and "ivb" in f_lines[i]:
|
||||||
answer_sde = answer_sde + [["-ivb", "avx1.1-i32x8"], ["-ivb", "avx1.1-i32x16"]]
|
answer_sde = answer_sde + [["-ivb", "avx1.1-i32x8"], ["-ivb", "avx1.1-i32x16"], ["-ivb", "avx1.1-i64x4"]]
|
||||||
if AVX2 == False and "hsw" in f_lines[i]:
|
if AVX2 == False and "hsw" in f_lines[i]:
|
||||||
answer_sde = answer_sde + [["-hsw", "avx2-i32x8"], ["-hsw", "avx2-i32x16"]]
|
answer_sde = answer_sde + [["-hsw", "avx2-i32x8"], ["-hsw", "avx2-i32x16"], ["-hsw", "avx2-i64x4"]]
|
||||||
return [answer, answer_sde]
|
return [answer, answer_sde]
|
||||||
|
|
||||||
def build_ispc(version_LLVM, make):
|
def build_ispc(version_LLVM, make):
|
||||||
|
|||||||
35
fail_db.txt
35
fail_db.txt
@@ -1025,3 +1025,38 @@
|
|||||||
./tests/reduce-equal.ispc compfail x86-64 avx2-i32x8 Mac LLVM 3.4 clang++3.3 -O2 *
|
./tests/reduce-equal.ispc compfail x86-64 avx2-i32x8 Mac LLVM 3.4 clang++3.3 -O2 *
|
||||||
./tests/test-141.ispc runfail x86 avx2-i32x16 Mac LLVM 3.4 clang++3.3 -O2 *
|
./tests/test-141.ispc runfail x86 avx2-i32x16 Mac LLVM 3.4 clang++3.3 -O2 *
|
||||||
./tests/test-141.ispc runfail x86-64 avx2-i32x16 Mac LLVM 3.4 clang++3.3 -O2 *
|
./tests/test-141.ispc runfail x86-64 avx2-i32x16 Mac LLVM 3.4 clang++3.3 -O2 *
|
||||||
|
.\tests\exclusive-scan-add-10.ispc runfail x86 avx1.1-i64x4 Windows LLVM 3.3 cl -O2 *
|
||||||
|
.\tests\exclusive-scan-add-9.ispc runfail x86 avx1.1-i64x4 Windows LLVM 3.3 cl -O2 *
|
||||||
|
.\tests\max-uint-1.ispc runfail x86 avx1.1-i64x4 Windows LLVM 3.3 cl -O2 *
|
||||||
|
.\tests\max-uint.ispc runfail x86 avx1.1-i64x4 Windows LLVM 3.3 cl -O2 *
|
||||||
|
.\tests\min-uint-2.ispc runfail x86 avx1.1-i64x4 Windows LLVM 3.3 cl -O2 *
|
||||||
|
.\tests\packed-load-1.ispc runfail x86 avx1.1-i64x4 Windows LLVM 3.3 cl -O2 *
|
||||||
|
.\tests\packed-store.ispc runfail x86 avx1.1-i64x4 Windows LLVM 3.3 cl -O2 *
|
||||||
|
.\tests\reduce-add-uint-1.ispc runfail x86 avx1.1-i64x4 Windows LLVM 3.3 cl -O2 *
|
||||||
|
.\tests\reduce-add-uint.ispc runfail x86 avx1.1-i64x4 Windows LLVM 3.3 cl -O2 *
|
||||||
|
.\tests\reduce-add-uint64-1.ispc runfail x86 avx1.1-i64x4 Windows LLVM 3.3 cl -O2 *
|
||||||
|
.\tests\reduce-add-uint64.ispc runfail x86 avx1.1-i64x4 Windows LLVM 3.3 cl -O2 *
|
||||||
|
.\tests\reduce-max-uint.ispc runfail x86 avx1.1-i64x4 Windows LLVM 3.3 cl -O2 *
|
||||||
|
.\tests\reduce-min-uint64.ispc runfail x86 avx1.1-i64x4 Windows LLVM 3.3 cl -O2 *
|
||||||
|
.\tests\uint64-max-1.ispc runfail x86 avx1.1-i64x4 Windows LLVM 3.3 cl -O2 *
|
||||||
|
.\tests\uint64-max.ispc runfail x86 avx1.1-i64x4 Windows LLVM 3.3 cl -O2 *
|
||||||
|
.\tests\uint64-min-1.ispc runfail x86 avx1.1-i64x4 Windows LLVM 3.3 cl -O2 *
|
||||||
|
.\tests\uint64-min.ispc runfail x86 avx1.1-i64x4 Windows LLVM 3.3 cl -O2 *
|
||||||
|
.\tests\exclusive-scan-add-10.ispc runfail x86 avx2-i64x4 Windows LLVM 3.3 cl -O2 *
|
||||||
|
.\tests\exclusive-scan-add-9.ispc runfail x86 avx2-i64x4 Windows LLVM 3.3 cl -O2 *
|
||||||
|
.\tests\max-uint-1.ispc runfail x86 avx2-i64x4 Windows LLVM 3.3 cl -O2 *
|
||||||
|
.\tests\max-uint.ispc runfail x86 avx2-i64x4 Windows LLVM 3.3 cl -O2 *
|
||||||
|
.\tests\min-uint-2.ispc runfail x86 avx2-i64x4 Windows LLVM 3.3 cl -O2 *
|
||||||
|
.\tests\packed-load-1.ispc runfail x86 avx2-i64x4 Windows LLVM 3.3 cl -O2 *
|
||||||
|
.\tests\packed-store.ispc runfail x86 avx2-i64x4 Windows LLVM 3.3 cl -O2 *
|
||||||
|
.\tests\reduce-add-uint-1.ispc runfail x86 avx2-i64x4 Windows LLVM 3.3 cl -O2 *
|
||||||
|
.\tests\reduce-add-uint.ispc runfail x86 avx2-i64x4 Windows LLVM 3.3 cl -O2 *
|
||||||
|
.\tests\reduce-add-uint64-1.ispc runfail x86 avx2-i64x4 Windows LLVM 3.3 cl -O2 *
|
||||||
|
.\tests\reduce-add-uint64.ispc runfail x86 avx2-i64x4 Windows LLVM 3.3 cl -O2 *
|
||||||
|
.\tests\reduce-max-uint.ispc runfail x86 avx2-i64x4 Windows LLVM 3.3 cl -O2 *
|
||||||
|
.\tests\reduce-min-uint.ispc runfail x86 avx2-i64x4 Windows LLVM 3.3 cl -O2 *
|
||||||
|
.\tests\reduce-min-uint64.ispc runfail x86 avx2-i64x4 Windows LLVM 3.3 cl -O2 *
|
||||||
|
.\tests\uint64-max-1.ispc runfail x86 avx2-i64x4 Windows LLVM 3.3 cl -O2 *
|
||||||
|
.\tests\reduce-min-uint64.ispc runfail x86 avx1.1-i64x4 Windows LLVM 3.4 cl -O2 *
|
||||||
|
.\tests\reduce-min-uint.ispc runfail x86 avx2-i64x4 Windows LLVM 3.4 cl -O2 *
|
||||||
|
.\tests\reduce-min-uint64.ispc runfail x86 avx2-i64x4 Windows LLVM 3.4 cl -O2 *
|
||||||
|
|||||||
40
ispc.vcxproj
40
ispc.vcxproj
@@ -28,10 +28,14 @@
|
|||||||
<ClCompile Include="$(Configuration)\gen-bitcode-avx11-64bit.cpp" />
|
<ClCompile Include="$(Configuration)\gen-bitcode-avx11-64bit.cpp" />
|
||||||
<ClCompile Include="$(Configuration)\gen-bitcode-avx11-x2-32bit.cpp" />
|
<ClCompile Include="$(Configuration)\gen-bitcode-avx11-x2-32bit.cpp" />
|
||||||
<ClCompile Include="$(Configuration)\gen-bitcode-avx11-x2-64bit.cpp" />
|
<ClCompile Include="$(Configuration)\gen-bitcode-avx11-x2-64bit.cpp" />
|
||||||
|
<ClCompile Include="$(Configuration)\gen-bitcode-avx11-i64x4-32bit.cpp" />
|
||||||
|
<ClCompile Include="$(Configuration)\gen-bitcode-avx11-i64x4-64bit.cpp" />
|
||||||
<ClCompile Include="$(Configuration)\gen-bitcode-avx2-32bit.cpp" />
|
<ClCompile Include="$(Configuration)\gen-bitcode-avx2-32bit.cpp" />
|
||||||
<ClCompile Include="$(Configuration)\gen-bitcode-avx2-64bit.cpp" />
|
<ClCompile Include="$(Configuration)\gen-bitcode-avx2-64bit.cpp" />
|
||||||
<ClCompile Include="$(Configuration)\gen-bitcode-avx2-x2-32bit.cpp" />
|
<ClCompile Include="$(Configuration)\gen-bitcode-avx2-x2-32bit.cpp" />
|
||||||
<ClCompile Include="$(Configuration)\gen-bitcode-avx2-x2-64bit.cpp" />
|
<ClCompile Include="$(Configuration)\gen-bitcode-avx2-x2-64bit.cpp" />
|
||||||
|
<ClCompile Include="$(Configuration)\gen-bitcode-avx2-i64x4-32bit.cpp" />
|
||||||
|
<ClCompile Include="$(Configuration)\gen-bitcode-avx2-i64x4-64bit.cpp" />
|
||||||
<ClCompile Include="$(Configuration)\gen-bitcode-c-32.cpp" />
|
<ClCompile Include="$(Configuration)\gen-bitcode-c-32.cpp" />
|
||||||
<ClCompile Include="$(Configuration)\gen-bitcode-c-64.cpp" />
|
<ClCompile Include="$(Configuration)\gen-bitcode-c-64.cpp" />
|
||||||
<ClCompile Include="$(Configuration)\gen-bitcode-dispatch.cpp" />
|
<ClCompile Include="$(Configuration)\gen-bitcode-dispatch.cpp" />
|
||||||
@@ -323,6 +327,24 @@
|
|||||||
<Message>Building gen-bitcode-avx11-x2-64bit.cpp</Message>
|
<Message>Building gen-bitcode-avx11-x2-64bit.cpp</Message>
|
||||||
</CustomBuild>
|
</CustomBuild>
|
||||||
</ItemGroup>
|
</ItemGroup>
|
||||||
|
<ItemGroup>
|
||||||
|
<CustomBuild Include="builtins\target-avx11-i64x4.ll">
|
||||||
|
<FileType>Document</FileType>
|
||||||
|
<Command>m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS -DRUNTIME=32 builtins/target-avx11-i64x4.ll | python bitcode2cpp.py builtins\target-avx11-i64x4.ll 32bit > $(Configuration)/gen-bitcode-avx11-i64x4-32bit.cpp</Command>
|
||||||
|
<Outputs>$(Configuration)/gen-bitcode-avx11-i64x4-32bit.cpp</Outputs>
|
||||||
|
<AdditionalInputs>builtins\util.m4;builtins\svml.m4;builtins\target-avx-common.ll;builtins\target-avx.ll;builtins\target-avx1-i64x4base.ll</AdditionalInputs>
|
||||||
|
<Message>Building gen-bitcode-avx11-i64x4-32bit.cpp</Message>
|
||||||
|
</CustomBuild>
|
||||||
|
</ItemGroup>
|
||||||
|
<ItemGroup>
|
||||||
|
<CustomBuild Include="builtins\target-avx11-i64x4.ll">
|
||||||
|
<FileType>Document</FileType>
|
||||||
|
<Command>m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS -DRUNTIME=64 builtins/target-avx11-i64x4.ll | python bitcode2cpp.py builtins\target-avx11-i64x4.ll 64bit > $(Configuration)/gen-bitcode-avx11-i64x4-64bit.cpp</Command>
|
||||||
|
<Outputs>$(Configuration)/gen-bitcode-avx11-i64x4-64bit.cpp</Outputs>
|
||||||
|
<AdditionalInputs>builtins\util.m4;builtins\svml.m4;builtins\target-avx-common.ll;builtins\target-avx.ll;builtins\target-avx1-i64x4base.ll</AdditionalInputs>
|
||||||
|
<Message>Building gen-bitcode-avx11-i64x4-64bit.cpp</Message>
|
||||||
|
</CustomBuild>
|
||||||
|
</ItemGroup>
|
||||||
<ItemGroup>
|
<ItemGroup>
|
||||||
<CustomBuild Include="builtins\target-avx2.ll">
|
<CustomBuild Include="builtins\target-avx2.ll">
|
||||||
<FileType>Document</FileType>
|
<FileType>Document</FileType>
|
||||||
@@ -359,6 +381,24 @@
|
|||||||
<Message>Building gen-bitcode-avx2-x2-64bit.cpp</Message>
|
<Message>Building gen-bitcode-avx2-x2-64bit.cpp</Message>
|
||||||
</CustomBuild>
|
</CustomBuild>
|
||||||
</ItemGroup>
|
</ItemGroup>
|
||||||
|
<ItemGroup>
|
||||||
|
<CustomBuild Include="builtins\target-avx2-i64x4.ll">
|
||||||
|
<FileType>Document</FileType>
|
||||||
|
<Command>m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS -DRUNTIME=32 builtins/target-avx2-i64x4.ll | python bitcode2cpp.py builtins\target-avx2-i64x4.ll 32bit > $(Configuration)/gen-bitcode-avx2-i64x4-32bit.cpp</Command>
|
||||||
|
<Outputs>$(Configuration)/gen-bitcode-avx2-i64x4-32bit.cpp</Outputs>
|
||||||
|
<AdditionalInputs>builtins\util.m4;builtins\svml.m4;builtins\target-avx-common.ll;builtins\target-avx.ll;builtins\target-avx1-i64x4base.ll</AdditionalInputs>
|
||||||
|
<Message>Building gen-bitcode-avx2-i64x4-32bit.cpp</Message>
|
||||||
|
</CustomBuild>
|
||||||
|
</ItemGroup>
|
||||||
|
<ItemGroup>
|
||||||
|
<CustomBuild Include="builtins\target-avx2-i64x4.ll">
|
||||||
|
<FileType>Document</FileType>
|
||||||
|
<Command>m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS -DRUNTIME=64 builtins/target-avx2-i64x4.ll | python bitcode2cpp.py builtins\target-avx2-i64x4.ll 64bit > $(Configuration)/gen-bitcode-avx2-i64x4-64bit.cpp</Command>
|
||||||
|
<Outputs>$(Configuration)/gen-bitcode-avx2-i64x4-64bit.cpp</Outputs>
|
||||||
|
<AdditionalInputs>builtins\util.m4;builtins\svml.m4;builtins\target-avx-common.ll;builtins\target-avx.ll;builtins\target-avx1-i64x4base.ll</AdditionalInputs>
|
||||||
|
<Message>Building gen-bitcode-avx2-i64x4-64bit.cpp</Message>
|
||||||
|
</CustomBuild>
|
||||||
|
</ItemGroup>
|
||||||
<ItemGroup>
|
<ItemGroup>
|
||||||
<CustomBuild Include="builtins\target-generic-1.ll">
|
<CustomBuild Include="builtins\target-generic-1.ll">
|
||||||
<FileType>Document</FileType>
|
<FileType>Document</FileType>
|
||||||
|
|||||||
@@ -454,8 +454,9 @@ def verify():
|
|||||||
check = [["g++", "clang++", "cl"],["-O0", "-O2"],["x86","x86-64"],
|
check = [["g++", "clang++", "cl"],["-O0", "-O2"],["x86","x86-64"],
|
||||||
["Linux","Windows","Mac"],["LLVM 3.1","LLVM 3.2","LLVM 3.3","LLVM head"],
|
["Linux","Windows","Mac"],["LLVM 3.1","LLVM 3.2","LLVM 3.3","LLVM head"],
|
||||||
["sse2-i32x4", "sse2-i32x8", "sse4-i32x4", "sse4-i32x8", "sse4-i16x8",
|
["sse2-i32x4", "sse2-i32x8", "sse4-i32x4", "sse4-i32x8", "sse4-i16x8",
|
||||||
"sse4-i8x16", "avx1-i32x8", "avx1-i32x16", "avx1-i64x4", "avx1.1-i32x8", "avx1.1-i32x16",
|
"sse4-i8x16", "avx1-i32x8", "avx1-i32x16", "avx1-i64x4", "avx1.1-i32x8",
|
||||||
"avx2-i32x8", "avx2-i32x16", "generic-1", "generic-4", "generic-8",
|
"avx1.1-i32x16", "avx1.1-i64x4", "avx2-i32x8", "avx2-i32x16", "avx2-i64x4",
|
||||||
|
"generic-1", "generic-4", "generic-8",
|
||||||
"generic-16", "generic-32", "generic-64"]]
|
"generic-16", "generic-32", "generic-64"]]
|
||||||
for i in range (0,len(f_lines)):
|
for i in range (0,len(f_lines)):
|
||||||
if f_lines[i][0] == "%":
|
if f_lines[i][0] == "%":
|
||||||
|
|||||||
Reference in New Issue
Block a user