Merge pull request #637 from dbabokin/egaburov-avx2-i64x4

Windows support and testing update for avx1.1-i64x4 and avx2-i64x4 targets.
This commit is contained in:
Dmitry Babokin
2013-10-18 03:25:10 -07:00
4 changed files with 84 additions and 8 deletions

View File

@@ -212,10 +212,10 @@ def check_targets():
answer = answer + ["avx1-i32x8", "avx1-i32x16", "avx1-i64x4"]
if AVX11 == False and "rdrand" in f_lines[i]:
AVX11 = True;
answer = answer + ["avx1.1-i32x8", "avx1.1-i32x16"]
answer = answer + ["avx1.1-i32x8", "avx1.1-i32x16", "avx1.1-i64x4"]
if AVX2 == False and "avx2" in f_lines[i]:
AVX2 = True;
answer = answer + ["avx2-i32x8", "avx2-i32x16"]
answer = answer + ["avx2-i32x8", "avx2-i32x16", "avx2-i64x4"]
if current_OS == "MacOS":
f_lines = take_lines("sysctl machdep.cpu.features", "first")
if "SSE2" in f_lines:
@@ -229,10 +229,10 @@ def check_targets():
answer = answer + ["avx1-i32x8", "avx1-i32x16", "avx1-i64x4"]
if "RDRAND" in f_lines:
AVX11 = True;
answer = answer + ["avx1.1-i32x8", "avx1.1-i32x16"]
answer = answer + ["avx1.1-i32x8", "avx1.1-i32x16", "avx1.1-i64x4"]
if "AVX2.0" in f_lines:
AVX2 = True;
answer = answer + ["avx2-i32x8", "avx2-i32x16"]
answer = answer + ["avx2-i32x8", "avx2-i32x16", "avx2-i64x4"]
answer = answer + ["generic-4", "generic-16", "generic-8", "generic-1", "generic-32", "generic-64"]
# now check what targets we have with the help of SDE
@@ -257,9 +257,9 @@ def check_targets():
if AVX == False and "snb" in f_lines[i]:
answer_sde = answer_sde + [["-snb", "avx1-i32x8"], ["-snb", "avx1-i32x16"], ["-snb", "avx1-i64x4"]]
if AVX11 == False and "ivb" in f_lines[i]:
answer_sde = answer_sde + [["-ivb", "avx1.1-i32x8"], ["-ivb", "avx1.1-i32x16"]]
answer_sde = answer_sde + [["-ivb", "avx1.1-i32x8"], ["-ivb", "avx1.1-i32x16"], ["-ivb", "avx1.1-i64x4"]]
if AVX2 == False and "hsw" in f_lines[i]:
answer_sde = answer_sde + [["-hsw", "avx2-i32x8"], ["-hsw", "avx2-i32x16"]]
answer_sde = answer_sde + [["-hsw", "avx2-i32x8"], ["-hsw", "avx2-i32x16"], ["-hsw", "avx2-i64x4"]]
return [answer, answer_sde]
def build_ispc(version_LLVM, make):

View File

@@ -1025,3 +1025,38 @@
./tests/reduce-equal.ispc compfail x86-64 avx2-i32x8 Mac LLVM 3.4 clang++3.3 -O2 *
./tests/test-141.ispc runfail x86 avx2-i32x16 Mac LLVM 3.4 clang++3.3 -O2 *
./tests/test-141.ispc runfail x86-64 avx2-i32x16 Mac LLVM 3.4 clang++3.3 -O2 *
.\tests\exclusive-scan-add-10.ispc runfail x86 avx1.1-i64x4 Windows LLVM 3.3 cl -O2 *
.\tests\exclusive-scan-add-9.ispc runfail x86 avx1.1-i64x4 Windows LLVM 3.3 cl -O2 *
.\tests\max-uint-1.ispc runfail x86 avx1.1-i64x4 Windows LLVM 3.3 cl -O2 *
.\tests\max-uint.ispc runfail x86 avx1.1-i64x4 Windows LLVM 3.3 cl -O2 *
.\tests\min-uint-2.ispc runfail x86 avx1.1-i64x4 Windows LLVM 3.3 cl -O2 *
.\tests\packed-load-1.ispc runfail x86 avx1.1-i64x4 Windows LLVM 3.3 cl -O2 *
.\tests\packed-store.ispc runfail x86 avx1.1-i64x4 Windows LLVM 3.3 cl -O2 *
.\tests\reduce-add-uint-1.ispc runfail x86 avx1.1-i64x4 Windows LLVM 3.3 cl -O2 *
.\tests\reduce-add-uint.ispc runfail x86 avx1.1-i64x4 Windows LLVM 3.3 cl -O2 *
.\tests\reduce-add-uint64-1.ispc runfail x86 avx1.1-i64x4 Windows LLVM 3.3 cl -O2 *
.\tests\reduce-add-uint64.ispc runfail x86 avx1.1-i64x4 Windows LLVM 3.3 cl -O2 *
.\tests\reduce-max-uint.ispc runfail x86 avx1.1-i64x4 Windows LLVM 3.3 cl -O2 *
.\tests\reduce-min-uint64.ispc runfail x86 avx1.1-i64x4 Windows LLVM 3.3 cl -O2 *
.\tests\uint64-max-1.ispc runfail x86 avx1.1-i64x4 Windows LLVM 3.3 cl -O2 *
.\tests\uint64-max.ispc runfail x86 avx1.1-i64x4 Windows LLVM 3.3 cl -O2 *
.\tests\uint64-min-1.ispc runfail x86 avx1.1-i64x4 Windows LLVM 3.3 cl -O2 *
.\tests\uint64-min.ispc runfail x86 avx1.1-i64x4 Windows LLVM 3.3 cl -O2 *
.\tests\exclusive-scan-add-10.ispc runfail x86 avx2-i64x4 Windows LLVM 3.3 cl -O2 *
.\tests\exclusive-scan-add-9.ispc runfail x86 avx2-i64x4 Windows LLVM 3.3 cl -O2 *
.\tests\max-uint-1.ispc runfail x86 avx2-i64x4 Windows LLVM 3.3 cl -O2 *
.\tests\max-uint.ispc runfail x86 avx2-i64x4 Windows LLVM 3.3 cl -O2 *
.\tests\min-uint-2.ispc runfail x86 avx2-i64x4 Windows LLVM 3.3 cl -O2 *
.\tests\packed-load-1.ispc runfail x86 avx2-i64x4 Windows LLVM 3.3 cl -O2 *
.\tests\packed-store.ispc runfail x86 avx2-i64x4 Windows LLVM 3.3 cl -O2 *
.\tests\reduce-add-uint-1.ispc runfail x86 avx2-i64x4 Windows LLVM 3.3 cl -O2 *
.\tests\reduce-add-uint.ispc runfail x86 avx2-i64x4 Windows LLVM 3.3 cl -O2 *
.\tests\reduce-add-uint64-1.ispc runfail x86 avx2-i64x4 Windows LLVM 3.3 cl -O2 *
.\tests\reduce-add-uint64.ispc runfail x86 avx2-i64x4 Windows LLVM 3.3 cl -O2 *
.\tests\reduce-max-uint.ispc runfail x86 avx2-i64x4 Windows LLVM 3.3 cl -O2 *
.\tests\reduce-min-uint.ispc runfail x86 avx2-i64x4 Windows LLVM 3.3 cl -O2 *
.\tests\reduce-min-uint64.ispc runfail x86 avx2-i64x4 Windows LLVM 3.3 cl -O2 *
.\tests\uint64-max-1.ispc runfail x86 avx2-i64x4 Windows LLVM 3.3 cl -O2 *
.\tests\reduce-min-uint64.ispc runfail x86 avx1.1-i64x4 Windows LLVM 3.4 cl -O2 *
.\tests\reduce-min-uint.ispc runfail x86 avx2-i64x4 Windows LLVM 3.4 cl -O2 *
.\tests\reduce-min-uint64.ispc runfail x86 avx2-i64x4 Windows LLVM 3.4 cl -O2 *

View File

@@ -28,10 +28,14 @@
<ClCompile Include="$(Configuration)\gen-bitcode-avx11-64bit.cpp" />
<ClCompile Include="$(Configuration)\gen-bitcode-avx11-x2-32bit.cpp" />
<ClCompile Include="$(Configuration)\gen-bitcode-avx11-x2-64bit.cpp" />
<ClCompile Include="$(Configuration)\gen-bitcode-avx11-i64x4-32bit.cpp" />
<ClCompile Include="$(Configuration)\gen-bitcode-avx11-i64x4-64bit.cpp" />
<ClCompile Include="$(Configuration)\gen-bitcode-avx2-32bit.cpp" />
<ClCompile Include="$(Configuration)\gen-bitcode-avx2-64bit.cpp" />
<ClCompile Include="$(Configuration)\gen-bitcode-avx2-x2-32bit.cpp" />
<ClCompile Include="$(Configuration)\gen-bitcode-avx2-x2-64bit.cpp" />
<ClCompile Include="$(Configuration)\gen-bitcode-avx2-i64x4-32bit.cpp" />
<ClCompile Include="$(Configuration)\gen-bitcode-avx2-i64x4-64bit.cpp" />
<ClCompile Include="$(Configuration)\gen-bitcode-c-32.cpp" />
<ClCompile Include="$(Configuration)\gen-bitcode-c-64.cpp" />
<ClCompile Include="$(Configuration)\gen-bitcode-dispatch.cpp" />
@@ -323,6 +327,24 @@
<Message>Building gen-bitcode-avx11-x2-64bit.cpp</Message>
</CustomBuild>
</ItemGroup>
<ItemGroup>
<!-- Custom build step, 32bit runtime variant of builtins\target-avx11-i64x4.ll:
     the .ll file is preprocessed by m4 (include path builtins/, RUNTIME=32,
     BUILD_OS=WINDOWS) and piped into bitcode2cpp.py, whose stdout is redirected
     to $(Configuration)/gen-bitcode-avx11-i64x4-32bit.cpp. -->
<CustomBuild Include="builtins\target-avx11-i64x4.ll">
<FileType>Document</FileType>
<!-- %LLVM_VERSION% is left for the command shell to expand, so it has to be
     set in the environment of the build (NOTE(review): confirm where it is set). -->
<Command>m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS -DRUNTIME=32 builtins/target-avx11-i64x4.ll | python bitcode2cpp.py builtins\target-avx11-i64x4.ll 32bit &gt; $(Configuration)/gen-bitcode-avx11-i64x4-32bit.cpp</Command>
<Outputs>$(Configuration)/gen-bitcode-avx11-i64x4-32bit.cpp</Outputs>
<!-- Extra dependencies of this step besides the Include file itself; presumably
     these are the files pulled in through m4 includes (verify against the .ll sources). -->
<AdditionalInputs>builtins\util.m4;builtins\svml.m4;builtins\target-avx-common.ll;builtins\target-avx.ll;builtins\target-avx1-i64x4base.ll</AdditionalInputs>
<Message>Building gen-bitcode-avx11-i64x4-32bit.cpp</Message>
</CustomBuild>
</ItemGroup>
<ItemGroup>
<!-- Custom build step, 64bit runtime variant of builtins\target-avx11-i64x4.ll:
     the .ll file is preprocessed by m4 (include path builtins/, RUNTIME=64,
     BUILD_OS=WINDOWS) and piped into bitcode2cpp.py, whose stdout is redirected
     to $(Configuration)/gen-bitcode-avx11-i64x4-64bit.cpp. -->
<CustomBuild Include="builtins\target-avx11-i64x4.ll">
<FileType>Document</FileType>
<!-- %LLVM_VERSION% is left for the command shell to expand, so it has to be
     set in the environment of the build (NOTE(review): confirm where it is set). -->
<Command>m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS -DRUNTIME=64 builtins/target-avx11-i64x4.ll | python bitcode2cpp.py builtins\target-avx11-i64x4.ll 64bit &gt; $(Configuration)/gen-bitcode-avx11-i64x4-64bit.cpp</Command>
<Outputs>$(Configuration)/gen-bitcode-avx11-i64x4-64bit.cpp</Outputs>
<!-- Extra dependencies of this step besides the Include file itself; presumably
     these are the files pulled in through m4 includes (verify against the .ll sources). -->
<AdditionalInputs>builtins\util.m4;builtins\svml.m4;builtins\target-avx-common.ll;builtins\target-avx.ll;builtins\target-avx1-i64x4base.ll</AdditionalInputs>
<Message>Building gen-bitcode-avx11-i64x4-64bit.cpp</Message>
</CustomBuild>
</ItemGroup>
<ItemGroup>
<CustomBuild Include="builtins\target-avx2.ll">
<FileType>Document</FileType>
@@ -359,6 +381,24 @@
<Message>Building gen-bitcode-avx2-x2-64bit.cpp</Message>
</CustomBuild>
</ItemGroup>
<ItemGroup>
<!-- Custom build step, 32bit runtime variant of builtins\target-avx2-i64x4.ll:
     the .ll file is preprocessed by m4 (include path builtins/, RUNTIME=32,
     BUILD_OS=WINDOWS) and piped into bitcode2cpp.py, whose stdout is redirected
     to $(Configuration)/gen-bitcode-avx2-i64x4-32bit.cpp. -->
<CustomBuild Include="builtins\target-avx2-i64x4.ll">
<FileType>Document</FileType>
<!-- %LLVM_VERSION% is left for the command shell to expand, so it has to be
     set in the environment of the build (NOTE(review): confirm where it is set). -->
<Command>m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS -DRUNTIME=32 builtins/target-avx2-i64x4.ll | python bitcode2cpp.py builtins\target-avx2-i64x4.ll 32bit &gt; $(Configuration)/gen-bitcode-avx2-i64x4-32bit.cpp</Command>
<Outputs>$(Configuration)/gen-bitcode-avx2-i64x4-32bit.cpp</Outputs>
<!-- Extra dependencies of this step besides the Include file itself. The list
     names the avx/avx1 base files, the same list the avx11-i64x4 steps use.
     NOTE(review): confirm target-avx2-i64x4.ll really includes these. -->
<AdditionalInputs>builtins\util.m4;builtins\svml.m4;builtins\target-avx-common.ll;builtins\target-avx.ll;builtins\target-avx1-i64x4base.ll</AdditionalInputs>
<Message>Building gen-bitcode-avx2-i64x4-32bit.cpp</Message>
</CustomBuild>
</ItemGroup>
<ItemGroup>
<!-- Custom build step, 64bit runtime variant of builtins\target-avx2-i64x4.ll:
     the .ll file is preprocessed by m4 (include path builtins/, RUNTIME=64,
     BUILD_OS=WINDOWS) and piped into bitcode2cpp.py, whose stdout is redirected
     to $(Configuration)/gen-bitcode-avx2-i64x4-64bit.cpp. -->
<CustomBuild Include="builtins\target-avx2-i64x4.ll">
<FileType>Document</FileType>
<!-- %LLVM_VERSION% is left for the command shell to expand, so it has to be
     set in the environment of the build (NOTE(review): confirm where it is set). -->
<Command>m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS -DRUNTIME=64 builtins/target-avx2-i64x4.ll | python bitcode2cpp.py builtins\target-avx2-i64x4.ll 64bit &gt; $(Configuration)/gen-bitcode-avx2-i64x4-64bit.cpp</Command>
<Outputs>$(Configuration)/gen-bitcode-avx2-i64x4-64bit.cpp</Outputs>
<!-- Extra dependencies of this step besides the Include file itself. The list
     names the avx/avx1 base files, the same list the avx11-i64x4 steps use.
     NOTE(review): confirm target-avx2-i64x4.ll really includes these. -->
<AdditionalInputs>builtins\util.m4;builtins\svml.m4;builtins\target-avx-common.ll;builtins\target-avx.ll;builtins\target-avx1-i64x4base.ll</AdditionalInputs>
<Message>Building gen-bitcode-avx2-i64x4-64bit.cpp</Message>
</CustomBuild>
</ItemGroup>
<ItemGroup>
<CustomBuild Include="builtins\target-generic-1.ll">
<FileType>Document</FileType>

View File

@@ -454,8 +454,9 @@ def verify():
check = [["g++", "clang++", "cl"],["-O0", "-O2"],["x86","x86-64"],
["Linux","Windows","Mac"],["LLVM 3.1","LLVM 3.2","LLVM 3.3","LLVM head"],
["sse2-i32x4", "sse2-i32x8", "sse4-i32x4", "sse4-i32x8", "sse4-i16x8",
"sse4-i8x16", "avx1-i32x8", "avx1-i32x16", "avx1-i64x4", "avx1.1-i32x8", "avx1.1-i32x16",
"avx2-i32x8", "avx2-i32x16", "generic-1", "generic-4", "generic-8",
"sse4-i8x16", "avx1-i32x8", "avx1-i32x16", "avx1-i64x4", "avx1.1-i32x8",
"avx1.1-i32x16", "avx1.1-i64x4", "avx2-i32x8", "avx2-i32x16", "avx2-i64x4",
"generic-1", "generic-4", "generic-8",
"generic-16", "generic-32", "generic-64"]]
for i in range (0,len(f_lines)):
if f_lines[i][0] == "%":