diff --git a/alloy.py b/alloy.py index 21e428de..525f90d0 100755 --- a/alloy.py +++ b/alloy.py @@ -635,6 +635,7 @@ import platform import smtplib import datetime import copy +import multiprocessing from email.MIMEMultipart import MIMEMultipart from email.MIMEBase import MIMEBase from email.mime.text import MIMEText @@ -663,13 +664,14 @@ if __name__ == '__main__': "Try to build compiler with all LLVM\n\talloy.py -r --only=build\n" + "Performance validation run with 10 runs of each test and comparing to branch 'old'\n\talloy.py -r --only=performance --compare-with=old --number=10\n" + "Validation run. Update fail_db.txt with new fails, send results to my@my.com\n\talloy.py -r --update-errors=F --notify='my@my.com'\n") + num_threads="%s" % multiprocessing.cpu_count() parser = MyParser(usage="Usage: alloy.py -r/-b [options]", epilog=examples) parser.add_option('-b', '--build-llvm', dest='build_llvm', help='ask to build LLVM', default=False, action="store_true") parser.add_option('-r', '--run', dest='validation_run', help='ask for validation run', default=False, action="store_true") parser.add_option('-j', dest='speed', - help='set -j for make', default="8") + help='set -j for make', default=num_threads) # options for activity "build LLVM" llvm_group = OptionGroup(parser, "Options for building LLVM", "These options must be used with -b option.") diff --git a/cbackend.cpp b/cbackend.cpp index 8535653f..40f87074 100644 --- a/cbackend.cpp +++ b/cbackend.cpp @@ -241,7 +241,11 @@ namespace { class CBEMCAsmInfo : public llvm::MCAsmInfo { public: CBEMCAsmInfo() { +#if defined(LLVM_3_5) + GlobalPrefix = '\0'; +#else GlobalPrefix = ""; +#endif PrivateGlobalPrefix = ""; } }; diff --git a/examples/aobench/aobench.vcxproj b/examples/aobench/aobench.vcxproj index a5b354ce..c46ee41a 100644 --- a/examples/aobench/aobench.vcxproj +++ b/examples/aobench/aobench.vcxproj @@ -1,181 +1,16 @@  - - - Debug - Win32 - - - Debug - x64 - - - Release - Win32 - - - Release - x64 - - + + 
{F29204CA-19DF-4F3C-87D5-03F4EEDAAFEB} + Win32Proj + aobench + ao + sse2,sse4,avx1-i32x8 + + - - - Document - $(ISPC_compiler) -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --arch=x86 --target=sse2,sse4,avx - - $(ISPC_compiler) -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --target=sse2,sse4,avx - - $(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_avx.obj;$(TargetDir)%(Filename)_ispc.h - $(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_avx.obj;$(TargetDir)%(Filename)_ispc.h - $(ISPC_compiler) -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --arch=x86 --target=sse2,sse4,avx - - $(ISPC_compiler) -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --target=sse2,sse4,avx - - $(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_avx.obj;$(TargetDir)%(Filename)_ispc.h - $(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_avx.obj;$(TargetDir)%(Filename)_ispc.h - - - - {F29204CA-19DF-4F3C-87D5-03F4EEDAAFEB} - Win32Proj - aobench - ispc - - - - Application - true - Unicode - - - Application - true - Unicode - - - Application - false - true - Unicode - - - Application - false - true - Unicode - - - - - - - - - - - - - - - - - - - true - $(ProjectDir)..\..;$(ExecutablePath) - ao - - - true - $(ExecutablePath);$(ProjectDir)..\.. 
- ao - - - false - $(ProjectDir)..\..;$(ExecutablePath) - ao - - - false - $(ProjectDir)..\..;$(ExecutablePath) - ao - - - - - - Level3 - Disabled - WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) - $(TargetDir) - true - Fast - - - Console - true - - - - - - - Level3 - Disabled - WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) - $(TargetDir) - true - Fast - - - Console - true - - - - - Level3 - - - MaxSpeed - true - true - WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) - $(TargetDir) - Fast - - - Console - true - true - true - - - - - Level3 - - - MaxSpeed - true - true - WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) - $(TargetDir) - Fast - - - Console - true - true - true - - - - - diff --git a/examples/common.props b/examples/common.props new file mode 100644 index 00000000..7bf37005 --- /dev/null +++ b/examples/common.props @@ -0,0 +1,172 @@ + + + + + Debug + Win32 + + + Debug + x64 + + + Release + Win32 + + + Release + x64 + + + + + Application + true + Unicode + + + Application + true + Unicode + + + Application + false + true + Unicode + + + Application + false + true + Unicode + + + + + + + + + + + + + + + + + + + true + $(ProjectDir)..\..;$(ExecutablePath) + + + true + $(ProjectDir)..\..;$(ExecutablePath) + + + false + $(ProjectDir)..\..;$(ExecutablePath) + + + false + $(ProjectDir)..\..;$(ExecutablePath) + + + + + + Level3 + Disabled + WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) + $(TargetDir) + true + Fast + + + Console + true + + + + + + + Level3 + Disabled + WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) + $(TargetDir) + true + Fast + + + Console + true + + + + + Level3 + + + MaxSpeed + true + true + WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) + $(TargetDir) + Fast + + + Console + true + true + true + + + + + Level3 + + + MaxSpeed + true + true + WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) + $(TargetDir) + Fast + + + Console + true + true + true + + + + ispc + $(default_targets) + $(TargetDir)$(ISPC_file).obj + 
$(Target_out);$(TargetDir)$(ISPC_file)_sse2.obj + $(Target_out);$(TargetDir)$(ISPC_file)_sse4.obj + $(Target_out);$(TargetDir)$(ISPC_file)_avx.obj + $(Target_out);$(TargetDir)$(ISPC_file)_avx11.obj + $(Target_out);$(TargetDir)$(ISPC_file)_avx2.obj + + + + Document + $(ISPC_compiler) -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --arch=x86 --target=$(Target_str) + $(ISPC_compiler) -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --target=$(Target_str) + $(Target_out);$(TargetDir)%(Filename)_ispc.h + $(Target_out);$(TargetDir)%(Filename)_ispc.h + $(ISPC_compiler) -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --arch=x86 --target=$(Target_str) + $(ISPC_compiler) -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --target=$(Target_str) + $(Target_out);$(TargetDir)%(Filename)_ispc.h + $(Target_out);$(TargetDir)%(Filename)_ispc.h + + + + + + diff --git a/examples/deferred/deferred_shading.vcxproj b/examples/deferred/deferred_shading.vcxproj index 94e38540..cd361b26 100755 --- a/examples/deferred/deferred_shading.vcxproj +++ b/examples/deferred/deferred_shading.vcxproj @@ -1,154 +1,13 @@ - - - Debug - Win32 - - - Debug - x64 - - - Release - Win32 - - - Release - x64 - - {87f53c53-957e-4e91-878a-bc27828fb9eb} Win32Proj - mandelbrot - ispc + deferred + kernels + sse2,sse4-x2,avx1-x2 - - - Application - true - Unicode - - - Application - true - Unicode - - - Application - false - true - Unicode - - - Application - false - true - Unicode - - - - - - - - - - - - - - - - - - - true - $(ProjectDir)..\..;$(ExecutablePath) - - - true - $(ProjectDir)..\..;$(ExecutablePath) - - - false - $(ProjectDir)..\..;$(ExecutablePath) - - - false - $(ProjectDir)..\..;$(ExecutablePath) - - - - - - Level3 - Disabled - WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) - $(TargetDir) - true - Fast - - - Console - true - - - - - - - Level3 - Disabled 
- WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) - $(TargetDir) - true - Fast - - - Console - true - - - - - Level3 - - - MaxSpeed - true - true - WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) - $(TargetDir) - Fast - - - Console - true - true - true - - - - - Level3 - - - MaxSpeed - true - true - WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) - $(TargetDir) - Fast - - - Console - true - true - true - - + @@ -156,24 +15,4 @@ - - - Document - $(ISPC_compiler) -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --arch=x86 --target=sse2,sse4-x2,avx-x2 - - $(ISPC_compiler) -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --target=sse2,sse4-x2,avx-x2 - - $(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_avx.obj;$(TargetDir)%(Filename)_ispc.h - $(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_avx.obj;$(TargetDir)%(Filename)_ispc.h - $(ISPC_compiler) -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --arch=x86 --target=sse2,sse4-x2,avx-x2 - - $(ISPC_compiler) -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --target=sse2,sse4-x2,avx-x2 - - $(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_avx.obj;$(TargetDir)%(Filename)_ispc.h - $(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_avx.obj;$(TargetDir)%(Filename)_ispc.h - - - - - diff --git a/examples/mandelbrot/mandelbrot.vcxproj b/examples/mandelbrot/mandelbrot.vcxproj index 1b6f1281..e7703ad0 100644 --- a/examples/mandelbrot/mandelbrot.vcxproj +++ b/examples/mandelbrot/mandelbrot.vcxproj @@ -1,176 +1,15 @@  - - - Debug - Win32 - - - Debug - x64 - - - Release - Win32 - - - Release - x64 - - 
{6D3EF8C5-AE26-407B-9ECE-C27CB988D9C1} Win32Proj mandelbrot - ispc + mandelbrot + sse2,sse4-x2,avx1-x2 - - - Application - true - Unicode - - - Application - true - Unicode - - - Application - false - true - Unicode - - - Application - false - true - Unicode - - - - - - - - - - - - - - - - - - - true - $(ProjectDir)..\..;$(ExecutablePath) - - - true - $(ProjectDir)..\..;$(ExecutablePath) - - - false - $(ProjectDir)..\..;$(ExecutablePath) - - - false - $(ProjectDir)..\..;$(ExecutablePath) - - - - - - Level3 - Disabled - WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) - $(TargetDir) - true - Fast - - - Console - true - - - - - - - Level3 - Disabled - WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) - $(TargetDir) - true - Fast - - - Console - true - - - - - Level3 - - - MaxSpeed - true - true - WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) - $(TargetDir) - Fast - - - Console - true - true - true - - - - - Level3 - - - MaxSpeed - true - true - WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) - $(TargetDir) - Fast - - - Console - true - true - true - - + - - - Document - $(ISPC_compiler) -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --arch=x86 --target=sse2,sse4-x2,avx-x2 - - $(ISPC_compiler) -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --target=sse2,sse4-x2,avx-x2 - - $(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_avx.obj;$(TargetDir)%(Filename)_ispc.h - $(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_avx.obj;$(TargetDir)%(Filename)_ispc.h - $(ISPC_compiler) -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --arch=x86 --target=sse2,sse4-x2,avx-x2 - - $(ISPC_compiler) -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --target=sse2,sse4-x2,avx-x2 - - 
$(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_avx.obj;$(TargetDir)%(Filename)_ispc.h - $(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_avx.obj;$(TargetDir)%(Filename)_ispc.h - - - - - diff --git a/examples/mandelbrot_tasks/mandelbrot_tasks.vcxproj b/examples/mandelbrot_tasks/mandelbrot_tasks.vcxproj index fbebdc32..f8b8cfcb 100644 --- a/examples/mandelbrot_tasks/mandelbrot_tasks.vcxproj +++ b/examples/mandelbrot_tasks/mandelbrot_tasks.vcxproj @@ -1,181 +1,16 @@ - - - Debug - Win32 - - - Debug - x64 - - - Release - Win32 - - - Release - x64 - - {E80DA7D4-AB22-4648-A068-327307156BE6} Win32Proj mandelbrot_tasks - ispc + mandelbrot_tasks + sse2,sse4-x2,avx1-x2 - - - Application - true - Unicode - - - Application - true - Unicode - - - Application - false - true - Unicode - - - Application - false - true - Unicode - - - - - - - - - - - - - - - - - - - true - $(ProjectDir)..\..;$(ExecutablePath) - mandelbrot_tasks - - - true - $(ProjectDir)..\..;$(ExecutablePath) - mandelbrot_tasks - - - false - $(ProjectDir)..\..;$(ExecutablePath) - mandelbrot_tasks - - - false - $(ProjectDir)..\..;$(ExecutablePath) - mandelbrot_tasks - - - - - - Level3 - Disabled - WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) - $(TargetDir) - true - Fast - - - Console - true - - - - - - - Level3 - Disabled - WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) - $(TargetDir) - true - Fast - - - Console - true - - - - - Level3 - - - MaxSpeed - true - true - WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) - $(TargetDir) - Fast - - - Console - true - true - true - - - - - Level3 - - - MaxSpeed - true - true - WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) - $(TargetDir) - Fast - - - Console - true - true - true - - + - - - Document - $(ISPC_compiler) -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --arch=x86 
--target=sse2,sse4-x2,avx-x2 - - $(ISPC_compiler) -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --target=sse2,sse4-x2,avx-x2 - - $(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_avx.obj;$(TargetDir)%(Filename)_ispc.h - $(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_avx.obj;$(TargetDir)%(Filename)_ispc.h - $(ISPC_compiler) -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --arch=x86 --target=sse2,sse4-x2,avx-x2 - - $(ISPC_compiler) -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --target=sse2,sse4-x2,avx-x2 - - $(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_avx.obj;$(TargetDir)%(Filename)_ispc.h - $(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_avx.obj;$(TargetDir)%(Filename)_ispc.h - - - - - diff --git a/examples/noise/noise.vcxproj b/examples/noise/noise.vcxproj index 01456625..7adc57f3 100644 --- a/examples/noise/noise.vcxproj +++ b/examples/noise/noise.vcxproj @@ -1,176 +1,15 @@ - - - - Debug - Win32 - - - Debug - x64 - - - Release - Win32 - - - Release - x64 - - - - {0E0886D8-8B5E-4EAF-9A21-91E63DAF81FD} - Win32Proj - noise - ispc - - - - Application - true - Unicode - - - Application - true - Unicode - - - Application - false - true - Unicode - - - Application - false - true - Unicode - - - - - - - - - - - - - - - - - - - true - $(ProjectDir)..\..;$(ExecutablePath) - - - true - $(ProjectDir)..\..;$(ExecutablePath) - - - false - $(ProjectDir)..\..;$(ExecutablePath) - - - false - $(ProjectDir)..\..;$(ExecutablePath) - - - - - - Level3 - Disabled - WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) - $(TargetDir) - true - Fast - - - Console - true - - - - - - - Level3 - 
Disabled - WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) - $(TargetDir) - true - Fast - - - Console - true - - - - - Level3 - - - MaxSpeed - true - true - WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) - $(TargetDir) - Fast - - - Console - true - true - true - - - - - Level3 - - - MaxSpeed - true - true - WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) - $(TargetDir) - Fast - - - Console - true - true - true - - - - - - - - - Document - $(ISPC_compiler) -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --arch=x86 --target=sse2,sse4,avx-x2 - - $(ISPC_compiler) -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --target=sse2,sse4,avx-x2 - - $(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_avx.obj;$(TargetDir)%(Filename)_ispc.h - $(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_avx.obj;$(TargetDir)%(Filename)_ispc.h - $(ISPC_compiler) -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --arch=x86 --target=sse2,sse4,avx-x2 - - $(ISPC_compiler) -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --target=sse2,sse4,avx-x2 - - $(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_avx.obj;$(TargetDir)%(Filename)_ispc.h - $(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_avx.obj;$(TargetDir)%(Filename)_ispc.h - - - - - + + + {0E0886D8-8B5E-4EAF-9A21-91E63DAF81FD} + Win32Proj + noise + noise + sse2,sse4,avx1-x2 + + + + + + diff --git a/examples/options/options.vcxproj b/examples/options/options.vcxproj index 77fb9353..af336aa1 100644 --- a/examples/options/options.vcxproj +++ b/examples/options/options.vcxproj @@ -1,184 +1,17 @@ - - - Debug - Win32 - - - Debug - x64 - 
- - Release - Win32 - - - Release - x64 - - {8C7B5D29-1E76-44E6-BBB8-09830E5DEEAE} Win32Proj options - ispc + options + sse2,sse4-x2,avx1-x2 - - - Application - true - Unicode - - - Application - true - Unicode - - - Application - false - true - Unicode - - - Application - false - true - Unicode - - - - - - - - - - - - - - - - - - - true - $(ProjectDir)..\..;$(ExecutablePath) - - - true - $(ProjectDir)..\..;$(ExecutablePath) - - - false - $(ProjectDir)..\..;$(ExecutablePath) - - - false - $(ProjectDir)..\..;$(ExecutablePath) - - - - - - Level3 - Disabled - WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) - $(TargetDir) - 4305 - true - Fast - - - Console - true - - - - - - - Level3 - Disabled - WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) - $(TargetDir) - 4305 - true - Fast - - - Console - true - - - - - Level3 - - - MaxSpeed - true - true - WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) - $(TargetDir) - 4305 - Fast - - - Console - true - true - true - - - - - Level3 - - - MaxSpeed - true - true - WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) - $(TargetDir) - 4305 - Fast - - - Console - true - true - true - - + - - - - Document - $(ISPC_compiler) -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --arch=x86 --target=sse2,sse4-x2,avx-x2 - - $(ISPC_compiler) -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --target=sse2,sse4-x2,avx-x2 - - $(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_avx.obj;$(TargetDir)%(Filename)_ispc.h - $(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_avx.obj;$(TargetDir)%(Filename)_ispc.h - $(ISPC_compiler) -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --arch=x86 --target=sse2,sse4-x2,avx-x2 - - $(ISPC_compiler) -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h 
$(TargetDir)%(Filename)_ispc.h --target=sse2,sse4-x2,avx-x2 - - $(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_avx.obj;$(TargetDir)%(Filename)_ispc.h - $(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_avx.obj;$(TargetDir)%(Filename)_ispc.h - - - - - - diff --git a/examples/rt/rt.vcxproj b/examples/rt/rt.vcxproj index 19d40192..ea34de56 100644 --- a/examples/rt/rt.vcxproj +++ b/examples/rt/rt.vcxproj @@ -1,181 +1,16 @@ - - - Debug - Win32 - - - Debug - x64 - - - Release - Win32 - - - Release - x64 - - {E787BC3F-2D2E-425E-A64D-4721E2FF3DC9} Win32Proj rt - ispc + rt + sse2,sse4-x2,avx1-i32x8 - - - Application - true - Unicode - - - Application - true - Unicode - - - Application - false - true - Unicode - - - Application - false - true - Unicode - - - - - - - - - - - - - - - - - - - true - $(ProjectDir)..\..;$(ExecutablePath) - - - true - $(ProjectDir)..\..;$(ExecutablePath) - - - false - $(ProjectDir)..\..;$(ExecutablePath) - - - false - $(ProjectDir)..\..;$(ExecutablePath) - - - - - - Level3 - Disabled - WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) - $(TargetDir) - true - Fast - - - Console - true - - - - - - - Level3 - Disabled - WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) - $(TargetDir) - true - Fast - - - Console - true - - - - - Level3 - - - MaxSpeed - true - true - WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) - $(TargetDir) - Fast - - - Console - true - true - true - - - - - Level3 - - - MaxSpeed - true - true - WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) - $(TargetDir) - Fast - - - Console - true - true - true - - - - - Document - -$(ISPC_compiler) -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --arch=x86 --target=sse2,sse4-x2,avx - - -$(ISPC_compiler) -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --target=sse2,sse4-x2,avx - - 
$(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_avx.obj;$(TargetDir)%(Filename)_ispc.h - $(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_avx.obj;$(TargetDir)%(Filename)_ispc.h - -$(ISPC_compiler) -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --arch=x86 --target=sse2,sse4-x2,avx - - -$(ISPC_compiler) -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --target=sse2,sse4-x2,avx - - $(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_avx.obj;$(TargetDir)%(Filename)_ispc.h - $(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_avx.obj;$(TargetDir)%(Filename)_ispc.h - - + - - - diff --git a/examples/sort/sort.vcxproj b/examples/sort/sort.vcxproj index b37eab1c..43f2b439 100644 --- a/examples/sort/sort.vcxproj +++ b/examples/sort/sort.vcxproj @@ -1,177 +1,16 @@  - - - Debug - Win32 - - - Debug - x64 - - - Release - Win32 - - - Release - x64 - - {6D3EF8C5-AE26-407B-9ECE-C27CB988D9C2} Win32Proj sort - ispc + sort + sse2,sse4-x2,avx1-x2 - - - Application - true - Unicode - - - Application - true - Unicode - - - Application - false - true - Unicode - - - Application - false - true - Unicode - - - - - - - - - - - - - - - - - - - true - $(ProjectDir)..\..;$(ExecutablePath) - - - true - $(ProjectDir)..\..;$(ExecutablePath) - - - false - $(ProjectDir)..\..;$(ExecutablePath) - - - false - $(ProjectDir)..\..;$(ExecutablePath) - - - - - - Level3 - Disabled - WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) - $(TargetDir) - true - Fast - - - Console - true - - - - - - - Level3 - Disabled - WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) - $(TargetDir) - true - Fast - - - Console - true - - - - - Level3 - - - MaxSpeed - true - true - 
WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) - $(TargetDir) - Fast - - - Console - true - true - true - - - - - Level3 - - - MaxSpeed - true - true - WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) - $(TargetDir) - Fast - - - Console - true - true - true - - + - - - Document - $(ISPC_compiler) -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --arch=x86 --target=sse2,sse4-x2,avx-x2 - - $(ISPC_compiler) -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --target=sse2,sse4-x2,avx-x2 - - $(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_avx.obj;$(TargetDir)%(Filename)_ispc.h - $(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_avx.obj;$(TargetDir)%(Filename)_ispc.h - $(ISPC_compiler) -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --arch=x86 --target=sse2,sse4-x2,avx-x2 - - $(ISPC_compiler) -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --target=sse2,sse4-x2,avx-x2 - - $(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_avx.obj;$(TargetDir)%(Filename)_ispc.h - $(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_avx.obj;$(TargetDir)%(Filename)_ispc.h - - - - - diff --git a/examples/stencil/stencil.vcxproj b/examples/stencil/stencil.vcxproj index a96a187d..b5f5bb22 100644 --- a/examples/stencil/stencil.vcxproj +++ b/examples/stencil/stencil.vcxproj @@ -1,181 +1,16 @@ - - - Debug - Win32 - - - Debug - x64 - - - Release - Win32 - - - Release - x64 - - {2ef070a1-f62f-4e6a-944b-88d140945c3c} Win32Proj rt - ispc + stencil + sse2,sse4-x2,avx1-i32x8 - - - Application - true - Unicode - - - Application - true - Unicode - - - Application - false - true - Unicode - - 
- Application - false - true - Unicode - - - - - - - - - - - - - - - - - - - true - $(ProjectDir)..\..;$(ExecutablePath) - - - true - $(ProjectDir)..\..;$(ExecutablePath) - - - false - $(ProjectDir)..\..;$(ExecutablePath) - - - false - $(ProjectDir)..\..;$(ExecutablePath) - - - - - - Level3 - Disabled - WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) - $(TargetDir) - true - Fast - - - Console - true - - - - - - - Level3 - Disabled - WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) - $(TargetDir) - true - Fast - - - Console - true - - - - - Level3 - - - MaxSpeed - true - true - WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) - $(TargetDir) - Fast - - - Console - true - true - true - - - - - Level3 - - - MaxSpeed - true - true - WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) - $(TargetDir) - Fast - - - Console - true - true - true - - - - - Document - -$(ISPC_compiler) -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --arch=x86 --target=sse2,sse4-x2,avx - - -$(ISPC_compiler) -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --target=sse2,sse4-x2,avx - - $(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_avx.obj;$(TargetDir)%(Filename)_ispc.h - $(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_avx.obj;$(TargetDir)%(Filename)_ispc.h - -$(ISPC_compiler) -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --arch=x86 --target=sse2,sse4-x2,avx - - -$(ISPC_compiler) -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --target=sse2,sse4-x2,avx - - $(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_avx.obj;$(TargetDir)%(Filename)_ispc.h - 
$(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_avx.obj;$(TargetDir)%(Filename)_ispc.h - - + - - - diff --git a/examples/volume_rendering/volume.vcxproj b/examples/volume_rendering/volume.vcxproj index d3594b98..cc738a7e 100644 --- a/examples/volume_rendering/volume.vcxproj +++ b/examples/volume_rendering/volume.vcxproj @@ -1,177 +1,16 @@ - - - Debug - Win32 - - - Debug - x64 - - - Release - Win32 - - - Release - x64 - - {dee5733a-e93e-449d-9114-9bffcaeb4df9} Win32Proj volume - ispc + volume + sse2,sse4-x2,avx1-i32x8 - - - Application - true - Unicode - - - Application - true - Unicode - - - Application - false - true - Unicode - - - Application - false - true - Unicode - - - - - - - - - - - - - - - - - - - true - $(ProjectDir)..\..;$(ExecutablePath) - - - true - $(ProjectDir)..\..;$(ExecutablePath) - - - false - $(ProjectDir)..\..;$(ExecutablePath) - - - false - $(ProjectDir)..\..;$(ExecutablePath) - - - - - - Level3 - Disabled - WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) - $(TargetDir) - true - Fast - - - Console - true - - - - - - - Level3 - Disabled - WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) - $(TargetDir) - true - Fast - - - Console - true - - - - - Level3 - - - MaxSpeed - true - true - WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) - $(TargetDir) - Fast - - - Console - true - true - true - - - - - Level3 - - - MaxSpeed - true - true - WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) - $(TargetDir) - Fast - - - Console - true - true - true - - + - - - Document - $(ISPC_compiler) -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --arch=x86 --target=sse2,sse4-x2,avx - - $(ISPC_compiler) -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --target=sse2,sse4-x2,avx - - 
$(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_avx.obj;$(TargetDir)%(Filename)_ispc.h - $(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_avx.obj;$(TargetDir)%(Filename)_ispc.h - $(ISPC_compiler) -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --arch=x86 --target=sse2,sse4-x2,avx - - $(ISPC_compiler) -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --target=sse2,sse4-x2,avx - - $(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_avx.obj;$(TargetDir)%(Filename)_ispc.h - $(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_avx.obj;$(TargetDir)%(Filename)_ispc.h - - - - - diff --git a/fail_db.txt b/fail_db.txt index 32917815..ff119d5a 100644 --- a/fail_db.txt +++ b/fail_db.txt @@ -277,13 +277,7 @@ .\tests\reduce-min-uint64.ispc runfail x86 avx1-i64x4 Windows LLVM 3.4 cl -O2 * ./tests/atomics-13.ispc compfail x86 sse4-i16x8 Linux LLVM 3.3 clang++3.3 -O2 * ./tests/atomics-13.ispc compfail x86-64 sse4-i16x8 Linux LLVM 3.3 clang++3.3 -O2 * -./tests/funcptr-null-4.ispc runfail x86 sse4-i8x16 Linux LLVM 3.3 clang++3.3 -O2 * -./tests/funcptr-null-5.ispc runfail x86 sse4-i8x16 Linux LLVM 3.3 clang++3.3 -O2 * -./tests/funcptr-null-6.ispc runfail x86 sse4-i8x16 Linux LLVM 3.3 clang++3.3 -O2 * ./tests/atomics-13.ispc compfail x86 sse4-i8x16 Linux LLVM 3.3 clang++3.3 -O2 * -./tests/funcptr-null-4.ispc runfail x86-64 sse4-i8x16 Linux LLVM 3.3 clang++3.3 -O2 * -./tests/funcptr-null-5.ispc runfail x86-64 sse4-i8x16 Linux LLVM 3.3 clang++3.3 -O2 * -./tests/funcptr-null-6.ispc runfail x86-64 sse4-i8x16 Linux LLVM 3.3 clang++3.3 -O2 * ./tests/atomics-13.ispc compfail x86-64 sse4-i8x16 Linux LLVM 3.3 clang++3.3 -O2 * 
./tests/ptr-assign-lhs-math-1.ispc compfail x86-64 generic-4 Linux LLVM 3.3 clang++3.3 -O2 * ./tests/short-vec-8.ispc compfail x86-64 generic-4 Linux LLVM 3.3 clang++3.3 -O2 * @@ -572,3 +566,68 @@ ./tests/reduce-equal-5.ispc compfail x86-64 generic-16 Linux LLVM 3.4 clang++3.3 -O0 * ./tests/reduce-equal-6.ispc compfail x86-64 generic-16 Linux LLVM 3.4 clang++3.3 -O0 * ./tests/reduce-equal-8.ispc compfail x86-64 generic-16 Linux LLVM 3.4 clang++3.3 -O0 * +./tests/ptr-assign-lhs-math-1.ispc compfail x86-64 generic-4 Linux LLVM 3.5 clang++3.3 -O2 * +./tests/short-vec-8.ispc compfail x86-64 generic-4 Linux LLVM 3.5 clang++3.3 -O2 * +./tests/half-1.ispc runfail x86-64 generic-4 Linux LLVM 3.5 clang++3.3 -O0 * +./tests/ptr-15.ispc runfail x86-64 generic-4 Linux LLVM 3.5 clang++3.3 -O0 * +./tests/atomics-1.ispc compfail x86-64 generic-4 Linux LLVM 3.5 clang++3.3 -O0 * +./tests/atomics-10.ispc compfail x86-64 generic-4 Linux LLVM 3.5 clang++3.3 -O0 * +./tests/atomics-11.ispc compfail x86-64 generic-4 Linux LLVM 3.5 clang++3.3 -O0 * +./tests/atomics-12.ispc compfail x86-64 generic-4 Linux LLVM 3.5 clang++3.3 -O0 * +./tests/atomics-13.ispc compfail x86-64 generic-4 Linux LLVM 3.5 clang++3.3 -O0 * +./tests/atomics-14.ispc compfail x86-64 generic-4 Linux LLVM 3.5 clang++3.3 -O0 * +./tests/atomics-2.ispc compfail x86-64 generic-4 Linux LLVM 3.5 clang++3.3 -O0 * +./tests/atomics-3.ispc compfail x86-64 generic-4 Linux LLVM 3.5 clang++3.3 -O0 * +./tests/atomics-4.ispc compfail x86-64 generic-4 Linux LLVM 3.5 clang++3.3 -O0 * +./tests/atomics-9.ispc compfail x86-64 generic-4 Linux LLVM 3.5 clang++3.3 -O0 * +./tests/exclusive-scan-add-1.ispc compfail x86-64 generic-4 Linux LLVM 3.5 clang++3.3 -O0 * +./tests/exclusive-scan-add-10.ispc compfail x86-64 generic-4 Linux LLVM 3.5 clang++3.3 -O0 * +./tests/exclusive-scan-add-8.ispc compfail x86-64 generic-4 Linux LLVM 3.5 clang++3.3 -O0 * +./tests/exclusive-scan-add-9.ispc compfail x86-64 generic-4 Linux LLVM 3.5 clang++3.3 -O0 * 
+./tests/exclusive-scan-and-1.ispc compfail x86-64 generic-4 Linux LLVM 3.5 clang++3.3 -O0 * +./tests/exclusive-scan-and-2.ispc compfail x86-64 generic-4 Linux LLVM 3.5 clang++3.3 -O0 * +./tests/exclusive-scan-or-1.ispc compfail x86-64 generic-4 Linux LLVM 3.5 clang++3.3 -O0 * +./tests/reduce-equal-1.ispc compfail x86-64 generic-4 Linux LLVM 3.5 clang++3.3 -O0 * +./tests/reduce-equal-10.ispc compfail x86-64 generic-4 Linux LLVM 3.5 clang++3.3 -O0 * +./tests/reduce-equal-12.ispc compfail x86-64 generic-4 Linux LLVM 3.5 clang++3.3 -O0 * +./tests/reduce-equal-13.ispc compfail x86-64 generic-4 Linux LLVM 3.5 clang++3.3 -O0 * +./tests/reduce-equal-2.ispc compfail x86-64 generic-4 Linux LLVM 3.5 clang++3.3 -O0 * +./tests/reduce-equal-3.ispc compfail x86-64 generic-4 Linux LLVM 3.5 clang++3.3 -O0 * +./tests/reduce-equal-4.ispc compfail x86-64 generic-4 Linux LLVM 3.5 clang++3.3 -O0 * +./tests/reduce-equal-5.ispc compfail x86-64 generic-4 Linux LLVM 3.5 clang++3.3 -O0 * +./tests/reduce-equal-6.ispc compfail x86-64 generic-4 Linux LLVM 3.5 clang++3.3 -O0 * +./tests/reduce-equal-8.ispc compfail x86-64 generic-4 Linux LLVM 3.5 clang++3.3 -O0 * +./tests/short-vec-8.ispc compfail x86-64 generic-4 Linux LLVM 3.5 clang++3.3 -O0 * +./tests/test-143.ispc runfail x86-64 generic-16 Linux LLVM 3.5 clang++3.3 -O2 * +./tests/ptr-assign-lhs-math-1.ispc compfail x86-64 generic-16 Linux LLVM 3.5 clang++3.3 -O2 * +./tests/half-1.ispc runfail x86-64 generic-16 Linux LLVM 3.5 clang++3.3 -O0 * +./tests/ptr-15.ispc runfail x86-64 generic-16 Linux LLVM 3.5 clang++3.3 -O0 * +./tests/ptr-19.ispc runfail x86-64 generic-16 Linux LLVM 3.5 clang++3.3 -O0 * +./tests/test-143.ispc runfail x86-64 generic-16 Linux LLVM 3.5 clang++3.3 -O0 * +./tests/atomics-1.ispc compfail x86-64 generic-16 Linux LLVM 3.5 clang++3.3 -O0 * +./tests/atomics-10.ispc compfail x86-64 generic-16 Linux LLVM 3.5 clang++3.3 -O0 * +./tests/atomics-11.ispc compfail x86-64 generic-16 Linux LLVM 3.5 clang++3.3 -O0 * 
+./tests/atomics-12.ispc compfail x86-64 generic-16 Linux LLVM 3.5 clang++3.3 -O0 * +./tests/atomics-13.ispc compfail x86-64 generic-16 Linux LLVM 3.5 clang++3.3 -O0 * +./tests/atomics-14.ispc compfail x86-64 generic-16 Linux LLVM 3.5 clang++3.3 -O0 * +./tests/atomics-2.ispc compfail x86-64 generic-16 Linux LLVM 3.5 clang++3.3 -O0 * +./tests/atomics-3.ispc compfail x86-64 generic-16 Linux LLVM 3.5 clang++3.3 -O0 * +./tests/atomics-4.ispc compfail x86-64 generic-16 Linux LLVM 3.5 clang++3.3 -O0 * +./tests/atomics-9.ispc compfail x86-64 generic-16 Linux LLVM 3.5 clang++3.3 -O0 * +./tests/exclusive-scan-add-1.ispc compfail x86-64 generic-16 Linux LLVM 3.5 clang++3.3 -O0 * +./tests/exclusive-scan-add-10.ispc compfail x86-64 generic-16 Linux LLVM 3.5 clang++3.3 -O0 * +./tests/exclusive-scan-add-8.ispc compfail x86-64 generic-16 Linux LLVM 3.5 clang++3.3 -O0 * +./tests/exclusive-scan-add-9.ispc compfail x86-64 generic-16 Linux LLVM 3.5 clang++3.3 -O0 * +./tests/exclusive-scan-and-1.ispc compfail x86-64 generic-16 Linux LLVM 3.5 clang++3.3 -O0 * +./tests/exclusive-scan-and-2.ispc compfail x86-64 generic-16 Linux LLVM 3.5 clang++3.3 -O0 * +./tests/exclusive-scan-or-1.ispc compfail x86-64 generic-16 Linux LLVM 3.5 clang++3.3 -O0 * +./tests/reduce-equal-1.ispc compfail x86-64 generic-16 Linux LLVM 3.5 clang++3.3 -O0 * +./tests/reduce-equal-10.ispc compfail x86-64 generic-16 Linux LLVM 3.5 clang++3.3 -O0 * +./tests/reduce-equal-12.ispc compfail x86-64 generic-16 Linux LLVM 3.5 clang++3.3 -O0 * +./tests/reduce-equal-13.ispc compfail x86-64 generic-16 Linux LLVM 3.5 clang++3.3 -O0 * +./tests/reduce-equal-2.ispc compfail x86-64 generic-16 Linux LLVM 3.5 clang++3.3 -O0 * +./tests/reduce-equal-3.ispc compfail x86-64 generic-16 Linux LLVM 3.5 clang++3.3 -O0 * +./tests/reduce-equal-4.ispc compfail x86-64 generic-16 Linux LLVM 3.5 clang++3.3 -O0 * +./tests/reduce-equal-5.ispc compfail x86-64 generic-16 Linux LLVM 3.5 clang++3.3 -O0 * +./tests/reduce-equal-6.ispc compfail x86-64 
generic-16 Linux LLVM 3.5 clang++3.3 -O0 * +./tests/reduce-equal-8.ispc compfail x86-64 generic-16 Linux LLVM 3.5 clang++3.3 -O0 * diff --git a/ispc.vcxproj b/ispc.vcxproj index b9a3b6c5..8aee2988 100755 --- a/ispc.vcxproj +++ b/ispc.vcxproj @@ -57,17 +57,17 @@ - - - - + + + + - - - - - + + + + + 4146;4800;4996;4355;4624;4005;4003;4018 @@ -132,383 +132,215 @@ Document - m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS -DRUNTIME=32 builtins/target-sse4.ll | python bitcode2cpp.py builtins\target-sse4.ll 32bit > $(Configuration)/gen-bitcode-sse4-32bit.cpp - $(Configuration)/gen-bitcode-sse4-32bit.cpp + m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS -DRUNTIME=32 builtins/target-sse4.ll | python bitcode2cpp.py builtins\target-sse4.ll 32bit > $(Configuration)/gen-bitcode-sse4-32bit.cpp; + m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS -DRUNTIME=64 builtins/target-sse4.ll | python bitcode2cpp.py builtins\target-sse4.ll 64bit > $(Configuration)/gen-bitcode-sse4-64bit.cpp + $(Configuration)/gen-bitcode-sse4-32bit.cpp; $(Configuration)/gen-bitcode-sse4-64bit.cpp builtins\util.m4;builtins\svml.m4;builtins\target-sse4-common.ll - Building gen-bitcode-sse4-32bit.cpp + Building gen-bitcode-sse4-32bit.cpp and gen-bitcode-sse4-64bit.cpp - + Document - m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS -DRUNTIME=64 builtins/target-sse4.ll | python bitcode2cpp.py builtins\target-sse4.ll 64bit > $(Configuration)/gen-bitcode-sse4-64bit.cpp - $(Configuration)/gen-bitcode-sse4-64bit.cpp + m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS -DRUNTIME=32 builtins/target-sse4-8.ll | python bitcode2cpp.py builtins\target-sse4-8.ll 32bit > $(Configuration)/gen-bitcode-sse4-8-32bit.cpp; + m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS -DRUNTIME=64 builtins/target-sse4-8.ll | python bitcode2cpp.py builtins\target-sse4-8.ll 64bit > $(Configuration)/gen-bitcode-sse4-8-64bit.cpp + 
$(Configuration)/gen-bitcode-sse4-8-32bit.cpp; $(Configuration)/gen-bitcode-sse4-8-64bit.cpp builtins\util.m4;builtins\svml.m4;builtins\target-sse4-common.ll - Building gen-bitcode-sse4-64bit.cpp + Building gen-bitcode-sse4-8-32bit.cpp and gen-bitcode-sse4-8-64bit.cpp - - - Document - m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS -DRUNTIME=32 builtins/target-sse4-8.ll | python bitcode2cpp.py builtins\target-sse4-8.ll 32bit > $(Configuration)/gen-bitcode-sse4-8-32bit.cpp - $(Configuration)/gen-bitcode-sse4-8-32bit.cpp - builtins\util.m4;builtins\svml.m4;builtins\target-sse4-common.ll - Building gen-bitcode-sse4-8-32bit.cpp - - - - - Document - m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS -DRUNTIME=64 builtins/target-sse4-8.ll | python bitcode2cpp.py builtins\target-sse4-8.ll 64bit > $(Configuration)/gen-bitcode-sse4-8-64bit.cpp - $(Configuration)/gen-bitcode-sse4-8-64bit.cpp - builtins\util.m4;builtins\svml.m4;builtins\target-sse4-common.ll - Building gen-bitcode-sse4-8-64bit.cpp - - - - - Document - m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS -DRUNTIME=32 builtins/target-sse4-16.ll | python bitcode2cpp.py builtins\target-sse4-16.ll 32bit > $(Configuration)/gen-bitcode-sse4-16-32bit.cpp - $(Configuration)/gen-bitcode-sse4-16-32bit.cpp - builtins\util.m4;builtins\svml.m4;builtins\target-sse4-common.ll - Building gen-bitcode-sse4-16-32bit.cpp - - - - - Document - m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS -DRUNTIME=64 builtins/target-sse4-16.ll | python bitcode2cpp.py builtins\target-sse4-16.ll 64bit > $(Configuration)/gen-bitcode-sse4-16-64bit.cpp - $(Configuration)/gen-bitcode-sse4-16-64bit.cpp - builtins\util.m4;builtins\svml.m4;builtins\target-sse4-common.ll - Building gen-bitcode-sse4-16-64bit.cpp - - - + Document - m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS -DRUNTIME=32 builtins/target-sse4-x2.ll | python bitcode2cpp.py builtins\target-sse4-x2.ll 32bit > 
$(Configuration)/gen-bitcode-sse4-x2-32bit.cpp - $(Configuration)/gen-bitcode-sse4-x2-32bit.cpp + m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS -DRUNTIME=32 builtins/target-sse4-16.ll | python bitcode2cpp.py builtins\target-sse4-16.ll 32bit > $(Configuration)/gen-bitcode-sse4-16-32bit.cpp; + m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS -DRUNTIME=64 builtins/target-sse4-16.ll | python bitcode2cpp.py builtins\target-sse4-16.ll 64bit > $(Configuration)/gen-bitcode-sse4-16-64bit.cpp + $(Configuration)/gen-bitcode-sse4-16-32bit.cpp; $(Configuration)/gen-bitcode-sse4-16-64bit.cpp builtins\util.m4;builtins\svml.m4;builtins\target-sse4-common.ll - Building gen-bitcode-sse4-x2-32bit.cpp + Building gen-bitcode-sse4-16-32bit.cpp and gen-bitcode-sse4-16-64bit.cpp Document - m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS -DRUNTIME=64 builtins/target-sse4-x2.ll | python bitcode2cpp.py builtins\target-sse4-x2.ll 64bit > $(Configuration)/gen-bitcode-sse4-x2-64bit.cpp - $(Configuration)/gen-bitcode-sse4-x2-64bit.cpp + m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS -DRUNTIME=32 builtins/target-sse4-x2.ll | python bitcode2cpp.py builtins\target-sse4-x2.ll 32bit > $(Configuration)/gen-bitcode-sse4-x2-32bit.cpp; + m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS -DRUNTIME=64 builtins/target-sse4-x2.ll | python bitcode2cpp.py builtins\target-sse4-x2.ll 64bit > $(Configuration)/gen-bitcode-sse4-x2-64bit.cpp + $(Configuration)/gen-bitcode-sse4-x2-32bit.cpp; $(Configuration)/gen-bitcode-sse4-x2-64bit.cpp builtins\util.m4;builtins\svml.m4;builtins\target-sse4-common.ll - Building gen-bitcode-sse4-x2-64bit.cpp + Building gen-bitcode-sse4-x2-32bit.cpp and gen-bitcode-sse4-x2-64bit.cpp Document - m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS -DRUNTIME=32 builtins/target-sse2.ll | python bitcode2cpp.py builtins\target-sse2.ll 32bit > $(Configuration)/gen-bitcode-sse2-32bit.cpp - 
$(Configuration)/gen-bitcode-sse2-32bit.cpp + m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS -DRUNTIME=32 builtins/target-sse2.ll | python bitcode2cpp.py builtins\target-sse2.ll 32bit > $(Configuration)/gen-bitcode-sse2-32bit.cpp; + m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS -DRUNTIME=64 builtins/target-sse2.ll | python bitcode2cpp.py builtins\target-sse2.ll 64bit > $(Configuration)/gen-bitcode-sse2-64bit.cpp + $(Configuration)/gen-bitcode-sse2-32bit.cpp; $(Configuration)/gen-bitcode-sse2-64bit.cpp builtins\util.m4;builtins\svml.m4;builtins\target-sse2-common.ll - Building gen-bitcode-sse2-32bit.cpp - - - - - Document - m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS -DRUNTIME=64 builtins/target-sse2.ll | python bitcode2cpp.py builtins\target-sse2.ll 64bit > $(Configuration)/gen-bitcode-sse2-64bit.cpp - $(Configuration)/gen-bitcode-sse2-64bit.cpp - builtins\util.m4;builtins\svml.m4;builtins\target-sse2-common.ll - Building gen-bitcode-sse2-64bit.cpp + Building gen-bitcode-sse2-32bit.cpp and gen-bitcode-sse2-64bit.cpp Document - m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS -DRUNTIME=32 builtins/target-sse2-x2.ll | python bitcode2cpp.py builtins\target-sse2-x2.ll 32bit > $(Configuration)/gen-bitcode-sse2-x2-32bit.cpp - $(Configuration)/gen-bitcode-sse2-x2-32bit.cpp + m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS -DRUNTIME=32 builtins/target-sse2-x2.ll | python bitcode2cpp.py builtins\target-sse2-x2.ll 32bit > $(Configuration)/gen-bitcode-sse2-x2-32bit.cpp; + m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS -DRUNTIME=64 builtins/target-sse2-x2.ll | python bitcode2cpp.py builtins\target-sse2-x2.ll 64bit > $(Configuration)/gen-bitcode-sse2-x2-64bit.cpp + $(Configuration)/gen-bitcode-sse2-x2-32bit.cpp; $(Configuration)/gen-bitcode-sse2-x2-64bit.cpp builtins\util.m4;builtins\svml.m4;builtins\target-sse2-common.ll - Building gen-bitcode-sse2-x2-32bit.cpp - - - - - 
Document - m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS -DRUNTIME=64 builtins/target-sse2-x2.ll | python bitcode2cpp.py builtins\target-sse2-x2.ll 64bit > $(Configuration)/gen-bitcode-sse2-x2-64bit.cpp - $(Configuration)/gen-bitcode-sse2-x2-64bit.cpp - builtins\util.m4;builtins\svml.m4;builtins\target-sse2-common.ll - Building gen-bitcode-sse2-x2-64bit.cpp + Building gen-bitcode-sse2-x2-32bit.cpp and gen-bitcode-sse2-x2-64bit.cpp Document - m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS -DRUNTIME=32 builtins/target-avx1.ll | python bitcode2cpp.py builtins\target-avx1.ll 32bit > $(Configuration)/gen-bitcode-avx1-32bit.cpp - $(Configuration)/gen-bitcode-avx1-32bit.cpp + m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS -DRUNTIME=32 builtins/target-avx1.ll | python bitcode2cpp.py builtins\target-avx1.ll 32bit > $(Configuration)/gen-bitcode-avx1-32bit.cpp; + m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS -DRUNTIME=64 builtins/target-avx1.ll | python bitcode2cpp.py builtins\target-avx1.ll 64bit > $(Configuration)/gen-bitcode-avx1-64bit.cpp + $(Configuration)/gen-bitcode-avx1-32bit.cpp; $(Configuration)/gen-bitcode-avx1-64bit.cpp builtins\util.m4;builtins\svml.m4;builtins\target-avx-common.ll;builtins\target-avx.ll - Building gen-bitcode-avx1-32bit.cpp - - - - - Document - m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS -DRUNTIME=64 builtins/target-avx1.ll | python bitcode2cpp.py builtins\target-avx1.ll 64bit > $(Configuration)/gen-bitcode-avx1-64bit.cpp - $(Configuration)/gen-bitcode-avx1-64bit.cpp - builtins\util.m4;builtins\svml.m4;builtins\target-avx-common.ll;builtins\target-avx.ll - Building gen-bitcode-avx1-64bit.cpp + Building gen-bitcode-avx1-32bit.cpp and gen-bitcode-avx1-64bit.cpp Document - m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS -DRUNTIME=32 builtins/target-avx1-x2.ll | python bitcode2cpp.py builtins\target-avx1-x2.ll 32bit > 
$(Configuration)/gen-bitcode-avx1-x2-32bit.cpp - $(Configuration)/gen-bitcode-avx1-x2-32bit.cpp + m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS -DRUNTIME=32 builtins/target-avx1-x2.ll | python bitcode2cpp.py builtins\target-avx1-x2.ll 32bit > $(Configuration)/gen-bitcode-avx1-x2-32bit.cpp; + m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS -DRUNTIME=64 builtins/target-avx1-x2.ll | python bitcode2cpp.py builtins\target-avx1-x2.ll 64bit > $(Configuration)/gen-bitcode-avx1-x2-64bit.cpp + $(Configuration)/gen-bitcode-avx1-x2-32bit.cpp; $(Configuration)/gen-bitcode-avx1-x2-64bit.cpp builtins\util.m4;builtins\svml.m4;builtins\target-avx-common.ll;builtins\target-avx-x2.ll - Building gen-bitcode-avx1-x2-32bit.cpp - - - - - Document - m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS -DRUNTIME=64 builtins/target-avx1-x2.ll | python bitcode2cpp.py builtins\target-avx1-x2.ll 64bit > $(Configuration)/gen-bitcode-avx1-x2-64bit.cpp - $(Configuration)/gen-bitcode-avx1-x2-64bit.cpp - builtins\util.m4;builtins\svml.m4;builtins\target-avx-common.ll;builtins\target-avx-x2.ll - Building gen-bitcode-avx1-x2-64bit.cpp + Building gen-bitcode-avx1-x2-32bit.cpp and gen-bitcode-avx1-x2-64bit.cpp Document - m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS -DRUNTIME=32 builtins/target-avx1-i64x4.ll | python bitcode2cpp.py builtins\target-avx1-i64x4.ll 32bit > $(Configuration)/gen-bitcode-avx1-i64x4-32bit.cpp - $(Configuration)/gen-bitcode-avx1-i64x4-32bit.cpp + m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS -DRUNTIME=32 builtins/target-avx1-i64x4.ll | python bitcode2cpp.py builtins\target-avx1-i64x4.ll 32bit > $(Configuration)/gen-bitcode-avx1-i64x4-32bit.cpp; + m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS -DRUNTIME=64 builtins/target-avx1-i64x4.ll | python bitcode2cpp.py builtins\target-avx1-i64x4.ll 64bit > $(Configuration)/gen-bitcode-avx1-i64x4-64bit.cpp + 
$(Configuration)/gen-bitcode-avx1-i64x4-32bit.cpp; $(Configuration)/gen-bitcode-avx1-i64x4-64bit.cpp builtins\util.m4;builtins\svml.m4;builtins\target-avx-common.ll;builtins\target-avx.ll;builtins\target-avx1-i64x4base.ll - Building gen-bitcode-avx1-i64x4-32bit.cpp - - - - - Document - m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS -DRUNTIME=64 builtins/target-avx1-i64x4.ll | python bitcode2cpp.py builtins\target-avx1-i64x4.ll 64bit > $(Configuration)/gen-bitcode-avx1-i64x4-64bit.cpp - $(Configuration)/gen-bitcode-avx1-i64x4-64bit.cpp - builtins\util.m4;builtins\svml.m4;builtins\target-avx-common.ll;builtins\target-avx.ll;builtins\target-avx1-i64x4base.ll - Building gen-bitcode-avx1-i64x4-64bit.cpp + Building gen-bitcode-avx1-i64x4-32bit.cpp and gen-bitcode-avx1-i64x4-64bit.cpp Document - m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS -DRUNTIME=32 builtins/target-avx11.ll | python bitcode2cpp.py builtins\target-avx11.ll 32bit > $(Configuration)/gen-bitcode-avx11-32bit.cpp - $(Configuration)/gen-bitcode-avx11-32bit.cpp + m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS -DRUNTIME=32 builtins/target-avx11.ll | python bitcode2cpp.py builtins\target-avx11.ll 32bit > $(Configuration)/gen-bitcode-avx11-32bit.cpp; + m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS -DRUNTIME=64 builtins/target-avx11.ll | python bitcode2cpp.py builtins\target-avx11.ll 64bit > $(Configuration)/gen-bitcode-avx11-64bit.cpp + $(Configuration)/gen-bitcode-avx11-32bit.cpp; $(Configuration)/gen-bitcode-avx11-64bit.cpp builtins\util.m4;builtins\svml.m4;builtins\target-avx-common.ll;builtins\target-avx.ll - Building gen-bitcode-avx11-32bit.cpp - - - - - Document - m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS -DRUNTIME=64 builtins/target-avx11.ll | python bitcode2cpp.py builtins\target-avx11.ll 64bit > $(Configuration)/gen-bitcode-avx11-64bit.cpp - $(Configuration)/gen-bitcode-avx11-64bit.cpp - 
builtins\util.m4;builtins\svml.m4;builtins\target-avx-common.ll;builtins\target-avx.ll - Building gen-bitcode-avx11-64bit.cpp + Building gen-bitcode-avx11-32bit.cpp and gen-bitcode-avx11-64bit.cpp Document - m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS -DRUNTIME=32 builtins/target-avx11-x2.ll | python bitcode2cpp.py builtins\target-avx11-x2.ll 32bit > $(Configuration)/gen-bitcode-avx11-x2-32bit.cpp - $(Configuration)/gen-bitcode-avx11-x2-32bit.cpp + m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS -DRUNTIME=32 builtins/target-avx11-x2.ll | python bitcode2cpp.py builtins\target-avx11-x2.ll 32bit > $(Configuration)/gen-bitcode-avx11-x2-32bit.cpp; + m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS -DRUNTIME=64 builtins/target-avx11-x2.ll | python bitcode2cpp.py builtins\target-avx11-x2.ll 64bit > $(Configuration)/gen-bitcode-avx11-x2-64bit.cpp + $(Configuration)/gen-bitcode-avx11-x2-32bit.cpp; $(Configuration)/gen-bitcode-avx11-x2-64bit.cpp builtins\util.m4;builtins\svml.m4;builtins\target-avx-common.ll;builtins\target-avx-x2.ll - Building gen-bitcode-avx11-x2-32bit.cpp - - - - - Document - m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS -DRUNTIME=64 builtins/target-avx11-x2.ll | python bitcode2cpp.py builtins\target-avx11-x2.ll 64bit > $(Configuration)/gen-bitcode-avx11-x2-64bit.cpp - $(Configuration)/gen-bitcode-avx11-x2-64bit.cpp - builtins\util.m4;builtins\svml.m4;builtins\target-avx-common.ll;builtins\target-avx-x2.ll - Building gen-bitcode-avx11-x2-64bit.cpp + Building gen-bitcode-avx11-x2-32bit.cpp and gen-bitcode-avx11-x2-64bit.cpp Document - m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS -DRUNTIME=32 builtins/target-avx11-i64x4.ll | python bitcode2cpp.py builtins\target-avx11-i64x4.ll 32bit > $(Configuration)/gen-bitcode-avx11-i64x4-32bit.cpp - $(Configuration)/gen-bitcode-avx11-i64x4-32bit.cpp + m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS -DRUNTIME=32 
builtins/target-avx11-i64x4.ll | python bitcode2cpp.py builtins\target-avx11-i64x4.ll 32bit > $(Configuration)/gen-bitcode-avx11-i64x4-32bit.cpp; + m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS -DRUNTIME=64 builtins/target-avx11-i64x4.ll | python bitcode2cpp.py builtins\target-avx11-i64x4.ll 64bit > $(Configuration)/gen-bitcode-avx11-i64x4-64bit.cpp + $(Configuration)/gen-bitcode-avx11-i64x4-32bit.cpp; $(Configuration)/gen-bitcode-avx11-i64x4-64bit.cpp builtins\util.m4;builtins\svml.m4;builtins\target-avx-common.ll;builtins\target-avx.ll;builtins\target-avx1-i64x4base.ll - Building gen-bitcode-avx11-i64x4-32bit.cpp - - - - - Document - m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS -DRUNTIME=64 builtins/target-avx11-i64x4.ll | python bitcode2cpp.py builtins\target-avx11-i64x4.ll 64bit > $(Configuration)/gen-bitcode-avx11-i64x4-64bit.cpp - $(Configuration)/gen-bitcode-avx11-i64x4-64bit.cpp - builtins\util.m4;builtins\svml.m4;builtins\target-avx-common.ll;builtins\target-avx.ll;builtins\target-avx1-i64x4base.ll - Building gen-bitcode-avx11-i64x4-64bit.cpp + Building gen-bitcode-avx11-i64x4-32bit.cpp and gen-bitcode-avx11-i64x4-64bit.cpp Document - m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS -DRUNTIME=32 builtins/target-avx2.ll | python bitcode2cpp.py builtins\target-avx2.ll 32bit > $(Configuration)/gen-bitcode-avx2-32bit.cpp - $(Configuration)/gen-bitcode-avx2-32bit.cpp + m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS -DRUNTIME=32 builtins/target-avx2.ll | python bitcode2cpp.py builtins\target-avx2.ll 32bit > $(Configuration)/gen-bitcode-avx2-32bit.cpp; + m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS -DRUNTIME=64 builtins/target-avx2.ll | python bitcode2cpp.py builtins\target-avx2.ll 64bit > $(Configuration)/gen-bitcode-avx2-64bit.cpp + $(Configuration)/gen-bitcode-avx2-32bit.cpp; $(Configuration)/gen-bitcode-avx2-64bit.cpp 
builtins\util.m4;builtins\svml.m4;builtins\target-avx-common.ll;builtins\target-avx.ll - Building gen-bitcode-avx2-32bit.cpp - - - - - Document - m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS -DRUNTIME=64 builtins/target-avx2.ll | python bitcode2cpp.py builtins\target-avx2.ll 64bit > $(Configuration)/gen-bitcode-avx2-64bit.cpp - $(Configuration)/gen-bitcode-avx2-64bit.cpp - builtins\util.m4;builtins\svml.m4;builtins\target-avx-common.ll;builtins\target-avx.ll - Building gen-bitcode-avx2-64bit.cpp + Building gen-bitcode-avx2-32bit.cpp and gen-bitcode-avx2-64bit.cpp Document - m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS -DRUNTIME=32 builtins/target-avx2-x2.ll | python bitcode2cpp.py builtins\target-avx2-x2.ll 32bit > $(Configuration)/gen-bitcode-avx2-x2-32bit.cpp - $(Configuration)/gen-bitcode-avx2-x2-32bit.cpp + m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS -DRUNTIME=32 builtins/target-avx2-x2.ll | python bitcode2cpp.py builtins\target-avx2-x2.ll 32bit > $(Configuration)/gen-bitcode-avx2-x2-32bit.cpp; + m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS -DRUNTIME=64 builtins/target-avx2-x2.ll | python bitcode2cpp.py builtins\target-avx2-x2.ll 64bit > $(Configuration)/gen-bitcode-avx2-x2-64bit.cpp + $(Configuration)/gen-bitcode-avx2-x2-32bit.cpp; $(Configuration)/gen-bitcode-avx2-x2-64bit.cpp builtins\util.m4;builtins\svml.m4;builtins\target-avx-common.ll;builtins\target-avx-x2.ll - Building gen-bitcode-avx2-x2-32bit.cpp - - - - - Document - m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS -DRUNTIME=64 builtins/target-avx2-x2.ll | python bitcode2cpp.py builtins\target-avx2-x2.ll 64bit > $(Configuration)/gen-bitcode-avx2-x2-64bit.cpp - $(Configuration)/gen-bitcode-avx2-x2-64bit.cpp - builtins\util.m4;builtins\svml.m4;builtins\target-avx-common.ll;builtins\target-avx-x2.ll - Building gen-bitcode-avx2-x2-64bit.cpp + Building gen-bitcode-avx2-x2-32bit.cpp and gen-bitcode-avx2-x2-64bit.cpp 
Document - m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS -DRUNTIME=32 builtins/target-avx2-i64x4.ll | python bitcode2cpp.py builtins\target-avx2-i64x4.ll 32bit > $(Configuration)/gen-bitcode-avx2-i64x4-32bit.cpp - $(Configuration)/gen-bitcode-avx2-i64x4-32bit.cpp + m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS -DRUNTIME=32 builtins/target-avx2-i64x4.ll | python bitcode2cpp.py builtins\target-avx2-i64x4.ll 32bit > $(Configuration)/gen-bitcode-avx2-i64x4-32bit.cpp; + m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS -DRUNTIME=64 builtins/target-avx2-i64x4.ll | python bitcode2cpp.py builtins\target-avx2-i64x4.ll 64bit > $(Configuration)/gen-bitcode-avx2-i64x4-64bit.cpp + $(Configuration)/gen-bitcode-avx2-i64x4-32bit.cpp; $(Configuration)/gen-bitcode-avx2-i64x4-64bit.cpp builtins\util.m4;builtins\svml.m4;builtins\target-avx-common.ll;builtins\target-avx.ll;builtins\target-avx1-i64x4base.ll - Building gen-bitcode-avx2-i64x4-32bit.cpp - - - - - Document - m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS -DRUNTIME=64 builtins/target-avx2-i64x4.ll | python bitcode2cpp.py builtins\target-avx2-i64x4.ll 64bit > $(Configuration)/gen-bitcode-avx2-i64x4-64bit.cpp - $(Configuration)/gen-bitcode-avx2-i64x4-64bit.cpp - builtins\util.m4;builtins\svml.m4;builtins\target-avx-common.ll;builtins\target-avx.ll;builtins\target-avx1-i64x4base.ll - Building gen-bitcode-avx2-i64x4-64bit.cpp + Building gen-bitcode-avx2-i64x4-32bit.cpp and gen-bitcode-avx2-i64x4-64bit.cpp Document - m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS -DRUNTIME=32 builtins/target-generic-1.ll | python bitcode2cpp.py builtins\target-generic-1.ll 32bit > $(Configuration)/gen-bitcode-generic-1-32bit.cpp - $(Configuration)/gen-bitcode-generic-1-32bit.cpp + m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS -DRUNTIME=32 builtins/target-generic-1.ll | python bitcode2cpp.py builtins\target-generic-1.ll 32bit > 
$(Configuration)/gen-bitcode-generic-1-32bit.cpp; + m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS -DRUNTIME=64 builtins/target-generic-1.ll | python bitcode2cpp.py builtins\target-generic-1.ll 64bit > $(Configuration)/gen-bitcode-generic-1-64bit.cpp + $(Configuration)/gen-bitcode-generic-1-32bit.cpp; $(Configuration)/gen-bitcode-generic-1-64bit.cpp builtins\util.m4;builtins\svml.m4;builtins\target-generic-common.ll - Building gen-bitcode-generic-1-32bit.cpp - - - - - Document - m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS -DRUNTIME=64 builtins/target-generic-1.ll | python bitcode2cpp.py builtins\target-generic-1.ll 64bit > $(Configuration)/gen-bitcode-generic-1-64bit.cpp - $(Configuration)/gen-bitcode-generic-1-64bit.cpp - builtins\util.m4;builtins\svml.m4;builtins\target-generic-common.ll - Building gen-bitcode-generic-1-64bit.cpp + Building gen-bitcode-generic-1-32bit.cpp and gen-bitcode-generic-1-64bit.cpp Document - m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS -DRUNTIME=32 builtins/target-generic-4.ll | python bitcode2cpp.py builtins\target-generic-4.ll 32bit > $(Configuration)/gen-bitcode-generic-4-32bit.cpp - $(Configuration)/gen-bitcode-generic-4-32bit.cpp + m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS -DRUNTIME=32 builtins/target-generic-4.ll | python bitcode2cpp.py builtins\target-generic-4.ll 32bit > $(Configuration)/gen-bitcode-generic-4-32bit.cpp; + m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS -DRUNTIME=64 builtins/target-generic-4.ll | python bitcode2cpp.py builtins\target-generic-4.ll 64bit > $(Configuration)/gen-bitcode-generic-4-64bit.cpp + $(Configuration)/gen-bitcode-generic-4-32bit.cpp; $(Configuration)/gen-bitcode-generic-4-64bit.cpp builtins\util.m4;builtins\svml.m4;builtins\target-generic-common.ll - Building gen-bitcode-generic-4-32bit.cpp - - - - - Document - m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS -DRUNTIME=64 
builtins/target-generic-4.ll | python bitcode2cpp.py builtins\target-generic-4.ll 64bit > $(Configuration)/gen-bitcode-generic-4-64bit.cpp - $(Configuration)/gen-bitcode-generic-4-64bit.cpp - builtins\util.m4;builtins\svml.m4;builtins\target-generic-common.ll - Building gen-bitcode-generic-4-64bit.cpp + Building gen-bitcode-generic-4-32bit.cpp and gen-bitcode-generic-4-64bit.cpp Document - m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS -DRUNTIME=32 builtins/target-generic-8.ll | python bitcode2cpp.py builtins\target-generic-8.ll 32bit > $(Configuration)/gen-bitcode-generic-8-32bit.cpp - $(Configuration)/gen-bitcode-generic-8-32bit.cpp + m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS -DRUNTIME=32 builtins/target-generic-8.ll | python bitcode2cpp.py builtins\target-generic-8.ll 32bit > $(Configuration)/gen-bitcode-generic-8-32bit.cpp; + m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS -DRUNTIME=64 builtins/target-generic-8.ll | python bitcode2cpp.py builtins\target-generic-8.ll 64bit > $(Configuration)/gen-bitcode-generic-8-64bit.cpp + $(Configuration)/gen-bitcode-generic-8-32bit.cpp; $(Configuration)/gen-bitcode-generic-8-64bit.cpp builtins\util.m4;builtins\svml.m4;builtins\target-generic-common.ll - Building gen-bitcode-generic-8-32bit.cpp - - - - - Document - m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS -DRUNTIME=64 builtins/target-generic-8.ll | python bitcode2cpp.py builtins\target-generic-8.ll 64bit > $(Configuration)/gen-bitcode-generic-8-64bit.cpp - $(Configuration)/gen-bitcode-generic-8-64bit.cpp - builtins\util.m4;builtins\svml.m4;builtins\target-generic-common.ll - Building gen-bitcode-generic-8-64bit.cpp + Building gen-bitcode-generic-8-32bit.cpp and gen-bitcode-generic-8-64bit.cpp Document - m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS -DRUNTIME=32 builtins/target-generic-16.ll | python bitcode2cpp.py builtins\target-generic-16.ll 32bit > 
$(Configuration)/gen-bitcode-generic-16-32bit.cpp - $(Configuration)/gen-bitcode-generic-16-32bit.cpp + m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS -DRUNTIME=32 builtins/target-generic-16.ll | python bitcode2cpp.py builtins\target-generic-16.ll 32bit > $(Configuration)/gen-bitcode-generic-16-32bit.cpp; + m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS -DRUNTIME=64 builtins/target-generic-16.ll | python bitcode2cpp.py builtins\target-generic-16.ll 64bit > $(Configuration)/gen-bitcode-generic-16-64bit.cpp + $(Configuration)/gen-bitcode-generic-16-32bit.cpp; $(Configuration)/gen-bitcode-generic-16-64bit.cpp builtins\util.m4;builtins\svml.m4;builtins\target-generic-common.ll - Building gen-bitcode-generic-16-32bit.cpp - - - - - Document - m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS -DRUNTIME=64 builtins/target-generic-16.ll | python bitcode2cpp.py builtins\target-generic-16.ll 64bit > $(Configuration)/gen-bitcode-generic-16-64bit.cpp - $(Configuration)/gen-bitcode-generic-16-64bit.cpp - builtins\util.m4;builtins\svml.m4;builtins\target-generic-common.ll - Building gen-bitcode-generic-16-64bit.cpp + Building gen-bitcode-generic-16-32bit.cpp and gen-bitcode-generic-16-64bit.cpp Document - m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS -DRUNTIME=32 builtins/target-generic-32.ll | python bitcode2cpp.py builtins\target-generic-32.ll 32bit > $(Configuration)/gen-bitcode-generic-32-32bit.cpp - $(Configuration)/gen-bitcode-generic-32-32bit.cpp + m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS -DRUNTIME=32 builtins/target-generic-32.ll | python bitcode2cpp.py builtins\target-generic-32.ll 32bit > $(Configuration)/gen-bitcode-generic-32-32bit.cpp; + m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS -DRUNTIME=64 builtins/target-generic-32.ll | python bitcode2cpp.py builtins\target-generic-32.ll 64bit > $(Configuration)/gen-bitcode-generic-32-64bit.cpp + 
$(Configuration)/gen-bitcode-generic-32-32bit.cpp; $(Configuration)/gen-bitcode-generic-32-64bit.cpp builtins\util.m4;builtins\svml.m4;builtins\target-generic-common.ll - Building gen-bitcode-generic-32-32bit.cpp - - - - - Document - m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS -DRUNTIME=64 builtins/target-generic-32.ll | python bitcode2cpp.py builtins\target-generic-32.ll 64bit > $(Configuration)/gen-bitcode-generic-32-64bit.cpp - $(Configuration)/gen-bitcode-generic-32-64bit.cpp - builtins\util.m4;builtins\svml.m4;builtins\target-generic-common.ll - Building gen-bitcode-generic-32-64bit.cpp + Building gen-bitcode-generic-32-32bit.cpp and gen-bitcode-generic-32-64bit.cpp Document - m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS -DRUNTIME=32 builtins/target-generic-64.ll | python bitcode2cpp.py builtins\target-generic-64.ll 32bit > $(Configuration)/gen-bitcode-generic-64-32bit.cpp - $(Configuration)/gen-bitcode-generic-64-32bit.cpp + m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS -DRUNTIME=32 builtins/target-generic-64.ll | python bitcode2cpp.py builtins\target-generic-64.ll 32bit > $(Configuration)/gen-bitcode-generic-64-32bit.cpp; + m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS -DRUNTIME=64 builtins/target-generic-64.ll | python bitcode2cpp.py builtins\target-generic-64.ll 64bit > $(Configuration)/gen-bitcode-generic-64-64bit.cpp + $(Configuration)/gen-bitcode-generic-64-32bit.cpp; $(Configuration)/gen-bitcode-generic-64-64bit.cpp builtins\util.m4;builtins\svml.m4;builtins\target-generic-common.ll - Building gen-bitcode-generic-64-32bit.cpp + Building gen-bitcode-generic-64-32bit.cpp and gen-bitcode-generic-64-64bit.cpp - - Document - m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS -DRUNTIME=64 builtins/target-generic-64.ll | python bitcode2cpp.py builtins\target-generic-64.ll 64bit > $(Configuration)/gen-bitcode-generic-64-64bit.cpp - 
$(Configuration)/gen-bitcode-generic-64-64bit.cpp - builtins\util.m4;builtins\svml.m4;builtins\target-generic-common.ll - Building gen-bitcode-generic-64-64bit.cpp - - - - + Document flex -t lex.ll > $(Configuration)\lex.cc $(Configuration)\lex.cc @@ -597,4 +429,4 @@ - + diff --git a/llvm_patches/3_3_r195476_r195779_i16_sext.patch b/llvm_patches/3_3_r195476_r195779_i16_sext.patch new file mode 100644 index 00000000..a49325c9 --- /dev/null +++ b/llvm_patches/3_3_r195476_r195779_i16_sext.patch @@ -0,0 +1,57 @@ +Two stability patches affecting sse4-i16x8 and sse4-i8x16 targets. See PR18014 and PR18054 for more details. + +Index: lib/Target/X86/X86ISelLowering.cpp +=================================================================== +--- lib/Target/X86/X86ISelLowering.cpp (revision 195862) ++++ lib/Target/X86/X86ISelLowering.cpp (working copy) +@@ -12099,19 +12099,27 @@ + // fall through + case MVT::v4i32: + case MVT::v8i16: { +- // (sext (vzext x)) -> (vsext x) + SDValue Op0 = Op.getOperand(0); + SDValue Op00 = Op0.getOperand(0); + SDValue Tmp1; + // Hopefully, this VECTOR_SHUFFLE is just a VZEXT. + if (Op0.getOpcode() == ISD::BITCAST && +- Op00.getOpcode() == ISD::VECTOR_SHUFFLE) ++ Op00.getOpcode() == ISD::VECTOR_SHUFFLE) { ++ // (sext (vzext x)) -> (vsext x) + Tmp1 = LowerVectorIntExtend(Op00, DAG); +- if (Tmp1.getNode()) { +- SDValue Tmp1Op0 = Tmp1.getOperand(0); +- assert(Tmp1Op0.getOpcode() == X86ISD::VZEXT && +- "This optimization is invalid without a VZEXT."); +- return DAG.getNode(X86ISD::VSEXT, dl, VT, Tmp1Op0.getOperand(0)); ++ if (Tmp1.getNode()) { ++ EVT ExtraEltVT = ExtraVT.getVectorElementType(); ++ // This folding is only valid when the in-reg type is a vector of i8, ++ // i16, or i32. 
++ if (ExtraEltVT == MVT::i8 || ExtraEltVT == MVT::i16 || ++ ExtraEltVT == MVT::i32) { ++ SDValue Tmp1Op0 = Tmp1.getOperand(0); ++ assert(Tmp1Op0.getOpcode() == X86ISD::VZEXT && ++ "This optimization is invalid without a VZEXT."); ++ return DAG.getNode(X86ISD::VSEXT, dl, VT, Tmp1Op0.getOperand(0)); ++ } ++ Op0 = Tmp1; ++ } + } + + // If the above didn't work, then just use Shift-Left + Shift-Right. +@@ -15826,6 +15834,15 @@ + if (BitWidth == 1) + return SDValue(); + ++ // Check all uses of that condition operand to check whether it will be ++ // consumed by non-BLEND instructions, which may depend on all bits are set ++ // properly. ++ for (SDNode::use_iterator I = Cond->use_begin(), ++ E = Cond->use_end(); I != E; ++I) ++ if (I->getOpcode() != ISD::VSELECT) ++ // TODO: Add other opcodes eventually lowered into BLEND. ++ return SDValue(); ++ + assert(BitWidth >= 8 && BitWidth <= 64 && "Invalid mask size"); + APInt DemandedMask = APInt::getHighBitsSet(BitWidth, 1); + diff --git a/llvm_patches/3_4_r195476_r195779_i16_sext.patch b/llvm_patches/3_4_r195476_r195779_i16_sext.patch new file mode 100644 index 00000000..4e2c0f6b --- /dev/null +++ b/llvm_patches/3_4_r195476_r195779_i16_sext.patch @@ -0,0 +1,57 @@ +Two stability patches affecting sse4-i16x8 and sse4-i8x16 targets. See PR18014 and PR18054 for more details. + +Index: lib/Target/X86/X86ISelLowering.cpp +=================================================================== +--- lib/Target/X86/X86ISelLowering.cpp (revision 195863) ++++ lib/Target/X86/X86ISelLowering.cpp (working copy) +@@ -13120,19 +13120,27 @@ + // fall through + case MVT::v4i32: + case MVT::v8i16: { +- // (sext (vzext x)) -> (vsext x) + SDValue Op0 = Op.getOperand(0); + SDValue Op00 = Op0.getOperand(0); + SDValue Tmp1; + // Hopefully, this VECTOR_SHUFFLE is just a VZEXT. 
+ if (Op0.getOpcode() == ISD::BITCAST && +- Op00.getOpcode() == ISD::VECTOR_SHUFFLE) ++ Op00.getOpcode() == ISD::VECTOR_SHUFFLE) { ++ // (sext (vzext x)) -> (vsext x) + Tmp1 = LowerVectorIntExtend(Op00, Subtarget, DAG); +- if (Tmp1.getNode()) { +- SDValue Tmp1Op0 = Tmp1.getOperand(0); +- assert(Tmp1Op0.getOpcode() == X86ISD::VZEXT && +- "This optimization is invalid without a VZEXT."); +- return DAG.getNode(X86ISD::VSEXT, dl, VT, Tmp1Op0.getOperand(0)); ++ if (Tmp1.getNode()) { ++ EVT ExtraEltVT = ExtraVT.getVectorElementType(); ++ // This folding is only valid when the in-reg type is a vector of i8, ++ // i16, or i32. ++ if (ExtraEltVT == MVT::i8 || ExtraEltVT == MVT::i16 || ++ ExtraEltVT == MVT::i32) { ++ SDValue Tmp1Op0 = Tmp1.getOperand(0); ++ assert(Tmp1Op0.getOpcode() == X86ISD::VZEXT && ++ "This optimization is invalid without a VZEXT."); ++ return DAG.getNode(X86ISD::VSEXT, dl, VT, Tmp1Op0.getOperand(0)); ++ } ++ Op0 = Tmp1; ++ } + } + + // If the above didn't work, then just use Shift-Left + Shift-Right. +@@ -17007,6 +17015,15 @@ + if (BitWidth == 1) + return SDValue(); + ++ // Check all uses of that condition operand to check whether it will be ++ // consumed by non-BLEND instructions, which may depend on all bits are set ++ // properly. ++ for (SDNode::use_iterator I = Cond->use_begin(), ++ E = Cond->use_end(); I != E; ++I) ++ if (I->getOpcode() != ISD::VSELECT) ++ // TODO: Add other opcodes eventually lowered into BLEND. ++ return SDValue(); ++ + assert(BitWidth >= 8 && BitWidth <= 64 && "Invalid mask size"); + APInt DemandedMask = APInt::getHighBitsSet(BitWidth, 1); + diff --git a/module.cpp b/module.cpp index 1afc5a0b..6006ea34 100644 --- a/module.cpp +++ b/module.cpp @@ -2104,7 +2104,7 @@ lAddExtractedGlobals(llvm::Module *module, // example, this happens with varying globals if we compile // to different vector widths. 
if (gv2->getType() != gv->getType()) - Error(rgi.pos, "Mismatch in size/layout of global " + Warning(rgi.pos, "Mismatch in size/layout of global " "variable \"%s\" with different targets. " "Globals must not include \"varying\" types or arrays " "with size based on programCount when compiling to " diff --git a/opt.cpp b/opt.cpp index 8df0f4fe..3e320b4b 100644 --- a/opt.cpp +++ b/opt.cpp @@ -127,6 +127,8 @@ static llvm::Pass *CreateDebugPass(char * output); static llvm::Pass *CreateReplaceStdlibShiftPass(); +static llvm::Pass *CreateFixBooleanSelectPass(); + #define DEBUG_START_PASS(NAME) \ if (g->debugPrint && \ (getenv("FUNC") == NULL || \ @@ -659,6 +661,9 @@ Optimize(llvm::Module *module, int optLevel) { optPM.add(CreateMakeInternalFuncsStaticPass()); optPM.add(llvm::createGlobalDCEPass()); optPM.add(llvm::createConstantMergePass()); + + // Should be the last + optPM.add(CreateFixBooleanSelectPass(), 400); } // Finish up by making sure we didn't mess anything up in the IR along @@ -670,6 +675,7 @@ Optimize(llvm::Module *module, int optLevel) { printf("\n*****\nFINAL OUTPUT\n*****\n"); module->dump(); } + } @@ -1022,12 +1028,12 @@ InstructionSimplifyPass::simplifyBoolVec(llvm::Value *value) { if (trunc != NULL) { // Convert trunc({sext,zext}(i1 vector)) -> (i1 vector) llvm::SExtInst *sext = llvm::dyn_cast(value); - if (sext && + if (sext && sext->getOperand(0)->getType() == LLVMTypes::Int1VectorType) return sext->getOperand(0); llvm::ZExtInst *zext = llvm::dyn_cast(value); - if (zext && + if (zext && zext->getOperand(0)->getType() == LLVMTypes::Int1VectorType) return zext->getOperand(0); } @@ -1853,7 +1859,7 @@ lIs32BitSafeHelper(llvm::Value *v) { // handle Adds, SExts, Constant Vectors if (llvm::BinaryOperator *bop = llvm::dyn_cast(v)) { if (bop->getOpcode() == llvm::Instruction::Add) { - return lIs32BitSafeHelper(bop->getOperand(0)) + return lIs32BitSafeHelper(bop->getOperand(0)) && lIs32BitSafeHelper(bop->getOperand(1)); } return false; @@ -4961,7 +4967,7 @@ 
bool ReplaceStdlibShiftPass::runOnBasicBlock(llvm::BasicBlock &bb) { DEBUG_START_PASS("ReplaceStdlibShiftPass"); bool modifiedAny = false; - + llvm::Function *shifts[6]; shifts[0] = m->module->getFunction("__shift_i8"); shifts[1] = m->module->getFunction("__shift_i16"); @@ -4992,19 +4998,19 @@ ReplaceStdlibShiftPass::runOnBasicBlock(llvm::BasicBlock &bb) { } llvm::Value *shuffleIdxs = LLVMInt32Vector(shuffleVals); llvm::Value *zeroVec = llvm::ConstantAggregateZero::get(shiftedVec->getType()); - llvm::Value *shuffle = new llvm::ShuffleVectorInst(shiftedVec, zeroVec, + llvm::Value *shuffle = new llvm::ShuffleVectorInst(shiftedVec, zeroVec, shuffleIdxs, "vecShift", ci); ci->replaceAllUsesWith(shuffle); modifiedAny = true; delete [] shuffleVals; } else { - PerformanceWarning(SourcePos(), "Stdlib shift() called without constant shift amount."); + PerformanceWarning(SourcePos(), "Stdlib shift() called without constant shift amount."); } } } } } - + DEBUG_END_PASS("ReplaceStdlibShiftPass"); return modifiedAny; @@ -5015,3 +5021,185 @@ static llvm::Pass * CreateReplaceStdlibShiftPass() { return new ReplaceStdlibShiftPass(); } + + + +/////////////////////////////////////////////////////////////////////////////// +// FixBooleanSelect +// +// The problem is that in LLVM 3.3, optimizer doesn't like +// the following instruction sequence: +// %cmp = fcmp olt <8 x float> %a, %b +// %sext_cmp = sext <8 x i1> %cmp to <8 x i32> +// %new_mask = and <8 x i32> %sext_cmp, %mask +// and optimizes it to the following: +// %cmp = fcmp olt <8 x float> %a, %b +// %cond = select <8 x i1> %cmp, <8 x i32> %mask, <8 x i32> zeroinitializer +// +// It wouldn't be a problem if codegen produced good code for it. But it +// doesn't, especially for vectors larger than native vectors. +// +// This optimization reverts this pattern and should be the last one before +// code gen. +// +// Note that this problem was introduced in LLVM 3.3. But in LLVM 3.4 it was +// fixed. See commit r194542. 
+// +// After LLVM 3.3 this optimization should probably stay for experimental +// purposes and code should be compared with and without this optimization from +// time to time to make sure that LLVM does the right thing. +/////////////////////////////////////////////////////////////////////////////// + +class FixBooleanSelectPass : public llvm::FunctionPass { +public: + static char ID; + FixBooleanSelectPass() :FunctionPass(ID) {} + + const char *getPassName() const { return "Fix Boolean Select"; } + bool runOnFunction(llvm::Function &F); + +private: + llvm::Instruction* fixSelect(llvm::SelectInst* sel, llvm::SExtInst* sext); +}; + +char FixBooleanSelectPass::ID = 0; + +llvm::Instruction* FixBooleanSelectPass::fixSelect(llvm::SelectInst* sel, llvm::SExtInst* sext) { + // Select instruction result type and its integer equivalent + llvm::VectorType *orig_type = llvm::dyn_cast(sel->getType()); + llvm::VectorType *int_type = llvm::VectorType::getInteger(orig_type); + + // Result value and optional pointer to instruction to delete + llvm::Instruction *result = 0, *optional_to_delete = 0; + + // It can be vector of integers or vector of floating point values. + if (orig_type->getElementType()->isIntegerTy()) { + // Generate sext+and, remove select. + result = llvm::BinaryOperator::CreateAnd(sext, sel->getTrueValue(), "and_mask", sel); + } else { + llvm::BitCastInst* bc = llvm::dyn_cast(sel->getTrueValue()); + + if (bc && bc->hasOneUse() && bc->getSrcTy()->isIntOrIntVectorTy() && bc->getSrcTy()->isVectorTy() && + llvm::isa(bc->getOperand(0)) && + llvm::dyn_cast(bc->getOperand(0))->getParent() == sel->getParent()) { + // Bitcast is casting from an integer type and its operand is an instruction located in the same basic block (otherwise it's unsafe to use it).
+ // bitcast+select => sext+and+bitcast + // Create and + llvm::BinaryOperator* and_inst = llvm::BinaryOperator::CreateAnd(sext, bc->getOperand(0), "and_mask", sel); + // Bitcast back to original type + result = new llvm::BitCastInst(and_inst, sel->getType(), "bitcast_mask_out", sel); + // Original bitcast will be removed + optional_to_delete = bc; + } else { + // General case: select => bitcast+sext+and+bitcast + // Bitcast + llvm::BitCastInst* bc_in = new llvm::BitCastInst(sel->getTrueValue(), int_type, "bitcast_mask_in", sel); + // And + llvm::BinaryOperator* and_inst = llvm::BinaryOperator::CreateAnd(sext, bc_in, "and_mask", sel); + // Bitcast back to original type + result = new llvm::BitCastInst(and_inst, sel->getType(), "bitcast_mask_out", sel); + } + } + + // Done, finalize. + sel->replaceAllUsesWith(result); + sel->eraseFromParent(); + if (optional_to_delete) { + optional_to_delete->eraseFromParent(); + } + + return result; +} + +bool +FixBooleanSelectPass::runOnFunction(llvm::Function &F) { + bool modifiedAny = false; + + // LLVM 3.3 only +#if defined(LLVM_3_3) + + for (llvm::Function::iterator I = F.begin(), E = F.end(); + I != E; ++I) { + llvm::BasicBlock* bb = &*I; + for (llvm::BasicBlock::iterator iter = bb->begin(), e = bb->end(); iter != e; ++iter) { + llvm::Instruction *inst = &*iter; + + llvm::CmpInst *cmp = llvm::dyn_cast(inst); + + if (cmp && + cmp->getType()->isVectorTy() && + cmp->getType()->getVectorElementType()->isIntegerTy(1)) { + + // Search for select instruction uses. + int selects = 0; + llvm::VectorType* sext_type = 0; + for (llvm::Instruction::use_iterator it=cmp->use_begin(); it!=cmp->use_end(); ++it ) { + llvm::SelectInst* sel = llvm::dyn_cast(*it); + if (sel && + sel->getType()->isVectorTy() && + sel->getType()->getScalarSizeInBits() > 1) { + selects++; + // We pick the first one; in the typical case all select types are the same.
+ sext_type = llvm::dyn_cast(sel->getType()); + break; + } + } + if (selects == 0) { + continue; + } + // Get an integer equivalent, if it's not yet an integer. + sext_type = llvm::VectorType::getInteger(sext_type); + + // Do transformation + llvm::BasicBlock::iterator iter_copy=iter; + llvm::Instruction* next_inst = &*(++iter_copy); + // Create or reuse sext + llvm::SExtInst* sext = llvm::dyn_cast(next_inst); + if (sext && + sext->getOperand(0) == cmp && + sext->getDestTy() == sext_type) { + // This sext can be reused + } else { + if (next_inst) { + sext = new llvm::SExtInst(cmp, sext_type, "sext_cmp", next_inst); + } else { + sext = new llvm::SExtInst(cmp, sext_type, "sext_cmp", bb); + } + } + + // Walk and fix selects + std::vector sel_uses; + for (llvm::Instruction::use_iterator it=cmp->use_begin(); it!=cmp->use_end(); ++it) { + llvm::SelectInst* sel = llvm::dyn_cast(*it); + if (sel && + sel->getType()->getScalarSizeInBits() == sext_type->getScalarSizeInBits()) { + + // Check that second operand is zero. 
+ llvm::Constant* false_cond = llvm::dyn_cast(sel->getFalseValue()); + if (false_cond && + false_cond->isZeroValue()) { + sel_uses.push_back(sel); + modifiedAny = true; + } + } + } + + for (int i=0; i> " + perf_temp + "_ref" ex_command = "./test " + command + " >> " + perf_temp + "_test" @@ -487,8 +491,8 @@ def perf(options1, args): else: ex_command_ref = "x64\\Release\\ref.exe " + command + " >> " + perf_temp + "_ref" ex_command = "x64\\Release1\\test.exe " + command + " >> " + perf_temp + "_test" - bu_command_ref = "msbuild /V:m /p:Platform=x64 /p:Configuration=Release /p:TargetDir=.\ /p:TargetName=ref /p:ISPC_compiler=ispc_ref /t:rebuild >> " + build_log - bu_command = "msbuild /V:m /p:Platform=x64 /p:Configuration=Release /p:TargetDir=.\ /p:TargetName=test /p:ISPC_compiler=ispc /t:rebuild >> " + build_log + bu_command_ref = "msbuild /V:m /p:Platform=x64 /p:Configuration=Release /p:TargetDir=.\ /p:TargetName=ref /p:ISPC_compiler=ispc_ref " + Target_out + " /t:rebuild >> " + build_log + bu_command = "msbuild /V:m /p:Platform=x64 /p:Configuration=Release /p:TargetDir=.\ /p:TargetName=test /p:ISPC_compiler=ispc " + Target_out + " /t:rebuild >> " + build_log re_command = "msbuild /t:clean >> " + build_log commands = [ex_command, bu_command, ex_command_ref, bu_command_ref, re_command] # parsing config parameters diff --git a/run_tests.py b/run_tests.py index 506d37a5..3f03cc9b 100755 --- a/run_tests.py +++ b/run_tests.py @@ -454,7 +454,7 @@ def verify(): check = [["g++", "clang++", "cl"],["-O0", "-O2"],["x86","x86-64"], ["Linux","Windows","Mac"],["LLVM 3.1","LLVM 3.2","LLVM 3.3","LLVM head"], ["sse2-i32x4", "sse2-i32x8", "sse4-i32x4", "sse4-i32x8", "sse4-i16x8", - "sse4-i8x16", "avx1-i32x8", "avx1-i32x16", "avx1-i64x4", "avx1.1-i32x8", + "sse4-i8x16", "avx1-i32x4", "avx1-i32x8", "avx1-i32x16", "avx1-i64x4", "avx1.1-i32x8", "avx1.1-i32x16", "avx1.1-i64x4", "avx2-i32x8", "avx2-i32x16", "avx2-i64x4", "generic-1", "generic-4", "generic-8", "generic-16", "generic-32", 
"generic-64"]]