From 18f90e63391101d414b1ecca7a5ee7430e621e62 Mon Sep 17 00:00:00 2001 From: Ilia Filippov Date: Fri, 22 Nov 2013 17:06:19 +0400 Subject: [PATCH 01/21] fix of perf.py --- perf.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/perf.py b/perf.py index 1b9de452..2d83475f 100755 --- a/perf.py +++ b/perf.py @@ -336,7 +336,7 @@ def perf(options1, args): test_only_r = " sse2-i32x4 sse2-i32x8 sse4-i32x4 sse4-i32x8 sse4-i16x8 \ sse4-i8x16 avx1-i32x8 avx1-i32x16 avx1-i64x4 avx1.1-i32x8 \ avx1.1-i32x16 avx1.1-i64x4 avx2-i32x8 avx2-i32x16 avx2-i64x4 " - test_only = options.perf_target.split(" ") + test_only = options.perf_target.split(",") for iterator in test_only: if not (" " + iterator + " " in test_only_r): error("unknow option for target: " + iterator, 1) From 8b972f2ed6b3c3f4a775f72bcc512f337fb60084 Mon Sep 17 00:00:00 2001 From: Ilia Filippov Date: Tue, 26 Nov 2013 17:08:06 +0400 Subject: [PATCH 02/21] Changing error to warning: mismatch in size/layout of global variable --- module.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/module.cpp b/module.cpp index 1afc5a0b..6006ea34 100644 --- a/module.cpp +++ b/module.cpp @@ -2104,7 +2104,7 @@ lAddExtractedGlobals(llvm::Module *module, // example, this happens with varying globals if we compile // to different vector widths. if (gv2->getType() != gv->getType()) - Error(rgi.pos, "Mismatch in size/layout of global " + Warning(rgi.pos, "Mismatch in size/layout of global " "variable \"%s\" with different targets. " "Globals must not include \"varying\" types or arrays " "with size based on programCount when compiling to " From 935800d7f69de46c1cda1a0055d85f01a0709ada Mon Sep 17 00:00:00 2001 From: Ilia Filippov Date: Mon, 25 Nov 2013 13:31:26 +0400 Subject: [PATCH 03/21] making common.props --- examples/aobench/aobench.vcxproj | 173 ++-------------- examples/common.props | 149 ++++++++++++++ examples/deferred/deferred_shading.vcxproj | 163 +-------------- examples/mandelbrot/mandelbrot.vcxproj | 163 +-------------- .../mandelbrot_tasks/mandelbrot_tasks.vcxproj | 161 +-------------- examples/noise/noise.vcxproj | 193 +++--------------- examples/options/options.vcxproj | 159 +-------------- examples/rt/rt.vcxproj | 163 +-------------- examples/sort/sort.vcxproj | 157 +------------- examples/stencil/stencil.vcxproj | 147 +------------ examples/volume_rendering/volume.vcxproj | 157 +------------- 11 files changed, 231 insertions(+), 1554 deletions(-) create mode 100644 examples/common.props diff --git a/examples/aobench/aobench.vcxproj b/examples/aobench/aobench.vcxproj index a5b354ce..270af3b1 100644 --- a/examples/aobench/aobench.vcxproj +++ b/examples/aobench/aobench.vcxproj @@ -1,28 +1,12 @@  - - - Debug - Win32 - - - Debug - x64 - - - Release - Win32 - - - Release - x64 - - - - - - - + + + {F29204CA-19DF-4F3C-87D5-03F4EEDAAFEB} + Win32Proj + aobench + ispc + Document @@ -40,142 +24,9 @@ $(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_avx.obj;$(TargetDir)%(Filename)_ispc.h - - {F29204CA-19DF-4F3C-87D5-03F4EEDAAFEB} - Win32Proj - aobench - ispc - - - - Application - true - Unicode - - - Application - true - Unicode - - - Application - false - true - Unicode - - - Application - false - true - Unicode - - - - - - - - - - - - - - - - - - - true - $(ProjectDir)..\..;$(ExecutablePath) - ao - - - true - $(ExecutablePath);$(ProjectDir)..\.. - ao - - - false - $(ProjectDir)..\..;$(ExecutablePath) - ao - - - false - $(ProjectDir)..\..;$(ExecutablePath) - ao - - - - - - Level3 - Disabled - WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) - $(TargetDir) - true - Fast - - - Console - true - - - - - - - Level3 - Disabled - WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) - $(TargetDir) - true - Fast - - - Console - true - - - - - Level3 - - - MaxSpeed - true - true - WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) - $(TargetDir) - Fast - - - Console - true - true - true - - - - - Level3 - - - MaxSpeed - true - true - WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) - $(TargetDir) - Fast - - - Console - true - true - true - - - - - + + + + + diff --git a/examples/common.props b/examples/common.props new file mode 100644 index 00000000..6c1e9596 --- /dev/null +++ b/examples/common.props @@ -0,0 +1,149 @@ + + + + + Debug + Win32 + + + Debug + x64 + + + Release + Win32 + + + Release + x64 + + + + + Application + true + Unicode + + + Application + true + Unicode + + + Application + false + true + Unicode + + + Application + false + true + Unicode + + + + + + + + + + + + + + + + + + + true + $(ProjectDir)..\..;$(ExecutablePath) + + + true + $(ProjectDir)..\..;$(ExecutablePath) + + + false + $(ProjectDir)..\..;$(ExecutablePath) + + + false + $(ProjectDir)..\..;$(ExecutablePath) + + + + + + Level3 + Disabled + WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) + $(TargetDir) + true + Fast + + + Console + true + + + + + + + Level3 + Disabled + WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) + $(TargetDir) + true + Fast + + + Console + true + + + + + Level3 + + + MaxSpeed + true + true + WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) + $(TargetDir) + Fast + + + Console + true + true + true + + + + + Level3 + + + MaxSpeed + true + true + WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) + $(TargetDir) + Fast + + + Console + true + true + true + + + + + + diff --git a/examples/deferred/deferred_shading.vcxproj b/examples/deferred/deferred_shading.vcxproj index 94e38540..dc90cec5 100755 --- a/examples/deferred/deferred_shading.vcxproj +++ b/examples/deferred/deferred_shading.vcxproj @@ -1,161 +1,12 @@ - - - Debug - Win32 - - - Debug - x64 - - - Release - Win32 - - - Release - x64 - - + {87f53c53-957e-4e91-878a-bc27828fb9eb} Win32Proj - mandelbrot + deferred ispc - - - Application - true - Unicode - - - Application - true - Unicode - - - Application - false - true - Unicode - - - Application - false - true - Unicode - - - - - - - - - - - - - - - - - - - true - $(ProjectDir)..\..;$(ExecutablePath) - - - true - $(ProjectDir)..\..;$(ExecutablePath) - - - false - $(ProjectDir)..\..;$(ExecutablePath) - - - false - $(ProjectDir)..\..;$(ExecutablePath) - - - - - - Level3 - Disabled - WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) - $(TargetDir) - true - Fast - - - Console - true - - - - - - - Level3 - Disabled - WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) - $(TargetDir) - true - Fast - - - Console - true - - - - - Level3 - - - MaxSpeed - true - true - WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) - $(TargetDir) - Fast - - - Console - true - true - true - - - - - Level3 - - - MaxSpeed - true - true - WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) - $(TargetDir) - Fast - - - Console - true - true - true - - - - - - - - - Document @@ -173,7 +24,11 @@ $(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_avx.obj;$(TargetDir)%(Filename)_ispc.h - - - + + + + + + + diff --git a/examples/mandelbrot/mandelbrot.vcxproj b/examples/mandelbrot/mandelbrot.vcxproj index 1b6f1281..6db650a1 100644 --- a/examples/mandelbrot/mandelbrot.vcxproj +++ b/examples/mandelbrot/mandelbrot.vcxproj @@ -1,176 +1,31 @@  - - - Debug - Win32 - - - Debug - x64 - - - Release - Win32 - - - Release - x64 - - + {6D3EF8C5-AE26-407B-9ECE-C27CB988D9C1} Win32Proj mandelbrot ispc - - - Application - true - Unicode - - - Application - true - Unicode - - - Application - false - true - Unicode - - - Application - false - true - Unicode - - - - - - - - - - - - - - - - - - - true - $(ProjectDir)..\..;$(ExecutablePath) - - - true - $(ProjectDir)..\..;$(ExecutablePath) - - - false - $(ProjectDir)..\..;$(ExecutablePath) - - - false - $(ProjectDir)..\..;$(ExecutablePath) - - - - - - Level3 - Disabled - WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) - $(TargetDir) - true - Fast - - - Console - true - - - - - - - Level3 - Disabled - WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) - $(TargetDir) - true - Fast - - - Console - true - - - - - Level3 - - - MaxSpeed - true - true - WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) - $(TargetDir) - Fast - - - Console - true - true - true - - - - - Level3 - - - MaxSpeed - true - true - WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) - $(TargetDir) - Fast - - - Console - true - true - true - - - - - - Document - $(ISPC_compiler) -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --arch=x86 --target=sse2,sse4-x2,avx-x2 + $(ISPC_compiler) -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --arch=x86 --target=sse2,sse4,avx-x2 - $(ISPC_compiler) -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --target=sse2,sse4-x2,avx-x2 + $(ISPC_compiler) -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --target=sse2,sse4,avx-x2 $(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_avx.obj;$(TargetDir)%(Filename)_ispc.h $(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_avx.obj;$(TargetDir)%(Filename)_ispc.h - $(ISPC_compiler) -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --arch=x86 --target=sse2,sse4-x2,avx-x2 + $(ISPC_compiler) -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --arch=x86 --target=sse2,sse4,avx-x2 - $(ISPC_compiler) -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --target=sse2,sse4-x2,avx-x2 + $(ISPC_compiler) -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --target=sse2,sse4,avx-x2 $(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_avx.obj;$(TargetDir)%(Filename)_ispc.h $(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_avx.obj;$(TargetDir)%(Filename)_ispc.h - - - + + + + diff --git a/examples/mandelbrot_tasks/mandelbrot_tasks.vcxproj b/examples/mandelbrot_tasks/mandelbrot_tasks.vcxproj index fbebdc32..41c9c75d 100644 --- a/examples/mandelbrot_tasks/mandelbrot_tasks.vcxproj +++ b/examples/mandelbrot_tasks/mandelbrot_tasks.vcxproj @@ -1,163 +1,12 @@ - - - Debug - Win32 - - - Debug - x64 - - - Release - Win32 - - - Release - x64 - - + {E80DA7D4-AB22-4648-A068-327307156BE6} Win32Proj mandelbrot_tasks ispc - - - Application - true - Unicode - - - Application - true - Unicode - - - Application - false - true - Unicode - - - Application - false - true - Unicode - - - - - - - - - - - - - - - - - - - true - $(ProjectDir)..\..;$(ExecutablePath) - mandelbrot_tasks - - - true - $(ProjectDir)..\..;$(ExecutablePath) - mandelbrot_tasks - - - false - $(ProjectDir)..\..;$(ExecutablePath) - mandelbrot_tasks - - - false - $(ProjectDir)..\..;$(ExecutablePath) - mandelbrot_tasks - - - - - - Level3 - Disabled - WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) - $(TargetDir) - true - Fast - - - Console - true - - - - - - - Level3 - Disabled - WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) - $(TargetDir) - true - Fast - - - Console - true - - - - - Level3 - - - MaxSpeed - true - true - WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) - $(TargetDir) - Fast - - - Console - true - true - true - - - - - Level3 - - - MaxSpeed - true - true - WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) - $(TargetDir) - Fast - - - Console - true - true - true - - - - - - - Document @@ -175,7 +24,9 @@ $(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_avx.obj;$(TargetDir)%(Filename)_ispc.h - - - + + + + + diff --git a/examples/noise/noise.vcxproj b/examples/noise/noise.vcxproj index 01456625..dbb309dd 100644 --- a/examples/noise/noise.vcxproj +++ b/examples/noise/noise.vcxproj @@ -1,176 +1,31 @@ - - - - Debug - Win32 - - - Debug - x64 - - - Release - Win32 - - - Release - x64 - - - - {0E0886D8-8B5E-4EAF-9A21-91E63DAF81FD} - Win32Proj - noise + + + + {0E0886D8-8B5E-4EAF-9A21-91E63DAF81FD} + Win32Proj + noise ispc - - - - Application - true - Unicode - - - Application - true - Unicode - - - Application - false - true - Unicode - - - Application - false - true - Unicode - - - - - - - - - - - - - - - - - - - true - $(ProjectDir)..\..;$(ExecutablePath) - - - true - $(ProjectDir)..\..;$(ExecutablePath) - - - false - $(ProjectDir)..\..;$(ExecutablePath) - - - false - $(ProjectDir)..\..;$(ExecutablePath) - - - - - - Level3 - Disabled - WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) - $(TargetDir) - true - Fast - - - Console - true - - - - - - - Level3 - Disabled - WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) - $(TargetDir) - true - Fast - - - Console - true - - - - - Level3 - - - MaxSpeed - true - true - WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) - $(TargetDir) - Fast - - - Console - true - true - true - - - - - Level3 - - - MaxSpeed - true - true - WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) - $(TargetDir) - Fast - - - Console - true - true - true - - - - - - - - - Document + + + + Document $(ISPC_compiler) -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --arch=x86 --target=sse2,sse4,avx-x2 - + $(ISPC_compiler) -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --target=sse2,sse4,avx-x2 - - $(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_avx.obj;$(TargetDir)%(Filename)_ispc.h - $(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_avx.obj;$(TargetDir)%(Filename)_ispc.h + + $(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_avx.obj;$(TargetDir)%(Filename)_ispc.h + $(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_avx.obj;$(TargetDir)%(Filename)_ispc.h $(ISPC_compiler) -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --arch=x86 --target=sse2,sse4,avx-x2 - + $(ISPC_compiler) -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --target=sse2,sse4,avx-x2 - - $(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_avx.obj;$(TargetDir)%(Filename)_ispc.h - $(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_avx.obj;$(TargetDir)%(Filename)_ispc.h - - - - - + + $(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_avx.obj;$(TargetDir)%(Filename)_ispc.h + $(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_avx.obj;$(TargetDir)%(Filename)_ispc.h + + + + + + diff --git a/examples/options/options.vcxproj b/examples/options/options.vcxproj index 77fb9353..e61f2d80 100644 --- a/examples/options/options.vcxproj +++ b/examples/options/options.vcxproj @@ -1,163 +1,12 @@ - - - Debug - Win32 - - - Debug - x64 - - - Release - Win32 - - - Release - x64 - - + {8C7B5D29-1E76-44E6-BBB8-09830E5DEEAE} Win32Proj options ispc - - - Application - true - Unicode - - - Application - true - Unicode - - - Application - false - true - Unicode - - - Application - false - true - Unicode - - - - - - - - - - - - - - - - - - - true - $(ProjectDir)..\..;$(ExecutablePath) - - - true - $(ProjectDir)..\..;$(ExecutablePath) - - - false - $(ProjectDir)..\..;$(ExecutablePath) - - - false - $(ProjectDir)..\..;$(ExecutablePath) - - - - - - Level3 - Disabled - WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) - $(TargetDir) - 4305 - true - Fast - - - Console - true - - - - - - - Level3 - Disabled - WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) - $(TargetDir) - 4305 - true - Fast - - - Console - true - - - - - Level3 - - - MaxSpeed - true - true - WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) - $(TargetDir) - 4305 - Fast - - - Console - true - true - true - - - - - Level3 - - - MaxSpeed - true - true - WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) - $(TargetDir) - 4305 - Fast - - - Console - true - true - true - - - - - - - Document @@ -176,9 +25,9 @@ + + + - - - diff --git a/examples/rt/rt.vcxproj b/examples/rt/rt.vcxproj index 19d40192..3fbc6874 100644 --- a/examples/rt/rt.vcxproj +++ b/examples/rt/rt.vcxproj @@ -1,171 +1,21 @@ - - - Debug - Win32 - - - Debug - x64 - - - Release - Win32 - - - Release - x64 - - + {E787BC3F-2D2E-425E-A64D-4721E2FF3DC9} Win32Proj rt ispc - - - Application - true - Unicode - - - Application - true - Unicode - - - Application - false - true - Unicode - - - Application - false - true - Unicode - - - - - - - - - - - - - - - - - - - true - $(ProjectDir)..\..;$(ExecutablePath) - - - true - $(ProjectDir)..\..;$(ExecutablePath) - - - false - $(ProjectDir)..\..;$(ExecutablePath) - - - false - $(ProjectDir)..\..;$(ExecutablePath) - - - - - - Level3 - Disabled - WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) - $(TargetDir) - true - Fast - - - Console - true - - - - - - - Level3 - Disabled - WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) - $(TargetDir) - true - Fast - - - Console - true - - - - - Level3 - - - MaxSpeed - true - true - WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) - $(TargetDir) - Fast - - - Console - true - true - true - - - - - Level3 - - - MaxSpeed - true - true - WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) - $(TargetDir) - Fast - - - Console - true - true - true - - Document - -$(ISPC_compiler) -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --arch=x86 --target=sse2,sse4-x2,avx - - -$(ISPC_compiler) -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --target=sse2,sse4-x2,avx - + $(ISPC_compiler) -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --arch=x86 --target=sse2,sse4-x2,avx + $(ISPC_compiler) -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --target=sse2,sse4-x2,avx $(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_avx.obj;$(TargetDir)%(Filename)_ispc.h $(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_avx.obj;$(TargetDir)%(Filename)_ispc.h - -$(ISPC_compiler) -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --arch=x86 --target=sse2,sse4-x2,avx - - -$(ISPC_compiler) -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --target=sse2,sse4-x2,avx - + $(ISPC_compiler) -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --arch=x86 --target=sse2,sse4-x2,avx + $(ISPC_compiler) -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --target=sse2,sse4-x2,avx $(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_avx.obj;$(TargetDir)%(Filename)_ispc.h $(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_avx.obj;$(TargetDir)%(Filename)_ispc.h @@ -175,7 +25,4 @@ $(ISPC_compiler) -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(Target - - - diff --git a/examples/sort/sort.vcxproj b/examples/sort/sort.vcxproj index b37eab1c..7bf90aa6 100644 --- a/examples/sort/sort.vcxproj +++ b/examples/sort/sort.vcxproj @@ -1,159 +1,12 @@  - - - Debug - Win32 - - - Debug - x64 - - - Release - Win32 - - - Release - x64 - - + {6D3EF8C5-AE26-407B-9ECE-C27CB988D9C2} Win32Proj sort ispc - - - Application - true - Unicode - - - Application - true - Unicode - - - Application - false - true - Unicode - - - Application - false - true - Unicode - - - - - - - - - - - - - - - - - - - true - $(ProjectDir)..\..;$(ExecutablePath) - - - true - $(ProjectDir)..\..;$(ExecutablePath) - - - false - $(ProjectDir)..\..;$(ExecutablePath) - - - false - $(ProjectDir)..\..;$(ExecutablePath) - - - - - - Level3 - Disabled - WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) - $(TargetDir) - true - Fast - - - Console - true - - - - - - - Level3 - Disabled - WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) - $(TargetDir) - true - Fast - - - Console - true - - - - - Level3 - - - MaxSpeed - true - true - WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) - $(TargetDir) - Fast - - - Console - true - true - true - - - - - Level3 - - - MaxSpeed - true - true - WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) - $(TargetDir) - Fast - - - Console - true - true - true - - - - - - - Document @@ -171,7 +24,9 @@ $(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_avx.obj;$(TargetDir)%(Filename)_ispc.h - - - + + + + + diff --git a/examples/stencil/stencil.vcxproj b/examples/stencil/stencil.vcxproj index a96a187d..2814d5a1 100644 --- a/examples/stencil/stencil.vcxproj +++ b/examples/stencil/stencil.vcxproj @@ -1,154 +1,12 @@ - - - Debug - Win32 - - - Debug - x64 - - - Release - Win32 - - - Release - x64 - - + {2ef070a1-f62f-4e6a-944b-88d140945c3c} Win32Proj rt ispc - - - Application - true - Unicode - - - Application - true - Unicode - - - Application - false - true - Unicode - - - Application - false - true - Unicode - - - - - - - - - - - - - - - - - - - true - $(ProjectDir)..\..;$(ExecutablePath) - - - true - $(ProjectDir)..\..;$(ExecutablePath) - - - false - $(ProjectDir)..\..;$(ExecutablePath) - - - false - $(ProjectDir)..\..;$(ExecutablePath) - - - - - - Level3 - Disabled - WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) - $(TargetDir) - true - Fast - - - Console - true - - - - - - - Level3 - Disabled - WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) - $(TargetDir) - true - Fast - - - Console - true - - - - - Level3 - - - MaxSpeed - true - true - WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) - $(TargetDir) - Fast - - - Console - true - true - true - - - - - Level3 - - - MaxSpeed - true - true - WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) - $(TargetDir) - Fast - - - Console - true - true - true - - Document @@ -175,7 +33,4 @@ $(ISPC_compiler) -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(Target - - - diff --git a/examples/volume_rendering/volume.vcxproj b/examples/volume_rendering/volume.vcxproj index d3594b98..78f5ba86 100644 --- a/examples/volume_rendering/volume.vcxproj +++ b/examples/volume_rendering/volume.vcxproj @@ -1,159 +1,12 @@ - - - Debug - Win32 - - - Debug - x64 - - - Release - Win32 - - - Release - x64 - - + {dee5733a-e93e-449d-9114-9bffcaeb4df9} Win32Proj volume ispc - - - Application - true - Unicode - - - Application - true - Unicode - - - Application - false - true - Unicode - - - Application - false - true - Unicode - - - - - - - - - - - - - - - - - - - true - $(ProjectDir)..\..;$(ExecutablePath) - - - true - $(ProjectDir)..\..;$(ExecutablePath) - - - false - $(ProjectDir)..\..;$(ExecutablePath) - - - false - $(ProjectDir)..\..;$(ExecutablePath) - - - - - - Level3 - Disabled - WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) - $(TargetDir) - true - Fast - - - Console - true - - - - - - - Level3 - Disabled - WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) - $(TargetDir) - true - Fast - - - Console - true - - - - - Level3 - - - MaxSpeed - true - true - WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) - $(TargetDir) - Fast - - - Console - true - true - true - - - - - Level3 - - - MaxSpeed - true - true - WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) - $(TargetDir) - Fast - - - Console - true - true - true - - - - - - - Document @@ -171,7 +24,9 @@ $(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_avx.obj;$(TargetDir)%(Filename)_ispc.h - - - + + + + + From f3ff1fcbebd9f8c8f6ba573d5a1f140c744d3e41 Mon Sep 17 00:00:00 2001 From: Ilia Filippov Date: Mon, 25 Nov 2013 23:37:42 +0400 Subject: [PATCH 04/21] supporting targets in perf windows --- examples/aobench/aobench.vcxproj | 22 +++------------- examples/common.props | 23 ++++++++++++++++ examples/deferred/deferred_shading.vcxproj | 22 +++------------- examples/mandelbrot/mandelbrot.vcxproj | 22 +++------------- .../mandelbrot_tasks/mandelbrot_tasks.vcxproj | 22 +++------------- examples/noise/noise.vcxproj | 22 +++------------- examples/options/options.vcxproj | 22 +++------------- examples/rt/rt.vcxproj | 18 +++---------- examples/sort/sort.vcxproj | 22 +++------------- examples/stencil/stencil.vcxproj | 26 +++---------------- examples/volume_rendering/volume.vcxproj | 22 +++------------- perf.py | 14 ++++++---- run_tests.py | 2 +- 13 files changed, 63 insertions(+), 196 deletions(-) diff --git a/examples/aobench/aobench.vcxproj b/examples/aobench/aobench.vcxproj index 270af3b1..c46ee41a 100644 --- a/examples/aobench/aobench.vcxproj +++ b/examples/aobench/aobench.vcxproj @@ -1,29 +1,13 @@  - {F29204CA-19DF-4F3C-87D5-03F4EEDAAFEB} Win32Proj aobench - ispc + ao + sse2,sse4,avx1-i32x8 - - - Document - $(ISPC_compiler) -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --arch=x86 --target=sse2,sse4,avx - - $(ISPC_compiler) -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --target=sse2,sse4,avx - - $(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_avx.obj;$(TargetDir)%(Filename)_ispc.h - $(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_avx.obj;$(TargetDir)%(Filename)_ispc.h - $(ISPC_compiler) -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --arch=x86 --target=sse2,sse4,avx - - $(ISPC_compiler) -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --target=sse2,sse4,avx - - $(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_avx.obj;$(TargetDir)%(Filename)_ispc.h - $(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_avx.obj;$(TargetDir)%(Filename)_ispc.h - - + diff --git a/examples/common.props b/examples/common.props index 6c1e9596..7bf37005 100644 --- a/examples/common.props +++ b/examples/common.props @@ -143,6 +143,29 @@ true + + ispc + $(default_targets) + $(TargetDir)$(ISPC_file).obj + $(Target_out);$(TargetDir)$(ISPC_file)_sse2.obj + $(Target_out);$(TargetDir)$(ISPC_file)_sse4.obj + $(Target_out);$(TargetDir)$(ISPC_file)_avx.obj + $(Target_out);$(TargetDir)$(ISPC_file)_avx11.obj + $(Target_out);$(TargetDir)$(ISPC_file)_avx2.obj + + + + Document + $(ISPC_compiler) -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --arch=x86 --target=$(Target_str) + $(ISPC_compiler) -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --target=$(Target_str) + $(Target_out);$(TargetDir)%(Filename)_ispc.h + $(Target_out);$(TargetDir)%(Filename)_ispc.h + $(ISPC_compiler) -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --arch=x86 --target=$(Target_str) + $(ISPC_compiler) -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --target=$(Target_str) + $(Target_out);$(TargetDir)%(Filename)_ispc.h + $(Target_out);$(TargetDir)%(Filename)_ispc.h + + diff --git a/examples/deferred/deferred_shading.vcxproj b/examples/deferred/deferred_shading.vcxproj index dc90cec5..cd361b26 100755 --- a/examples/deferred/deferred_shading.vcxproj +++ b/examples/deferred/deferred_shading.vcxproj @@ -1,29 +1,13 @@ - {87f53c53-957e-4e91-878a-bc27828fb9eb} Win32Proj deferred - ispc + kernels + sse2,sse4-x2,avx1-x2 - - - Document - $(ISPC_compiler) -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --arch=x86 --target=sse2,sse4-x2,avx-x2 - - $(ISPC_compiler) -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --target=sse2,sse4-x2,avx-x2 - - $(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_avx.obj;$(TargetDir)%(Filename)_ispc.h - $(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_avx.obj;$(TargetDir)%(Filename)_ispc.h - $(ISPC_compiler) -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --arch=x86 --target=sse2,sse4-x2,avx-x2 - - $(ISPC_compiler) -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --target=sse2,sse4-x2,avx-x2 - - $(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_avx.obj;$(TargetDir)%(Filename)_ispc.h - $(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_avx.obj;$(TargetDir)%(Filename)_ispc.h - - + diff --git a/examples/mandelbrot/mandelbrot.vcxproj b/examples/mandelbrot/mandelbrot.vcxproj index 6db650a1..e7703ad0 100644 --- a/examples/mandelbrot/mandelbrot.vcxproj +++ b/examples/mandelbrot/mandelbrot.vcxproj @@ -1,29 +1,13 @@  - {6D3EF8C5-AE26-407B-9ECE-C27CB988D9C1} Win32Proj mandelbrot - ispc + mandelbrot + sse2,sse4-x2,avx1-x2 - - - Document - $(ISPC_compiler) -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --arch=x86 --target=sse2,sse4,avx-x2 - - $(ISPC_compiler) -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --target=sse2,sse4,avx-x2 - - $(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_avx.obj;$(TargetDir)%(Filename)_ispc.h - $(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_avx.obj;$(TargetDir)%(Filename)_ispc.h - $(ISPC_compiler) -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --arch=x86 --target=sse2,sse4,avx-x2 - - $(ISPC_compiler) -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --target=sse2,sse4,avx-x2 - - $(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_avx.obj;$(TargetDir)%(Filename)_ispc.h - $(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_avx.obj;$(TargetDir)%(Filename)_ispc.h - - + diff --git a/examples/mandelbrot_tasks/mandelbrot_tasks.vcxproj b/examples/mandelbrot_tasks/mandelbrot_tasks.vcxproj index 41c9c75d..f8b8cfcb 100644 --- a/examples/mandelbrot_tasks/mandelbrot_tasks.vcxproj +++ b/examples/mandelbrot_tasks/mandelbrot_tasks.vcxproj @@ -1,30 +1,14 @@ - {E80DA7D4-AB22-4648-A068-327307156BE6} Win32Proj mandelbrot_tasks - ispc + mandelbrot_tasks + sse2,sse4-x2,avx1-x2 + - - Document - $(ISPC_compiler) -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --arch=x86 --target=sse2,sse4-x2,avx-x2 - - $(ISPC_compiler) -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --target=sse2,sse4-x2,avx-x2 - - $(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_avx.obj;$(TargetDir)%(Filename)_ispc.h - $(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_avx.obj;$(TargetDir)%(Filename)_ispc.h - $(ISPC_compiler) -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --arch=x86 --target=sse2,sse4-x2,avx-x2 - - $(ISPC_compiler) -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --target=sse2,sse4-x2,avx-x2 - - $(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_avx.obj;$(TargetDir)%(Filename)_ispc.h - $(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_avx.obj;$(TargetDir)%(Filename)_ispc.h - - - diff --git a/examples/noise/noise.vcxproj b/examples/noise/noise.vcxproj index dbb309dd..7adc57f3 100644 --- a/examples/noise/noise.vcxproj +++ b/examples/noise/noise.vcxproj @@ -1,30 +1,14 @@ - {0E0886D8-8B5E-4EAF-9A21-91E63DAF81FD} Win32Proj noise - ispc + noise + sse2,sse4,avx1-x2 + - - Document - $(ISPC_compiler) -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --arch=x86 --target=sse2,sse4,avx-x2 - - $(ISPC_compiler) -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --target=sse2,sse4,avx-x2 - - $(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_avx.obj;$(TargetDir)%(Filename)_ispc.h - $(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_avx.obj;$(TargetDir)%(Filename)_ispc.h - $(ISPC_compiler) -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --arch=x86 --target=sse2,sse4,avx-x2 - - $(ISPC_compiler) -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --target=sse2,sse4,avx-x2 - - $(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_avx.obj;$(TargetDir)%(Filename)_ispc.h - $(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_avx.obj;$(TargetDir)%(Filename)_ispc.h - - - diff --git a/examples/options/options.vcxproj b/examples/options/options.vcxproj index e61f2d80..af336aa1 100644 --- a/examples/options/options.vcxproj +++ b/examples/options/options.vcxproj @@ -1,29 +1,13 @@ - {8C7B5D29-1E76-44E6-BBB8-09830E5DEEAE} Win32Proj options - ispc + options + sse2,sse4-x2,avx1-x2 - - - Document - $(ISPC_compiler) -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --arch=x86 --target=sse2,sse4-x2,avx-x2 - - $(ISPC_compiler) -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --target=sse2,sse4-x2,avx-x2 - - $(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_avx.obj;$(TargetDir)%(Filename)_ispc.h - $(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_avx.obj;$(TargetDir)%(Filename)_ispc.h - $(ISPC_compiler) -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --arch=x86 --target=sse2,sse4-x2,avx-x2 - - $(ISPC_compiler) -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --target=sse2,sse4-x2,avx-x2 - - $(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_avx.obj;$(TargetDir)%(Filename)_ispc.h - $(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_avx.obj;$(TargetDir)%(Filename)_ispc.h - - + diff --git a/examples/rt/rt.vcxproj b/examples/rt/rt.vcxproj index 3fbc6874..ea34de56 100644 --- a/examples/rt/rt.vcxproj +++ b/examples/rt/rt.vcxproj @@ -1,25 +1,13 @@ - {E787BC3F-2D2E-425E-A64D-4721E2FF3DC9} Win32Proj rt - ispc + rt + sse2,sse4-x2,avx1-i32x8 - - - Document - $(ISPC_compiler) -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --arch=x86 --target=sse2,sse4-x2,avx - $(ISPC_compiler) -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --target=sse2,sse4-x2,avx - $(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_avx.obj;$(TargetDir)%(Filename)_ispc.h - $(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_avx.obj;$(TargetDir)%(Filename)_ispc.h - $(ISPC_compiler) -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --arch=x86 --target=sse2,sse4-x2,avx - $(ISPC_compiler) -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --target=sse2,sse4-x2,avx - $(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_avx.obj;$(TargetDir)%(Filename)_ispc.h - $(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_avx.obj;$(TargetDir)%(Filename)_ispc.h - - + diff --git a/examples/sort/sort.vcxproj b/examples/sort/sort.vcxproj index 7bf90aa6..43f2b439 100644 --- a/examples/sort/sort.vcxproj +++ b/examples/sort/sort.vcxproj @@ -1,29 +1,13 @@  - {6D3EF8C5-AE26-407B-9ECE-C27CB988D9C2} Win32Proj sort - ispc + sort + sse2,sse4-x2,avx1-x2 - - - Document - $(ISPC_compiler) -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --arch=x86 --target=sse2,sse4-x2,avx-x2 - - $(ISPC_compiler) -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --target=sse2,sse4-x2,avx-x2 - - $(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_avx.obj;$(TargetDir)%(Filename)_ispc.h - $(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_avx.obj;$(TargetDir)%(Filename)_ispc.h - $(ISPC_compiler) -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --arch=x86 --target=sse2,sse4-x2,avx-x2 - - $(ISPC_compiler) -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --target=sse2,sse4-x2,avx-x2 - - $(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_avx.obj;$(TargetDir)%(Filename)_ispc.h - $(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_avx.obj;$(TargetDir)%(Filename)_ispc.h - - + diff --git a/examples/stencil/stencil.vcxproj b/examples/stencil/stencil.vcxproj index 2814d5a1..b5f5bb22 100644 --- a/examples/stencil/stencil.vcxproj +++ b/examples/stencil/stencil.vcxproj @@ -1,33 +1,13 @@ - {2ef070a1-f62f-4e6a-944b-88d140945c3c} Win32Proj rt - ispc + stencil + sse2,sse4-x2,avx1-i32x8 - - - Document - -$(ISPC_compiler) -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --arch=x86 --target=sse2,sse4-x2,avx - - -$(ISPC_compiler) -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --target=sse2,sse4-x2,avx - - $(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_avx.obj;$(TargetDir)%(Filename)_ispc.h - $(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_avx.obj;$(TargetDir)%(Filename)_ispc.h - -$(ISPC_compiler) -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --arch=x86 --target=sse2,sse4-x2,avx - - -$(ISPC_compiler) -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --target=sse2,sse4-x2,avx - - $(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_avx.obj;$(TargetDir)%(Filename)_ispc.h - $(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_avx.obj;$(TargetDir)%(Filename)_ispc.h - - + diff --git a/examples/volume_rendering/volume.vcxproj b/examples/volume_rendering/volume.vcxproj index 78f5ba86..cc738a7e 100644 --- a/examples/volume_rendering/volume.vcxproj +++ b/examples/volume_rendering/volume.vcxproj @@ -1,29 +1,13 @@ - {dee5733a-e93e-449d-9114-9bffcaeb4df9} Win32Proj volume - ispc + volume + sse2,sse4-x2,avx1-i32x8 - - - Document - $(ISPC_compiler) -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --arch=x86 --target=sse2,sse4-x2,avx - - $(ISPC_compiler) -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --target=sse2,sse4-x2,avx - - $(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_avx.obj;$(TargetDir)%(Filename)_ispc.h - $(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_avx.obj;$(TargetDir)%(Filename)_ispc.h - $(ISPC_compiler) -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --arch=x86 --target=sse2,sse4-x2,avx - - $(ISPC_compiler) -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --target=sse2,sse4-x2,avx - - $(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_avx.obj;$(TargetDir)%(Filename)_ispc.h - $(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_avx.obj;$(TargetDir)%(Filename)_ispc.h - - + diff --git a/perf.py b/perf.py index 2d83475f..d1134990 100755 --- a/perf.py +++ b/perf.py @@ -44,7 +44,8 @@ def build_test(commands): test = os.system(commands[1]) if is_windows: common.remove_if_exists(".\\X64\\Release1") - os.rename(".\\X64\\Release", ".\\X64\\Release1") + if (test == 0): + os.rename(".\\X64\\Release", ".\\X64\\Release1") if options.ref: ref = os.system(commands[3]) return (options.ref and ref) or test @@ -334,7 +335,7 @@ def perf(options1, args): if options.perf_target != "": test_only_r = " sse2-i32x4 sse2-i32x8 sse4-i32x4 sse4-i32x8 sse4-i16x8 \ - sse4-i8x16 avx1-i32x8 avx1-i32x16 avx1-i64x4 avx1.1-i32x8 \ + sse4-i8x16 avx1-i32x4 avx1-i32x8 avx1-i32x16 avx1-i64x4 avx1.1-i32x8 \ avx1.1-i32x16 avx1.1-i64x4 avx2-i32x8 avx2-i32x16 avx2-i64x4 " test_only = options.perf_target.split(",") for iterator in test_only: @@ -467,17 +468,20 @@ def perf(options1, args): command = command[:-1] # handle conditional target argument target_str_temp = "" + target_out_temp = "" perf_targets = [""] target_number = 1 if options.perf_target != "": perf_targets = options.perf_target.split(',') target_str_temp = " ISPC_IA_TARGETS=" + target_out_temp = " /p:Target_str=" target_number = len(perf_targets) temp = 0 for target_i in range(target_number): test = [lines[i][:-1],[],[],[],[],[]] test_ref = [lines[i][:-1],[],[],[],[],[]] - target_str = target_str_temp + perf_targets[target_i] + target_str = target_str_temp + perf_targets[target_i] + Target_out = target_out_temp + perf_targets[target_i] if is_windows == False: ex_command_ref = "./ref " + command + " >> " + perf_temp + "_ref" ex_command = "./test " + command + " >> " + perf_temp + "_test" @@ -487,8 +491,8 @@ def perf(options1, args): else: ex_command_ref = "x64\\Release\\ref.exe " + command + " >> " + perf_temp + "_ref" ex_command = "x64\\Release1\\test.exe " + command + " >> " + perf_temp + "_test" - bu_command_ref = "msbuild /V:m /p:Platform=x64 /p:Configuration=Release /p:TargetDir=.\ /p:TargetName=ref /p:ISPC_compiler=ispc_ref /t:rebuild >> " + build_log - bu_command = "msbuild /V:m /p:Platform=x64 /p:Configuration=Release /p:TargetDir=.\ /p:TargetName=test /p:ISPC_compiler=ispc /t:rebuild >> " + build_log + bu_command_ref = "msbuild /V:m /p:Platform=x64 /p:Configuration=Release /p:TargetDir=.\ /p:TargetName=ref /p:ISPC_compiler=ispc_ref " + Target_out + " /t:rebuild >> " + build_log + bu_command = "msbuild /V:m /p:Platform=x64 /p:Configuration=Release /p:TargetDir=.\ /p:TargetName=test /p:ISPC_compiler=ispc " + Target_out + " /t:rebuild >> " + build_log re_command = "msbuild /t:clean >> " + build_log commands = [ex_command, bu_command, ex_command_ref, bu_command_ref, re_command] # parsing config parameters diff --git a/run_tests.py b/run_tests.py index 506d37a5..3f03cc9b 100755 --- a/run_tests.py +++ b/run_tests.py @@ -454,7 +454,7 @@ def verify(): check = [["g++", "clang++", "cl"],["-O0", "-O2"],["x86","x86-64"], ["Linux","Windows","Mac"],["LLVM 3.1","LLVM 3.2","LLVM 3.3","LLVM head"], ["sse2-i32x4", "sse2-i32x8", "sse4-i32x4", "sse4-i32x8", "sse4-i16x8", - "sse4-i8x16", "avx1-i32x8", "avx1-i32x16", "avx1-i64x4", "avx1.1-i32x8", + "sse4-i8x16", "avx1-i32x4" "avx1-i32x8", "avx1-i32x16", "avx1-i64x4", "avx1.1-i32x8", "avx1.1-i32x16", "avx1.1-i64x4", "avx2-i32x8", "avx2-i32x16", "avx2-i64x4", "generic-1", "generic-4", "generic-8", "generic-16", "generic-32", "generic-64"]] From 218d2892e8eb8abbc8c1cb480336ecece89e0911 Mon Sep 17 00:00:00 2001 From: Dmitry Babokin Date: Wed, 27 Nov 2013 03:24:17 +0400 Subject: [PATCH 05/21] fail_db.txt update with LLVM 3.5 (trunk) results on Linux --- fail_db.txt | 65 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 65 insertions(+) diff --git a/fail_db.txt b/fail_db.txt index 32917815..6351fa8f 100644 --- a/fail_db.txt +++ b/fail_db.txt @@ -572,3 +572,68 @@ ./tests/reduce-equal-5.ispc compfail x86-64 generic-16 Linux LLVM 3.4 clang++3.3 -O0 * ./tests/reduce-equal-6.ispc compfail x86-64 generic-16 Linux LLVM 3.4 clang++3.3 -O0 * ./tests/reduce-equal-8.ispc compfail x86-64 generic-16 Linux LLVM 3.4 clang++3.3 -O0 * +./tests/ptr-assign-lhs-math-1.ispc compfail x86-64 generic-4 Linux LLVM 3.5 clang++3.3 -O2 * +./tests/short-vec-8.ispc compfail x86-64 generic-4 Linux LLVM 3.5 clang++3.3 -O2 * +./tests/half-1.ispc runfail x86-64 generic-4 Linux LLVM 3.5 clang++3.3 -O0 * +./tests/ptr-15.ispc runfail x86-64 generic-4 Linux LLVM 3.5 clang++3.3 -O0 * +./tests/atomics-1.ispc compfail x86-64 generic-4 Linux LLVM 3.5 clang++3.3 -O0 * +./tests/atomics-10.ispc compfail x86-64 generic-4 Linux LLVM 3.5 clang++3.3 -O0 * +./tests/atomics-11.ispc compfail x86-64 generic-4 Linux LLVM 3.5 clang++3.3 -O0 * +./tests/atomics-12.ispc compfail x86-64 generic-4 Linux LLVM 3.5 clang++3.3 -O0 * +./tests/atomics-13.ispc compfail x86-64 generic-4 Linux LLVM 3.5 clang++3.3 -O0 * +./tests/atomics-14.ispc compfail x86-64 generic-4 Linux LLVM 3.5 clang++3.3 -O0 * +./tests/atomics-2.ispc compfail x86-64 generic-4 Linux LLVM 3.5 clang++3.3 -O0 * +./tests/atomics-3.ispc compfail x86-64 generic-4 Linux LLVM 3.5 clang++3.3 -O0 * +./tests/atomics-4.ispc compfail x86-64 generic-4 Linux LLVM 3.5 clang++3.3 -O0 * +./tests/atomics-9.ispc compfail x86-64 generic-4 Linux LLVM 3.5 clang++3.3 -O0 * +./tests/exclusive-scan-add-1.ispc compfail x86-64 generic-4 Linux LLVM 3.5 clang++3.3 -O0 * +./tests/exclusive-scan-add-10.ispc compfail x86-64 generic-4 Linux LLVM 3.5 clang++3.3 -O0 * +./tests/exclusive-scan-add-8.ispc compfail x86-64 generic-4 Linux LLVM 3.5 clang++3.3 -O0 * +./tests/exclusive-scan-add-9.ispc compfail x86-64 generic-4 Linux LLVM 3.5 clang++3.3 -O0 * +./tests/exclusive-scan-and-1.ispc compfail x86-64 generic-4 Linux LLVM 3.5 clang++3.3 -O0 * +./tests/exclusive-scan-and-2.ispc compfail x86-64 generic-4 Linux LLVM 3.5 clang++3.3 -O0 * +./tests/exclusive-scan-or-1.ispc compfail x86-64 generic-4 Linux LLVM 3.5 clang++3.3 -O0 * +./tests/reduce-equal-1.ispc compfail x86-64 generic-4 Linux LLVM 3.5 clang++3.3 -O0 * +./tests/reduce-equal-10.ispc compfail x86-64 generic-4 Linux LLVM 3.5 clang++3.3 -O0 * +./tests/reduce-equal-12.ispc compfail x86-64 generic-4 Linux LLVM 3.5 clang++3.3 -O0 * +./tests/reduce-equal-13.ispc compfail x86-64 generic-4 Linux LLVM 3.5 clang++3.3 -O0 * +./tests/reduce-equal-2.ispc compfail x86-64 generic-4 Linux LLVM 3.5 clang++3.3 -O0 * +./tests/reduce-equal-3.ispc compfail x86-64 generic-4 Linux LLVM 3.5 clang++3.3 -O0 * +./tests/reduce-equal-4.ispc compfail x86-64 generic-4 Linux LLVM 3.5 clang++3.3 -O0 * +./tests/reduce-equal-5.ispc compfail x86-64 generic-4 Linux LLVM 3.5 clang++3.3 -O0 * +./tests/reduce-equal-6.ispc compfail x86-64 generic-4 Linux LLVM 3.5 clang++3.3 -O0 * +./tests/reduce-equal-8.ispc compfail x86-64 generic-4 Linux LLVM 3.5 clang++3.3 -O0 * +./tests/short-vec-8.ispc compfail x86-64 generic-4 Linux LLVM 3.5 clang++3.3 -O0 * +./tests/test-143.ispc runfail x86-64 generic-16 Linux LLVM 3.5 clang++3.3 -O2 * +./tests/ptr-assign-lhs-math-1.ispc compfail x86-64 generic-16 Linux LLVM 3.5 clang++3.3 -O2 * +./tests/half-1.ispc runfail x86-64 generic-16 Linux LLVM 3.5 clang++3.3 -O0 * +./tests/ptr-15.ispc runfail x86-64 generic-16 Linux LLVM 3.5 clang++3.3 -O0 * +./tests/ptr-19.ispc runfail x86-64 generic-16 Linux LLVM 3.5 clang++3.3 -O0 * +./tests/test-143.ispc runfail x86-64 generic-16 Linux LLVM 3.5 clang++3.3 -O0 * +./tests/atomics-1.ispc compfail x86-64 generic-16 Linux LLVM 3.5 clang++3.3 -O0 * +./tests/atomics-10.ispc compfail x86-64 generic-16 Linux LLVM 3.5 clang++3.3 -O0 * +./tests/atomics-11.ispc compfail x86-64 generic-16 Linux LLVM 3.5 clang++3.3 -O0 * +./tests/atomics-12.ispc compfail x86-64 generic-16 Linux LLVM 3.5 clang++3.3 -O0 * +./tests/atomics-13.ispc compfail x86-64 generic-16 Linux LLVM 3.5 clang++3.3 -O0 * +./tests/atomics-14.ispc compfail x86-64 generic-16 Linux LLVM 3.5 clang++3.3 -O0 * +./tests/atomics-2.ispc compfail x86-64 generic-16 Linux LLVM 3.5 clang++3.3 -O0 * +./tests/atomics-3.ispc compfail x86-64 generic-16 Linux LLVM 3.5 clang++3.3 -O0 * +./tests/atomics-4.ispc compfail x86-64 generic-16 Linux LLVM 3.5 clang++3.3 -O0 * +./tests/atomics-9.ispc compfail x86-64 generic-16 Linux LLVM 3.5 clang++3.3 -O0 * +./tests/exclusive-scan-add-1.ispc compfail x86-64 generic-16 Linux LLVM 3.5 clang++3.3 -O0 * +./tests/exclusive-scan-add-10.ispc compfail x86-64 generic-16 Linux LLVM 3.5 clang++3.3 -O0 * +./tests/exclusive-scan-add-8.ispc compfail x86-64 generic-16 Linux LLVM 3.5 clang++3.3 -O0 * +./tests/exclusive-scan-add-9.ispc compfail x86-64 generic-16 Linux LLVM 3.5 clang++3.3 -O0 * +./tests/exclusive-scan-and-1.ispc compfail x86-64 generic-16 Linux LLVM 3.5 clang++3.3 -O0 * +./tests/exclusive-scan-and-2.ispc compfail x86-64 generic-16 Linux LLVM 3.5 clang++3.3 -O0 * +./tests/exclusive-scan-or-1.ispc compfail x86-64 generic-16 Linux LLVM 3.5 clang++3.3 -O0 * +./tests/reduce-equal-1.ispc compfail x86-64 generic-16 Linux LLVM 3.5 clang++3.3 -O0 * +./tests/reduce-equal-10.ispc compfail x86-64 generic-16 Linux LLVM 3.5 clang++3.3 -O0 * +./tests/reduce-equal-12.ispc compfail x86-64 generic-16 Linux LLVM 3.5 clang++3.3 -O0 * +./tests/reduce-equal-13.ispc compfail x86-64 generic-16 Linux LLVM 3.5 clang++3.3 -O0 * +./tests/reduce-equal-2.ispc compfail x86-64 generic-16 Linux LLVM 3.5 clang++3.3 -O0 * +./tests/reduce-equal-3.ispc compfail x86-64 generic-16 Linux LLVM 3.5 clang++3.3 -O0 * +./tests/reduce-equal-4.ispc compfail x86-64 generic-16 Linux LLVM 3.5 clang++3.3 -O0 * +./tests/reduce-equal-5.ispc compfail x86-64 generic-16 Linux LLVM 3.5 clang++3.3 -O0 * +./tests/reduce-equal-6.ispc compfail x86-64 generic-16 Linux LLVM 3.5 clang++3.3 -O0 * +./tests/reduce-equal-8.ispc compfail x86-64 generic-16 Linux LLVM 3.5 clang++3.3 -O0 * From 672d43a6cfa135eeeb0b9640cf691526e7eac72d Mon Sep 17 00:00:00 2001 From: Dmitry Babokin Date: Wed, 27 Nov 2013 23:22:50 +0400 Subject: [PATCH 06/21] Adding patch for sse4-i16x8 and sse4-i8x16 targets --- .../3_3_r195476_r195779_i16_sext.patch | 57 +++++++++++++++++++ .../3_4_r195476_r195779_i16_sext.patch | 57 +++++++++++++++++++ 2 files changed, 114 insertions(+) create mode 100644 llvm_patches/3_3_r195476_r195779_i16_sext.patch create mode 100644 llvm_patches/3_4_r195476_r195779_i16_sext.patch diff --git a/llvm_patches/3_3_r195476_r195779_i16_sext.patch b/llvm_patches/3_3_r195476_r195779_i16_sext.patch new file mode 100644 index 00000000..a49325c9 --- /dev/null +++ b/llvm_patches/3_3_r195476_r195779_i16_sext.patch @@ -0,0 +1,57 @@ +Two stability patches affecting sse4-i16x8 and sse4-i8x16 targets. See PR18014 and PR18054 for more details. + +Index: lib/Target/X86/X86ISelLowering.cpp +=================================================================== +--- lib/Target/X86/X86ISelLowering.cpp (revision 195862) ++++ lib/Target/X86/X86ISelLowering.cpp (working copy) +@@ -12099,19 +12099,27 @@ + // fall through + case MVT::v4i32: + case MVT::v8i16: { +- // (sext (vzext x)) -> (vsext x) + SDValue Op0 = Op.getOperand(0); + SDValue Op00 = Op0.getOperand(0); + SDValue Tmp1; + // Hopefully, this VECTOR_SHUFFLE is just a VZEXT. + if (Op0.getOpcode() == ISD::BITCAST && +- Op00.getOpcode() == ISD::VECTOR_SHUFFLE) ++ Op00.getOpcode() == ISD::VECTOR_SHUFFLE) { ++ // (sext (vzext x)) -> (vsext x) + Tmp1 = LowerVectorIntExtend(Op00, DAG); +- if (Tmp1.getNode()) { +- SDValue Tmp1Op0 = Tmp1.getOperand(0); +- assert(Tmp1Op0.getOpcode() == X86ISD::VZEXT && +- "This optimization is invalid without a VZEXT."); +- return DAG.getNode(X86ISD::VSEXT, dl, VT, Tmp1Op0.getOperand(0)); ++ if (Tmp1.getNode()) { ++ EVT ExtraEltVT = ExtraVT.getVectorElementType(); ++ // This folding is only valid when the in-reg type is a vector of i8, ++ // i16, or i32. ++ if (ExtraEltVT == MVT::i8 || ExtraEltVT == MVT::i16 || ++ ExtraEltVT == MVT::i32) { ++ SDValue Tmp1Op0 = Tmp1.getOperand(0); ++ assert(Tmp1Op0.getOpcode() == X86ISD::VZEXT && ++ "This optimization is invalid without a VZEXT."); ++ return DAG.getNode(X86ISD::VSEXT, dl, VT, Tmp1Op0.getOperand(0)); ++ } ++ Op0 = Tmp1; ++ } + } + + // If the above didn't work, then just use Shift-Left + Shift-Right. +@@ -15826,6 +15834,15 @@ + if (BitWidth == 1) + return SDValue(); + ++ // Check all uses of that condition operand to check whether it will be ++ // consumed by non-BLEND instructions, which may depend on all bits are set ++ // properly. ++ for (SDNode::use_iterator I = Cond->use_begin(), ++ E = Cond->use_end(); I != E; ++I) ++ if (I->getOpcode() != ISD::VSELECT) ++ // TODO: Add other opcodes eventually lowered into BLEND. ++ return SDValue(); ++ + assert(BitWidth >= 8 && BitWidth <= 64 && "Invalid mask size"); + APInt DemandedMask = APInt::getHighBitsSet(BitWidth, 1); + diff --git a/llvm_patches/3_4_r195476_r195779_i16_sext.patch b/llvm_patches/3_4_r195476_r195779_i16_sext.patch new file mode 100644 index 00000000..4e2c0f6b --- /dev/null +++ b/llvm_patches/3_4_r195476_r195779_i16_sext.patch @@ -0,0 +1,57 @@ +Two stability patches affecting sse4-i16x8 and sse4-i8x16 targets. See PR18014 and PR18054 for more details. + +Index: lib/Target/X86/X86ISelLowering.cpp +=================================================================== +--- lib/Target/X86/X86ISelLowering.cpp (revision 195863) ++++ lib/Target/X86/X86ISelLowering.cpp (working copy) +@@ -13120,19 +13120,27 @@ + // fall through + case MVT::v4i32: + case MVT::v8i16: { +- // (sext (vzext x)) -> (vsext x) + SDValue Op0 = Op.getOperand(0); + SDValue Op00 = Op0.getOperand(0); + SDValue Tmp1; + // Hopefully, this VECTOR_SHUFFLE is just a VZEXT. + if (Op0.getOpcode() == ISD::BITCAST && +- Op00.getOpcode() == ISD::VECTOR_SHUFFLE) ++ Op00.getOpcode() == ISD::VECTOR_SHUFFLE) { ++ // (sext (vzext x)) -> (vsext x) + Tmp1 = LowerVectorIntExtend(Op00, Subtarget, DAG); +- if (Tmp1.getNode()) { +- SDValue Tmp1Op0 = Tmp1.getOperand(0); +- assert(Tmp1Op0.getOpcode() == X86ISD::VZEXT && +- "This optimization is invalid without a VZEXT."); +- return DAG.getNode(X86ISD::VSEXT, dl, VT, Tmp1Op0.getOperand(0)); ++ if (Tmp1.getNode()) { ++ EVT ExtraEltVT = ExtraVT.getVectorElementType(); ++ // This folding is only valid when the in-reg type is a vector of i8, ++ // i16, or i32. ++ if (ExtraEltVT == MVT::i8 || ExtraEltVT == MVT::i16 || ++ ExtraEltVT == MVT::i32) { ++ SDValue Tmp1Op0 = Tmp1.getOperand(0); ++ assert(Tmp1Op0.getOpcode() == X86ISD::VZEXT && ++ "This optimization is invalid without a VZEXT."); ++ return DAG.getNode(X86ISD::VSEXT, dl, VT, Tmp1Op0.getOperand(0)); ++ } ++ Op0 = Tmp1; ++ } + } + + // If the above didn't work, then just use Shift-Left + Shift-Right. +@@ -17007,6 +17015,15 @@ + if (BitWidth == 1) + return SDValue(); + ++ // Check all uses of that condition operand to check whether it will be ++ // consumed by non-BLEND instructions, which may depend on all bits are set ++ // properly. ++ for (SDNode::use_iterator I = Cond->use_begin(), ++ E = Cond->use_end(); I != E; ++I) ++ if (I->getOpcode() != ISD::VSELECT) ++ // TODO: Add other opcodes eventually lowered into BLEND. ++ return SDValue(); ++ + assert(BitWidth >= 8 && BitWidth <= 64 && "Invalid mask size"); + APInt DemandedMask = APInt::getHighBitsSet(BitWidth, 1); + From eaa483d6e4e08addc311c7b533f1b22184782186 Mon Sep 17 00:00:00 2001 From: Dmitry Babokin Date: Thu, 28 Nov 2013 13:51:20 +0400 Subject: [PATCH 07/21] fail_db update (Linux) --- fail_db.txt | 6 ------ 1 file changed, 6 deletions(-) diff --git a/fail_db.txt b/fail_db.txt index 6351fa8f..ff119d5a 100644 --- a/fail_db.txt +++ b/fail_db.txt @@ -277,13 +277,7 @@ .\tests\reduce-min-uint64.ispc runfail x86 avx1-i64x4 Windows LLVM 3.4 cl -O2 * ./tests/atomics-13.ispc compfail x86 sse4-i16x8 Linux LLVM 3.3 clang++3.3 -O2 * ./tests/atomics-13.ispc compfail x86-64 sse4-i16x8 Linux LLVM 3.3 clang++3.3 -O2 * -./tests/funcptr-null-4.ispc runfail x86 sse4-i8x16 Linux LLVM 3.3 clang++3.3 -O2 * -./tests/funcptr-null-5.ispc runfail x86 sse4-i8x16 Linux LLVM 3.3 clang++3.3 -O2 * -./tests/funcptr-null-6.ispc runfail x86 sse4-i8x16 Linux LLVM 3.3 clang++3.3 -O2 * ./tests/atomics-13.ispc compfail x86 sse4-i8x16 Linux LLVM 3.3 clang++3.3 -O2 * -./tests/funcptr-null-4.ispc runfail x86-64 sse4-i8x16 Linux LLVM 3.3 clang++3.3 -O2 * -./tests/funcptr-null-5.ispc runfail x86-64 sse4-i8x16 Linux LLVM 3.3 clang++3.3 -O2 * -./tests/funcptr-null-6.ispc runfail x86-64 sse4-i8x16 Linux LLVM 3.3 clang++3.3 -O2 * ./tests/atomics-13.ispc compfail x86-64 sse4-i8x16 Linux LLVM 3.3 clang++3.3 -O2 * ./tests/ptr-assign-lhs-math-1.ispc compfail x86-64 generic-4 Linux LLVM 3.3 clang++3.3 -O2 * ./tests/short-vec-8.ispc compfail x86-64 generic-4 Linux LLVM 3.3 clang++3.3 -O2 * From be813ea0a239928bc0bd174870c939d10de2c2d3 Mon Sep 17 00:00:00 2001 From: Dmitry Babokin Date: Thu, 14 Nov 2013 15:32:47 +0400 Subject: [PATCH 08/21] Select optimization for LLVM 3.3 --- opt.cpp | 202 ++++++++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 195 insertions(+), 7 deletions(-) diff --git a/opt.cpp b/opt.cpp index 8df0f4fe..3e320b4b 100644 --- a/opt.cpp +++ b/opt.cpp @@ -127,6 +127,8 @@ static llvm::Pass *CreateDebugPass(char * output); static llvm::Pass *CreateReplaceStdlibShiftPass(); +static llvm::Pass *CreateFixBooleanSelectPass(); + #define DEBUG_START_PASS(NAME) \ if (g->debugPrint && \ (getenv("FUNC") == NULL || \ @@ -659,6 +661,9 @@ Optimize(llvm::Module *module, int optLevel) { optPM.add(CreateMakeInternalFuncsStaticPass()); optPM.add(llvm::createGlobalDCEPass()); optPM.add(llvm::createConstantMergePass()); + + // Should be the last + optPM.add(CreateFixBooleanSelectPass(), 400); } // Finish up by making sure we didn't mess anything up in the IR along @@ -670,6 +675,7 @@ Optimize(llvm::Module *module, int optLevel) { printf("\n*****\nFINAL OUTPUT\n*****\n"); module->dump(); } + } @@ -1022,12 +1028,12 @@ InstructionSimplifyPass::simplifyBoolVec(llvm::Value *value) { if (trunc != NULL) { // Convert trunc({sext,zext}(i1 vector)) -> (i1 vector) llvm::SExtInst *sext = llvm::dyn_cast(value); - if (sext && + if (sext && sext->getOperand(0)->getType() == LLVMTypes::Int1VectorType) return sext->getOperand(0); llvm::ZExtInst *zext = llvm::dyn_cast(value); - if (zext && + if (zext && zext->getOperand(0)->getType() == LLVMTypes::Int1VectorType) return zext->getOperand(0); } @@ -1853,7 +1859,7 @@ lIs32BitSafeHelper(llvm::Value *v) { // handle Adds, SExts, Constant Vectors if (llvm::BinaryOperator *bop = llvm::dyn_cast(v)) { if (bop->getOpcode() == llvm::Instruction::Add) { - return lIs32BitSafeHelper(bop->getOperand(0)) + return lIs32BitSafeHelper(bop->getOperand(0)) && lIs32BitSafeHelper(bop->getOperand(1)); } return false; @@ -4961,7 +4967,7 @@ bool ReplaceStdlibShiftPass::runOnBasicBlock(llvm::BasicBlock &bb) { DEBUG_START_PASS("ReplaceStdlibShiftPass"); bool modifiedAny = false; - + llvm::Function *shifts[6]; shifts[0] = m->module->getFunction("__shift_i8"); shifts[1] = m->module->getFunction("__shift_i16"); @@ -4992,19 +4998,19 @@ ReplaceStdlibShiftPass::runOnBasicBlock(llvm::BasicBlock &bb) { } llvm::Value *shuffleIdxs = LLVMInt32Vector(shuffleVals); llvm::Value *zeroVec = llvm::ConstantAggregateZero::get(shiftedVec->getType()); - llvm::Value *shuffle = new llvm::ShuffleVectorInst(shiftedVec, zeroVec, + llvm::Value *shuffle = new llvm::ShuffleVectorInst(shiftedVec, zeroVec, shuffleIdxs, "vecShift", ci); ci->replaceAllUsesWith(shuffle); modifiedAny = true; delete [] shuffleVals; } else { - PerformanceWarning(SourcePos(), "Stdlib shift() called without constant shift amount."); + PerformanceWarning(SourcePos(), "Stdlib shift() called without constant shift amount."); } } } } } - + DEBUG_END_PASS("ReplaceStdlibShiftPass"); return modifiedAny; @@ -5015,3 +5021,185 @@ static llvm::Pass * CreateReplaceStdlibShiftPass() { return new ReplaceStdlibShiftPass(); } + + + +/////////////////////////////////////////////////////////////////////////////// +// FixBooleanSelect +// +// The problem is that in LLVM 3.3, optimizer doesn't like +// the following instruction sequence: +// %cmp = fcmp olt <8 x float> %a, %b +// %sext_cmp = sext <8 x i1> %cmp to <8 x i32> +// %new_mask = and <8 x i32> %sext_cmp, %mask +// and optimizes it to the following: +// %cmp = fcmp olt <8 x float> %a, %b +// %cond = select <8 x i1> %cmp, <8 x i32> %mask, <8 x i32> zeroinitializer +// +// It wouldn't be a problem if codegen produced good code for it. But it +// doesn't, especially for vectors larger than native vectors. +// +// This optimization reverts this pattern and should be the last one before +// code gen. +// +// Note that this problem was introduced in LLVM 3.3. But in LLVM 3.4 it was +// fixed. See commit r194542. +// +// After LLVM 3.3 this optimization should probably stay for experimental +// purposes and code should be compared with and without this optimization from +// time to time to make sure that LLVM does right thing. +/////////////////////////////////////////////////////////////////////////////// + +class FixBooleanSelectPass : public llvm::FunctionPass { +public: + static char ID; + FixBooleanSelectPass() :FunctionPass(ID) {} + + const char *getPassName() const { return "Resolve \"replace extract insert chains\""; } + bool runOnFunction(llvm::Function &F); + +private: + llvm::Instruction* fixSelect(llvm::SelectInst* sel, llvm::SExtInst* sext); +}; + +char FixBooleanSelectPass::ID = 0; + +llvm::Instruction* FixBooleanSelectPass::fixSelect(llvm::SelectInst* sel, llvm::SExtInst* sext) { + // Select instruction result type and its integer equivalent + llvm::VectorType *orig_type = llvm::dyn_cast(sel->getType()); + llvm::VectorType *int_type = llvm::VectorType::getInteger(orig_type); + + // Result value and optional pointer to instruction to delete + llvm::Instruction *result = 0, *optional_to_delete = 0; + + // It can be vector of integers or vector of floating point values. + if (orig_type->getElementType()->isIntegerTy()) { + // Generate sext+and, remove select. + result = llvm::BinaryOperator::CreateAnd(sext, sel->getTrueValue(), "and_mask", sel); + } else { + llvm::BitCastInst* bc = llvm::dyn_cast(sel->getTrueValue()); + + if (bc && bc->hasOneUse() && bc->getSrcTy()->isIntOrIntVectorTy() && bc->getSrcTy()->isVectorTy() && + llvm::isa(bc->getOperand(0)) && + llvm::dyn_cast(bc->getOperand(0))->getParent() == sel->getParent()) { + // Bitcast is casting form integer type, it's operand is instruction, which is located in the same basic block (otherwise it's unsafe to use it). + // bitcast+select => sext+and+bicast + // Create and + llvm::BinaryOperator* and_inst = llvm::BinaryOperator::CreateAnd(sext, bc->getOperand(0), "and_mask", sel); + // Bitcast back to original type + result = new llvm::BitCastInst(and_inst, sel->getType(), "bitcast_mask_out", sel); + // Original bitcast will be removed + optional_to_delete = bc; + } else { + // General case: select => bitcast+sext+and+bitcast + // Bitcast + llvm::BitCastInst* bc_in = new llvm::BitCastInst(sel->getTrueValue(), int_type, "bitcast_mask_in", sel); + // And + llvm::BinaryOperator* and_inst = llvm::BinaryOperator::CreateAnd(sext, bc_in, "and_mask", sel); + // Bitcast back to original type + result = new llvm::BitCastInst(and_inst, sel->getType(), "bitcast_mask_out", sel); + } + } + + // Done, finalize. + sel->replaceAllUsesWith(result); + sel->eraseFromParent(); + if (optional_to_delete) { + optional_to_delete->eraseFromParent(); + } + + return result; +} + +bool +FixBooleanSelectPass::runOnFunction(llvm::Function &F) { + bool modifiedAny = false; + + // LLVM 3.3 only +#if defined(LLVM_3_3) + + for (llvm::Function::iterator I = F.begin(), E = F.end(); + I != E; ++I) { + llvm::BasicBlock* bb = &*I; + for (llvm::BasicBlock::iterator iter = bb->begin(), e = bb->end(); iter != e; ++iter) { + llvm::Instruction *inst = &*iter; + + llvm::CmpInst *cmp = llvm::dyn_cast(inst); + + if (cmp && + cmp->getType()->isVectorTy() && + cmp->getType()->getVectorElementType()->isIntegerTy(1)) { + + // Search for select instruction uses. + int selects = 0; + llvm::VectorType* sext_type = 0; + for (llvm::Instruction::use_iterator it=cmp->use_begin(); it!=cmp->use_end(); ++it ) { + llvm::SelectInst* sel = llvm::dyn_cast(*it); + if (sel && + sel->getType()->isVectorTy() && + sel->getType()->getScalarSizeInBits() > 1) { + selects++; + // We pick the first one, but typical case when all select types are the same. + sext_type = llvm::dyn_cast(sel->getType()); + break; + } + } + if (selects == 0) { + continue; + } + // Get an integer equivalent, if it's not yet an integer. + sext_type = llvm::VectorType::getInteger(sext_type); + + // Do transformation + llvm::BasicBlock::iterator iter_copy=iter; + llvm::Instruction* next_inst = &*(++iter_copy); + // Create or reuse sext + llvm::SExtInst* sext = llvm::dyn_cast(next_inst); + if (sext && + sext->getOperand(0) == cmp && + sext->getDestTy() == sext_type) { + // This sext can be reused + } else { + if (next_inst) { + sext = new llvm::SExtInst(cmp, sext_type, "sext_cmp", next_inst); + } else { + sext = new llvm::SExtInst(cmp, sext_type, "sext_cmp", bb); + } + } + + // Walk and fix selects + std::vector sel_uses; + for (llvm::Instruction::use_iterator it=cmp->use_begin(); it!=cmp->use_end(); ++it) { + llvm::SelectInst* sel = llvm::dyn_cast(*it); + if (sel && + sel->getType()->getScalarSizeInBits() == sext_type->getScalarSizeInBits()) { + + // Check that second operand is zero. + llvm::Constant* false_cond = llvm::dyn_cast(sel->getFalseValue()); + if (false_cond && + false_cond->isZeroValue()) { + sel_uses.push_back(sel); + modifiedAny = true; + } + } + } + + for (int i=0; i Date: Thu, 28 Nov 2013 21:44:12 +0400 Subject: [PATCH 09/21] Run alloy -j by default --- alloy.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/alloy.py b/alloy.py index 21e428de..525f90d0 100755 --- a/alloy.py +++ b/alloy.py @@ -635,6 +635,7 @@ import platform import smtplib import datetime import copy +import multiprocessing from email.MIMEMultipart import MIMEMultipart from email.MIMEBase import MIMEBase from email.mime.text import MIMEText @@ -663,13 +664,14 @@ if __name__ == '__main__': "Try to build compiler with all LLVM\n\talloy.py -r --only=build\n" + "Performance validation run with 10 runs of each test and comparing to branch 'old'\n\talloy.py -r --only=performance --compare-with=old --number=10\n" + "Validation run. Update fail_db.txt with new fails, send results to my@my.com\n\talloy.py -r --update-errors=F --notify='my@my.com'\n") + num_threads="%s" % multiprocessing.cpu_count() parser = MyParser(usage="Usage: alloy.py -r/-b [options]", epilog=examples) parser.add_option('-b', '--build-llvm', dest='build_llvm', help='ask to build LLVM', default=False, action="store_true") parser.add_option('-r', '--run', dest='validation_run', help='ask for validation run', default=False, action="store_true") parser.add_option('-j', dest='speed', - help='set -j for make', default="8") + help='set -j for make', default=num_threads) # options for activity "build LLVM" llvm_group = OptionGroup(parser, "Options for building LLVM", "These options must be used with -b option.") From b94b89ba68e6da9c3c7b8a2eccbb2475d3a04765 Mon Sep 17 00:00:00 2001 From: Ilia Filippov Date: Fri, 29 Nov 2013 14:24:21 +0400 Subject: [PATCH 10/21] support of LLVM trunk --- cbackend.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/cbackend.cpp b/cbackend.cpp index 8535653f..40f87074 100644 --- a/cbackend.cpp +++ b/cbackend.cpp @@ -241,7 +241,11 @@ namespace { class CBEMCAsmInfo : public llvm::MCAsmInfo { public: CBEMCAsmInfo() { +#if defined(LLVM_3_5) + GlobalPrefix = '\0'; +#else GlobalPrefix = ""; +#endif PrivateGlobalPrefix = ""; } }; From 3bc4788acb31ae1387389bbb77c9b60f0dd4f73a Mon Sep 17 00:00:00 2001 From: Dmitry Babokin Date: Sun, 1 Dec 2013 03:45:00 +0400 Subject: [PATCH 11/21] Fix errors with VS2013 --- ispc.vcxproj | 336 +++++++++++++-------------------------------------- 1 file changed, 84 insertions(+), 252 deletions(-) diff --git a/ispc.vcxproj b/ispc.vcxproj index b9a3b6c5..218bfd5c 100755 --- a/ispc.vcxproj +++ b/ispc.vcxproj @@ -57,17 +57,17 @@ - - - - + + + + - - - - - + + + + + 4146;4800;4996;4355;4624;4005;4003;4018 @@ -132,383 +132,215 @@ Document - m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS -DRUNTIME=32 builtins/target-sse4.ll | python bitcode2cpp.py builtins\target-sse4.ll 32bit > $(Configuration)/gen-bitcode-sse4-32bit.cpp - $(Configuration)/gen-bitcode-sse4-32bit.cpp + m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS -DRUNTIME=32 builtins/target-sse4.ll | python bitcode2cpp.py builtins\target-sse4.ll 32bit > $(Configuration)/gen-bitcode-sse4-32bit.cpp; + m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS -DRUNTIME=64 builtins/target-sse4.ll | python bitcode2cpp.py builtins\target-sse4.ll 64bit > $(Configuration)/gen-bitcode-sse4-64bit.cpp + $(Configuration)/gen-bitcode-sse4-32bit.cpp; $(Configuration)/gen-bitcode-sse4-64bit.cpp builtins\util.m4;builtins\svml.m4;builtins\target-sse4-common.ll - Building gen-bitcode-sse4-32bit.cpp + Building gen-bitcode-sse4-32bit.cpp and gen-bitcode-sse4-64bit.cpp - + Document - m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS -DRUNTIME=64 builtins/target-sse4.ll | python bitcode2cpp.py builtins\target-sse4.ll 64bit > $(Configuration)/gen-bitcode-sse4-64bit.cpp - $(Configuration)/gen-bitcode-sse4-64bit.cpp + m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS -DRUNTIME=32 builtins/target-sse4-8.ll | python bitcode2cpp.py builtins\target-sse4-8.ll 32bit > $(Configuration)/gen-bitcode-sse4-8-32bit.cpp; + m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS -DRUNTIME=64 builtins/target-sse4-8.ll | python bitcode2cpp.py builtins\target-sse4-8.ll 64bit > $(Configuration)/gen-bitcode-sse4-8-64bit.cpp + $(Configuration)/gen-bitcode-sse4-8-32bit.cpp; $(Configuration)/gen-bitcode-sse4-8-64bit.cpp builtins\util.m4;builtins\svml.m4;builtins\target-sse4-common.ll - Building gen-bitcode-sse4-64bit.cpp + Building gen-bitcode-sse4-8-32bit.cpp + + + + + Document + m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS -DRUNTIME=32 builtins/target-sse4-16.ll | python bitcode2cpp.py builtins\target-sse4-16.ll 32bit > $(Configuration)/gen-bitcode-sse4-16-32bit.cpp; + m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS -DRUNTIME=64 builtins/target-sse4-16.ll | python bitcode2cpp.py builtins\target-sse4-16.ll 64bit > $(Configuration)/gen-bitcode-sse4-16-64bit.cpp + $(Configuration)/gen-bitcode-sse4-16-32bit.cpp; $(Configuration)/gen-bitcode-sse4-16-64bit.cpp + builtins\util.m4;builtins\svml.m4;builtins\target-sse4-common.ll + Building gen-bitcode-sse4-16-32bit.cpp - - - Document - m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS -DRUNTIME=32 builtins/target-sse4-8.ll | python bitcode2cpp.py builtins\target-sse4-8.ll 32bit > $(Configuration)/gen-bitcode-sse4-8-32bit.cpp - $(Configuration)/gen-bitcode-sse4-8-32bit.cpp - builtins\util.m4;builtins\svml.m4;builtins\target-sse4-common.ll - Building gen-bitcode-sse4-8-32bit.cpp - - - - - Document - m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS -DRUNTIME=64 builtins/target-sse4-8.ll | python bitcode2cpp.py builtins\target-sse4-8.ll 64bit > $(Configuration)/gen-bitcode-sse4-8-64bit.cpp - $(Configuration)/gen-bitcode-sse4-8-64bit.cpp - builtins\util.m4;builtins\svml.m4;builtins\target-sse4-common.ll - Building gen-bitcode-sse4-8-64bit.cpp - - - - - Document - m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS -DRUNTIME=32 builtins/target-sse4-16.ll | python bitcode2cpp.py builtins\target-sse4-16.ll 32bit > $(Configuration)/gen-bitcode-sse4-16-32bit.cpp - $(Configuration)/gen-bitcode-sse4-16-32bit.cpp - builtins\util.m4;builtins\svml.m4;builtins\target-sse4-common.ll - Building gen-bitcode-sse4-16-32bit.cpp - - - - - Document - m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS -DRUNTIME=64 builtins/target-sse4-16.ll | python bitcode2cpp.py builtins\target-sse4-16.ll 64bit > $(Configuration)/gen-bitcode-sse4-16-64bit.cpp - $(Configuration)/gen-bitcode-sse4-16-64bit.cpp - builtins\util.m4;builtins\svml.m4;builtins\target-sse4-common.ll - Building gen-bitcode-sse4-16-64bit.cpp - - Document - m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS -DRUNTIME=32 builtins/target-sse4-x2.ll | python bitcode2cpp.py builtins\target-sse4-x2.ll 32bit > $(Configuration)/gen-bitcode-sse4-x2-32bit.cpp - $(Configuration)/gen-bitcode-sse4-x2-32bit.cpp + m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS -DRUNTIME=32 builtins/target-sse4-x2.ll | python bitcode2cpp.py builtins\target-sse4-x2.ll 32bit > $(Configuration)/gen-bitcode-sse4-x2-32bit.cpp; + m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS -DRUNTIME=64 builtins/target-sse4-x2.ll | python bitcode2cpp.py builtins\target-sse4-x2.ll 64bit > $(Configuration)/gen-bitcode-sse4-x2-64bit.cpp + $(Configuration)/gen-bitcode-sse4-x2-32bit.cpp; $(Configuration)/gen-bitcode-sse4-x2-64bit.cpp builtins\util.m4;builtins\svml.m4;builtins\target-sse4-common.ll Building gen-bitcode-sse4-x2-32bit.cpp - - - Document - m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS -DRUNTIME=64 builtins/target-sse4-x2.ll | python bitcode2cpp.py builtins\target-sse4-x2.ll 64bit > $(Configuration)/gen-bitcode-sse4-x2-64bit.cpp - $(Configuration)/gen-bitcode-sse4-x2-64bit.cpp - builtins\util.m4;builtins\svml.m4;builtins\target-sse4-common.ll - Building gen-bitcode-sse4-x2-64bit.cpp - - Document - m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS -DRUNTIME=32 builtins/target-sse2.ll | python bitcode2cpp.py builtins\target-sse2.ll 32bit > $(Configuration)/gen-bitcode-sse2-32bit.cpp - $(Configuration)/gen-bitcode-sse2-32bit.cpp + m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS -DRUNTIME=32 builtins/target-sse2.ll | python bitcode2cpp.py builtins\target-sse2.ll 32bit > $(Configuration)/gen-bitcode-sse2-32bit.cpp; + m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS -DRUNTIME=64 builtins/target-sse2.ll | python bitcode2cpp.py builtins\target-sse2.ll 64bit > $(Configuration)/gen-bitcode-sse2-64bit.cpp + $(Configuration)/gen-bitcode-sse2-32bit.cpp; $(Configuration)/gen-bitcode-sse2-64bit.cpp builtins\util.m4;builtins\svml.m4;builtins\target-sse2-common.ll Building gen-bitcode-sse2-32bit.cpp - - - Document - m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS -DRUNTIME=64 builtins/target-sse2.ll | python bitcode2cpp.py builtins\target-sse2.ll 64bit > $(Configuration)/gen-bitcode-sse2-64bit.cpp - $(Configuration)/gen-bitcode-sse2-64bit.cpp - builtins\util.m4;builtins\svml.m4;builtins\target-sse2-common.ll - Building gen-bitcode-sse2-64bit.cpp - - Document - m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS -DRUNTIME=32 builtins/target-sse2-x2.ll | python bitcode2cpp.py builtins\target-sse2-x2.ll 32bit > $(Configuration)/gen-bitcode-sse2-x2-32bit.cpp - $(Configuration)/gen-bitcode-sse2-x2-32bit.cpp + m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS -DRUNTIME=32 builtins/target-sse2-x2.ll | python bitcode2cpp.py builtins\target-sse2-x2.ll 32bit > $(Configuration)/gen-bitcode-sse2-x2-32bit.cpp; + m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS -DRUNTIME=64 builtins/target-sse2-x2.ll | python bitcode2cpp.py builtins\target-sse2-x2.ll 64bit > $(Configuration)/gen-bitcode-sse2-x2-64bit.cpp + $(Configuration)/gen-bitcode-sse2-x2-32bit.cpp; $(Configuration)/gen-bitcode-sse2-x2-64bit.cpp builtins\util.m4;builtins\svml.m4;builtins\target-sse2-common.ll Building gen-bitcode-sse2-x2-32bit.cpp - - - Document - m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS -DRUNTIME=64 builtins/target-sse2-x2.ll | python bitcode2cpp.py builtins\target-sse2-x2.ll 64bit > $(Configuration)/gen-bitcode-sse2-x2-64bit.cpp - $(Configuration)/gen-bitcode-sse2-x2-64bit.cpp - builtins\util.m4;builtins\svml.m4;builtins\target-sse2-common.ll - Building gen-bitcode-sse2-x2-64bit.cpp - - Document - m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS -DRUNTIME=32 builtins/target-avx1.ll | python bitcode2cpp.py builtins\target-avx1.ll 32bit > $(Configuration)/gen-bitcode-avx1-32bit.cpp - $(Configuration)/gen-bitcode-avx1-32bit.cpp + m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS -DRUNTIME=32 builtins/target-avx1.ll | python bitcode2cpp.py builtins\target-avx1.ll 32bit > $(Configuration)/gen-bitcode-avx1-32bit.cpp; + m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS -DRUNTIME=64 builtins/target-avx1.ll | python bitcode2cpp.py builtins\target-avx1.ll 64bit > $(Configuration)/gen-bitcode-avx1-64bit.cpp + $(Configuration)/gen-bitcode-avx1-32bit.cpp; $(Configuration)/gen-bitcode-avx1-64bit.cpp builtins\util.m4;builtins\svml.m4;builtins\target-avx-common.ll;builtins\target-avx.ll Building gen-bitcode-avx1-32bit.cpp - - - Document - m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS -DRUNTIME=64 builtins/target-avx1.ll | python bitcode2cpp.py builtins\target-avx1.ll 64bit > $(Configuration)/gen-bitcode-avx1-64bit.cpp - $(Configuration)/gen-bitcode-avx1-64bit.cpp - builtins\util.m4;builtins\svml.m4;builtins\target-avx-common.ll;builtins\target-avx.ll - Building gen-bitcode-avx1-64bit.cpp - - Document - m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS -DRUNTIME=32 builtins/target-avx1-x2.ll | python bitcode2cpp.py builtins\target-avx1-x2.ll 32bit > $(Configuration)/gen-bitcode-avx1-x2-32bit.cpp - $(Configuration)/gen-bitcode-avx1-x2-32bit.cpp + m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS -DRUNTIME=32 builtins/target-avx1-x2.ll | python bitcode2cpp.py builtins\target-avx1-x2.ll 32bit > $(Configuration)/gen-bitcode-avx1-x2-32bit.cpp; + m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS -DRUNTIME=64 builtins/target-avx1-x2.ll | python bitcode2cpp.py builtins\target-avx1-x2.ll 64bit > $(Configuration)/gen-bitcode-avx1-x2-64bit.cpp + $(Configuration)/gen-bitcode-avx1-x2-32bit.cpp; $(Configuration)/gen-bitcode-avx1-x2-64bit.cpp builtins\util.m4;builtins\svml.m4;builtins\target-avx-common.ll;builtins\target-avx-x2.ll Building gen-bitcode-avx1-x2-32bit.cpp - - - Document - m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS -DRUNTIME=64 builtins/target-avx1-x2.ll | python bitcode2cpp.py builtins\target-avx1-x2.ll 64bit > $(Configuration)/gen-bitcode-avx1-x2-64bit.cpp - $(Configuration)/gen-bitcode-avx1-x2-64bit.cpp - builtins\util.m4;builtins\svml.m4;builtins\target-avx-common.ll;builtins\target-avx-x2.ll - Building gen-bitcode-avx1-x2-64bit.cpp - - Document - m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS -DRUNTIME=32 builtins/target-avx1-i64x4.ll | python bitcode2cpp.py builtins\target-avx1-i64x4.ll 32bit > $(Configuration)/gen-bitcode-avx1-i64x4-32bit.cpp - $(Configuration)/gen-bitcode-avx1-i64x4-32bit.cpp + m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS -DRUNTIME=32 builtins/target-avx1-i64x4.ll | python bitcode2cpp.py builtins\target-avx1-i64x4.ll 32bit > $(Configuration)/gen-bitcode-avx1-i64x4-32bit.cpp; + m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS -DRUNTIME=64 builtins/target-avx1-i64x4.ll | python bitcode2cpp.py builtins\target-avx1-i64x4.ll 64bit > $(Configuration)/gen-bitcode-avx1-i64x4-64bit.cpp + $(Configuration)/gen-bitcode-avx1-i64x4-32bit.cpp; $(Configuration)/gen-bitcode-avx1-i64x4-64bit.cpp builtins\util.m4;builtins\svml.m4;builtins\target-avx-common.ll;builtins\target-avx.ll;builtins\target-avx1-i64x4base.ll Building gen-bitcode-avx1-i64x4-32bit.cpp - - - Document - m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS -DRUNTIME=64 builtins/target-avx1-i64x4.ll | python bitcode2cpp.py builtins\target-avx1-i64x4.ll 64bit > $(Configuration)/gen-bitcode-avx1-i64x4-64bit.cpp - $(Configuration)/gen-bitcode-avx1-i64x4-64bit.cpp - builtins\util.m4;builtins\svml.m4;builtins\target-avx-common.ll;builtins\target-avx.ll;builtins\target-avx1-i64x4base.ll - Building gen-bitcode-avx1-i64x4-64bit.cpp - - Document - m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS -DRUNTIME=32 builtins/target-avx11.ll | python bitcode2cpp.py builtins\target-avx11.ll 32bit > $(Configuration)/gen-bitcode-avx11-32bit.cpp - $(Configuration)/gen-bitcode-avx11-32bit.cpp + m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS -DRUNTIME=32 builtins/target-avx11.ll | python bitcode2cpp.py builtins\target-avx11.ll 32bit > $(Configuration)/gen-bitcode-avx11-32bit.cpp; + m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS -DRUNTIME=64 builtins/target-avx11.ll | python bitcode2cpp.py builtins\target-avx11.ll 64bit > $(Configuration)/gen-bitcode-avx11-64bit.cpp + $(Configuration)/gen-bitcode-avx11-32bit.cpp; $(Configuration)/gen-bitcode-avx11-64bit.cpp builtins\util.m4;builtins\svml.m4;builtins\target-avx-common.ll;builtins\target-avx.ll Building gen-bitcode-avx11-32bit.cpp - - - Document - m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS -DRUNTIME=64 builtins/target-avx11.ll | python bitcode2cpp.py builtins\target-avx11.ll 64bit > $(Configuration)/gen-bitcode-avx11-64bit.cpp - $(Configuration)/gen-bitcode-avx11-64bit.cpp - builtins\util.m4;builtins\svml.m4;builtins\target-avx-common.ll;builtins\target-avx.ll - Building gen-bitcode-avx11-64bit.cpp - - Document - m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS -DRUNTIME=32 builtins/target-avx11-x2.ll | python bitcode2cpp.py builtins\target-avx11-x2.ll 32bit > $(Configuration)/gen-bitcode-avx11-x2-32bit.cpp - $(Configuration)/gen-bitcode-avx11-x2-32bit.cpp + m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS -DRUNTIME=32 builtins/target-avx11-x2.ll | python bitcode2cpp.py builtins\target-avx11-x2.ll 32bit > $(Configuration)/gen-bitcode-avx11-x2-32bit.cpp; + m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS -DRUNTIME=64 builtins/target-avx11-x2.ll | python bitcode2cpp.py builtins\target-avx11-x2.ll 64bit > $(Configuration)/gen-bitcode-avx11-x2-64bit.cpp + $(Configuration)/gen-bitcode-avx11-x2-32bit.cpp; $(Configuration)/gen-bitcode-avx11-x2-64bit.cpp builtins\util.m4;builtins\svml.m4;builtins\target-avx-common.ll;builtins\target-avx-x2.ll Building gen-bitcode-avx11-x2-32bit.cpp - - - Document - m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS -DRUNTIME=64 builtins/target-avx11-x2.ll | python bitcode2cpp.py builtins\target-avx11-x2.ll 64bit > $(Configuration)/gen-bitcode-avx11-x2-64bit.cpp - $(Configuration)/gen-bitcode-avx11-x2-64bit.cpp - builtins\util.m4;builtins\svml.m4;builtins\target-avx-common.ll;builtins\target-avx-x2.ll - Building gen-bitcode-avx11-x2-64bit.cpp - - Document - m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS -DRUNTIME=32 builtins/target-avx11-i64x4.ll | python bitcode2cpp.py builtins\target-avx11-i64x4.ll 32bit > $(Configuration)/gen-bitcode-avx11-i64x4-32bit.cpp - $(Configuration)/gen-bitcode-avx11-i64x4-32bit.cpp + m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS -DRUNTIME=32 builtins/target-avx11-i64x4.ll | python bitcode2cpp.py builtins\target-avx11-i64x4.ll 32bit > $(Configuration)/gen-bitcode-avx11-i64x4-32bit.cpp; + m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS -DRUNTIME=64 builtins/target-avx11-i64x4.ll | python bitcode2cpp.py builtins\target-avx11-i64x4.ll 64bit > $(Configuration)/gen-bitcode-avx11-i64x4-64bit.cpp + $(Configuration)/gen-bitcode-avx11-i64x4-32bit.cpp; $(Configuration)/gen-bitcode-avx11-i64x4-64bit.cpp builtins\util.m4;builtins\svml.m4;builtins\target-avx-common.ll;builtins\target-avx.ll;builtins\target-avx1-i64x4base.ll Building gen-bitcode-avx11-i64x4-32bit.cpp - - - Document - m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS -DRUNTIME=64 builtins/target-avx11-i64x4.ll | python bitcode2cpp.py builtins\target-avx11-i64x4.ll 64bit > $(Configuration)/gen-bitcode-avx11-i64x4-64bit.cpp - $(Configuration)/gen-bitcode-avx11-i64x4-64bit.cpp - builtins\util.m4;builtins\svml.m4;builtins\target-avx-common.ll;builtins\target-avx.ll;builtins\target-avx1-i64x4base.ll - Building gen-bitcode-avx11-i64x4-64bit.cpp - - Document - m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS -DRUNTIME=32 builtins/target-avx2.ll | python bitcode2cpp.py builtins\target-avx2.ll 32bit > $(Configuration)/gen-bitcode-avx2-32bit.cpp - $(Configuration)/gen-bitcode-avx2-32bit.cpp + m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS -DRUNTIME=32 builtins/target-avx2.ll | python bitcode2cpp.py builtins\target-avx2.ll 32bit > $(Configuration)/gen-bitcode-avx2-32bit.cpp; + m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS -DRUNTIME=64 builtins/target-avx2.ll | python bitcode2cpp.py builtins\target-avx2.ll 64bit > $(Configuration)/gen-bitcode-avx2-64bit.cpp + $(Configuration)/gen-bitcode-avx2-32bit.cpp; $(Configuration)/gen-bitcode-avx2-64bit.cpp builtins\util.m4;builtins\svml.m4;builtins\target-avx-common.ll;builtins\target-avx.ll Building gen-bitcode-avx2-32bit.cpp - - - Document - m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS -DRUNTIME=64 builtins/target-avx2.ll | python bitcode2cpp.py builtins\target-avx2.ll 64bit > $(Configuration)/gen-bitcode-avx2-64bit.cpp - $(Configuration)/gen-bitcode-avx2-64bit.cpp - builtins\util.m4;builtins\svml.m4;builtins\target-avx-common.ll;builtins\target-avx.ll - Building gen-bitcode-avx2-64bit.cpp - - Document - m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS -DRUNTIME=32 builtins/target-avx2-x2.ll | python bitcode2cpp.py builtins\target-avx2-x2.ll 32bit > $(Configuration)/gen-bitcode-avx2-x2-32bit.cpp - $(Configuration)/gen-bitcode-avx2-x2-32bit.cpp + m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS -DRUNTIME=32 builtins/target-avx2-x2.ll | python bitcode2cpp.py builtins\target-avx2-x2.ll 32bit > $(Configuration)/gen-bitcode-avx2-x2-32bit.cpp; + m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS -DRUNTIME=64 builtins/target-avx2-x2.ll | python bitcode2cpp.py builtins\target-avx2-x2.ll 64bit > $(Configuration)/gen-bitcode-avx2-x2-64bit.cpp + $(Configuration)/gen-bitcode-avx2-x2-32bit.cpp; $(Configuration)/gen-bitcode-avx2-x2-64bit.cpp builtins\util.m4;builtins\svml.m4;builtins\target-avx-common.ll;builtins\target-avx-x2.ll Building gen-bitcode-avx2-x2-32bit.cpp - - - Document - m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS -DRUNTIME=64 builtins/target-avx2-x2.ll | python bitcode2cpp.py builtins\target-avx2-x2.ll 64bit > $(Configuration)/gen-bitcode-avx2-x2-64bit.cpp - $(Configuration)/gen-bitcode-avx2-x2-64bit.cpp - builtins\util.m4;builtins\svml.m4;builtins\target-avx-common.ll;builtins\target-avx-x2.ll - Building gen-bitcode-avx2-x2-64bit.cpp - - Document - m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS -DRUNTIME=32 builtins/target-avx2-i64x4.ll | python bitcode2cpp.py builtins\target-avx2-i64x4.ll 32bit > $(Configuration)/gen-bitcode-avx2-i64x4-32bit.cpp - $(Configuration)/gen-bitcode-avx2-i64x4-32bit.cpp + m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS -DRUNTIME=32 builtins/target-avx2-i64x4.ll | python bitcode2cpp.py builtins\target-avx2-i64x4.ll 32bit > $(Configuration)/gen-bitcode-avx2-i64x4-32bit.cpp; + m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS -DRUNTIME=64 builtins/target-avx2-i64x4.ll | python bitcode2cpp.py builtins\target-avx2-i64x4.ll 64bit > $(Configuration)/gen-bitcode-avx2-i64x4-64bit.cpp + $(Configuration)/gen-bitcode-avx2-i64x4-32bit.cpp; $(Configuration)/gen-bitcode-avx2-i64x4-64bit.cpp builtins\util.m4;builtins\svml.m4;builtins\target-avx-common.ll;builtins\target-avx.ll;builtins\target-avx1-i64x4base.ll Building gen-bitcode-avx2-i64x4-32bit.cpp - - - Document - m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS -DRUNTIME=64 builtins/target-avx2-i64x4.ll | python bitcode2cpp.py builtins\target-avx2-i64x4.ll 64bit > $(Configuration)/gen-bitcode-avx2-i64x4-64bit.cpp - $(Configuration)/gen-bitcode-avx2-i64x4-64bit.cpp - builtins\util.m4;builtins\svml.m4;builtins\target-avx-common.ll;builtins\target-avx.ll;builtins\target-avx1-i64x4base.ll - Building gen-bitcode-avx2-i64x4-64bit.cpp - - Document - m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS -DRUNTIME=32 builtins/target-generic-1.ll | python bitcode2cpp.py builtins\target-generic-1.ll 32bit > $(Configuration)/gen-bitcode-generic-1-32bit.cpp - $(Configuration)/gen-bitcode-generic-1-32bit.cpp + m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS -DRUNTIME=32 builtins/target-generic-1.ll | python bitcode2cpp.py builtins\target-generic-1.ll 32bit > $(Configuration)/gen-bitcode-generic-1-32bit.cpp; + m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS -DRUNTIME=64 builtins/target-generic-1.ll | python bitcode2cpp.py builtins\target-generic-1.ll 64bit > $(Configuration)/gen-bitcode-generic-1-64bit.cpp + $(Configuration)/gen-bitcode-generic-1-32bit.cpp; $(Configuration)/gen-bitcode-generic-1-64bit.cpp builtins\util.m4;builtins\svml.m4;builtins\target-generic-common.ll Building gen-bitcode-generic-1-32bit.cpp - - - Document - m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS -DRUNTIME=64 builtins/target-generic-1.ll | python bitcode2cpp.py builtins\target-generic-1.ll 64bit > $(Configuration)/gen-bitcode-generic-1-64bit.cpp - $(Configuration)/gen-bitcode-generic-1-64bit.cpp - builtins\util.m4;builtins\svml.m4;builtins\target-generic-common.ll - Building gen-bitcode-generic-1-64bit.cpp - - Document - m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS -DRUNTIME=32 builtins/target-generic-4.ll | python bitcode2cpp.py builtins\target-generic-4.ll 32bit > $(Configuration)/gen-bitcode-generic-4-32bit.cpp - $(Configuration)/gen-bitcode-generic-4-32bit.cpp + m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS -DRUNTIME=32 builtins/target-generic-4.ll | python bitcode2cpp.py builtins\target-generic-4.ll 32bit > $(Configuration)/gen-bitcode-generic-4-32bit.cpp; + m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS -DRUNTIME=64 builtins/target-generic-4.ll | python bitcode2cpp.py builtins\target-generic-4.ll 64bit > $(Configuration)/gen-bitcode-generic-4-64bit.cpp + $(Configuration)/gen-bitcode-generic-4-32bit.cpp; $(Configuration)/gen-bitcode-generic-4-64bit.cpp builtins\util.m4;builtins\svml.m4;builtins\target-generic-common.ll Building gen-bitcode-generic-4-32bit.cpp - - - Document - m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS -DRUNTIME=64 builtins/target-generic-4.ll | python bitcode2cpp.py builtins\target-generic-4.ll 64bit > $(Configuration)/gen-bitcode-generic-4-64bit.cpp - $(Configuration)/gen-bitcode-generic-4-64bit.cpp - builtins\util.m4;builtins\svml.m4;builtins\target-generic-common.ll - Building gen-bitcode-generic-4-64bit.cpp - - Document - m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS -DRUNTIME=32 builtins/target-generic-8.ll | python bitcode2cpp.py builtins\target-generic-8.ll 32bit > $(Configuration)/gen-bitcode-generic-8-32bit.cpp - $(Configuration)/gen-bitcode-generic-8-32bit.cpp + m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS -DRUNTIME=32 builtins/target-generic-8.ll | python bitcode2cpp.py builtins\target-generic-8.ll 32bit > $(Configuration)/gen-bitcode-generic-8-32bit.cpp; + m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS -DRUNTIME=64 builtins/target-generic-8.ll | python bitcode2cpp.py builtins\target-generic-8.ll 64bit > $(Configuration)/gen-bitcode-generic-8-64bit.cpp + $(Configuration)/gen-bitcode-generic-8-32bit.cpp; $(Configuration)/gen-bitcode-generic-8-64bit.cpp builtins\util.m4;builtins\svml.m4;builtins\target-generic-common.ll Building gen-bitcode-generic-8-32bit.cpp - - - Document - m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS -DRUNTIME=64 builtins/target-generic-8.ll | python bitcode2cpp.py builtins\target-generic-8.ll 64bit > $(Configuration)/gen-bitcode-generic-8-64bit.cpp - $(Configuration)/gen-bitcode-generic-8-64bit.cpp - builtins\util.m4;builtins\svml.m4;builtins\target-generic-common.ll - Building gen-bitcode-generic-8-64bit.cpp - - Document - m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS -DRUNTIME=32 builtins/target-generic-16.ll | python bitcode2cpp.py builtins\target-generic-16.ll 32bit > $(Configuration)/gen-bitcode-generic-16-32bit.cpp - $(Configuration)/gen-bitcode-generic-16-32bit.cpp + m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS -DRUNTIME=32 builtins/target-generic-16.ll | python bitcode2cpp.py builtins\target-generic-16.ll 32bit > $(Configuration)/gen-bitcode-generic-16-32bit.cpp; + m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS -DRUNTIME=64 builtins/target-generic-16.ll | python bitcode2cpp.py builtins\target-generic-16.ll 64bit > $(Configuration)/gen-bitcode-generic-16-64bit.cpp + $(Configuration)/gen-bitcode-generic-16-32bit.cpp; $(Configuration)/gen-bitcode-generic-16-64bit.cpp builtins\util.m4;builtins\svml.m4;builtins\target-generic-common.ll Building gen-bitcode-generic-16-32bit.cpp - - - Document - m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS -DRUNTIME=64 builtins/target-generic-16.ll | python bitcode2cpp.py builtins\target-generic-16.ll 64bit > $(Configuration)/gen-bitcode-generic-16-64bit.cpp - $(Configuration)/gen-bitcode-generic-16-64bit.cpp - builtins\util.m4;builtins\svml.m4;builtins\target-generic-common.ll - Building gen-bitcode-generic-16-64bit.cpp - - Document - m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS -DRUNTIME=32 builtins/target-generic-32.ll | python bitcode2cpp.py builtins\target-generic-32.ll 32bit > $(Configuration)/gen-bitcode-generic-32-32bit.cpp - $(Configuration)/gen-bitcode-generic-32-32bit.cpp + m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS -DRUNTIME=32 builtins/target-generic-32.ll | python bitcode2cpp.py builtins\target-generic-32.ll 32bit > $(Configuration)/gen-bitcode-generic-32-32bit.cpp; + m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS -DRUNTIME=64 builtins/target-generic-32.ll | python bitcode2cpp.py builtins\target-generic-32.ll 64bit > $(Configuration)/gen-bitcode-generic-32-64bit.cpp + $(Configuration)/gen-bitcode-generic-32-32bit.cpp; $(Configuration)/gen-bitcode-generic-32-64bit.cpp builtins\util.m4;builtins\svml.m4;builtins\target-generic-common.ll Building gen-bitcode-generic-32-32bit.cpp - - - Document - m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS -DRUNTIME=64 builtins/target-generic-32.ll | python bitcode2cpp.py builtins\target-generic-32.ll 64bit > $(Configuration)/gen-bitcode-generic-32-64bit.cpp - $(Configuration)/gen-bitcode-generic-32-64bit.cpp - builtins\util.m4;builtins\svml.m4;builtins\target-generic-common.ll - Building gen-bitcode-generic-32-64bit.cpp - - Document - m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS -DRUNTIME=32 builtins/target-generic-64.ll | python bitcode2cpp.py builtins\target-generic-64.ll 32bit > $(Configuration)/gen-bitcode-generic-64-32bit.cpp - $(Configuration)/gen-bitcode-generic-64-32bit.cpp + m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS -DRUNTIME=32 builtins/target-generic-64.ll | python bitcode2cpp.py builtins\target-generic-64.ll 32bit > $(Configuration)/gen-bitcode-generic-64-32bit.cpp; + m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS -DRUNTIME=64 builtins/target-generic-64.ll | python bitcode2cpp.py builtins\target-generic-64.ll 64bit > $(Configuration)/gen-bitcode-generic-64-64bit.cpp + $(Configuration)/gen-bitcode-generic-64-32bit.cpp; $(Configuration)/gen-bitcode-generic-64-64bit.cpp builtins\util.m4;builtins\svml.m4;builtins\target-generic-common.ll Building gen-bitcode-generic-64-32bit.cpp - - Document - m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS -DRUNTIME=64 builtins/target-generic-64.ll | python bitcode2cpp.py builtins\target-generic-64.ll 64bit > $(Configuration)/gen-bitcode-generic-64-64bit.cpp - $(Configuration)/gen-bitcode-generic-64-64bit.cpp - builtins\util.m4;builtins\svml.m4;builtins\target-generic-common.ll - Building gen-bitcode-generic-64-64bit.cpp - - - - + Document flex -t lex.ll > $(Configuration)\lex.cc $(Configuration)\lex.cc @@ -597,4 +429,4 @@ - + From e172d7f1a95076c6ea9e70cac9d20c2edc2848d1 Mon Sep 17 00:00:00 2001 From: Dmitry Babokin Date: Sun, 1 Dec 2013 16:18:06 +0400 Subject: [PATCH 12/21] Update build messages (Windows) --- ispc.vcxproj | 40 ++++++++++++++++++++-------------------- 1 file changed, 20 insertions(+), 20 deletions(-) diff --git a/ispc.vcxproj b/ispc.vcxproj index 218bfd5c..8aee2988 100755 --- a/ispc.vcxproj +++ b/ispc.vcxproj @@ -146,7 +146,7 @@ m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS -DRUNTIME=64 builtins/target-sse4-8.ll | python bitcode2cpp.py builtins\target-sse4-8.ll 64bit > $(Configuration)/gen-bitcode-sse4-8-64bit.cpp $(Configuration)/gen-bitcode-sse4-8-32bit.cpp; $(Configuration)/gen-bitcode-sse4-8-64bit.cpp builtins\util.m4;builtins\svml.m4;builtins\target-sse4-common.ll - Building gen-bitcode-sse4-8-32bit.cpp + Building gen-bitcode-sse4-8-32bit.cpp and gen-bitcode-sse4-8-64bit.cpp @@ -156,7 +156,7 @@ m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS -DRUNTIME=64 builtins/target-sse4-16.ll | python bitcode2cpp.py builtins\target-sse4-16.ll 64bit > $(Configuration)/gen-bitcode-sse4-16-64bit.cpp $(Configuration)/gen-bitcode-sse4-16-32bit.cpp; $(Configuration)/gen-bitcode-sse4-16-64bit.cpp builtins\util.m4;builtins\svml.m4;builtins\target-sse4-common.ll - Building gen-bitcode-sse4-16-32bit.cpp + Building gen-bitcode-sse4-16-32bit.cpp and gen-bitcode-sse4-16-64bit.cpp @@ -166,7 +166,7 @@ m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS -DRUNTIME=64 builtins/target-sse4-x2.ll | python bitcode2cpp.py builtins\target-sse4-x2.ll 64bit > $(Configuration)/gen-bitcode-sse4-x2-64bit.cpp $(Configuration)/gen-bitcode-sse4-x2-32bit.cpp; $(Configuration)/gen-bitcode-sse4-x2-64bit.cpp builtins\util.m4;builtins\svml.m4;builtins\target-sse4-common.ll - Building gen-bitcode-sse4-x2-32bit.cpp + Building gen-bitcode-sse4-x2-32bit.cpp and gen-bitcode-sse4-x2-64bit.cpp @@ -176,7 +176,7 @@ m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS -DRUNTIME=64 builtins/target-sse2.ll | python bitcode2cpp.py builtins\target-sse2.ll 64bit > $(Configuration)/gen-bitcode-sse2-64bit.cpp $(Configuration)/gen-bitcode-sse2-32bit.cpp; $(Configuration)/gen-bitcode-sse2-64bit.cpp builtins\util.m4;builtins\svml.m4;builtins\target-sse2-common.ll - Building gen-bitcode-sse2-32bit.cpp + Building gen-bitcode-sse2-32bit.cpp and gen-bitcode-sse2-64bit.cpp @@ -186,7 +186,7 @@ m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS -DRUNTIME=64 builtins/target-sse2-x2.ll | python bitcode2cpp.py builtins\target-sse2-x2.ll 64bit > $(Configuration)/gen-bitcode-sse2-x2-64bit.cpp $(Configuration)/gen-bitcode-sse2-x2-32bit.cpp; $(Configuration)/gen-bitcode-sse2-x2-64bit.cpp builtins\util.m4;builtins\svml.m4;builtins\target-sse2-common.ll - Building gen-bitcode-sse2-x2-32bit.cpp + Building gen-bitcode-sse2-x2-32bit.cpp and gen-bitcode-sse2-x2-64bit.cpp @@ -196,7 +196,7 @@ m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS -DRUNTIME=64 builtins/target-avx1.ll | python bitcode2cpp.py builtins\target-avx1.ll 64bit > $(Configuration)/gen-bitcode-avx1-64bit.cpp $(Configuration)/gen-bitcode-avx1-32bit.cpp; $(Configuration)/gen-bitcode-avx1-64bit.cpp builtins\util.m4;builtins\svml.m4;builtins\target-avx-common.ll;builtins\target-avx.ll - Building gen-bitcode-avx1-32bit.cpp + Building gen-bitcode-avx1-32bit.cpp and gen-bitcode-avx1-64bit.cpp @@ -206,7 +206,7 @@ m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS -DRUNTIME=64 builtins/target-avx1-x2.ll | python bitcode2cpp.py builtins\target-avx1-x2.ll 64bit > $(Configuration)/gen-bitcode-avx1-x2-64bit.cpp $(Configuration)/gen-bitcode-avx1-x2-32bit.cpp; $(Configuration)/gen-bitcode-avx1-x2-64bit.cpp builtins\util.m4;builtins\svml.m4;builtins\target-avx-common.ll;builtins\target-avx-x2.ll - Building gen-bitcode-avx1-x2-32bit.cpp + Building gen-bitcode-avx1-x2-32bit.cpp and gen-bitcode-avx1-x2-64bit.cpp @@ -216,7 +216,7 @@ m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS -DRUNTIME=64 builtins/target-avx1-i64x4.ll | python bitcode2cpp.py builtins\target-avx1-i64x4.ll 64bit > $(Configuration)/gen-bitcode-avx1-i64x4-64bit.cpp $(Configuration)/gen-bitcode-avx1-i64x4-32bit.cpp; $(Configuration)/gen-bitcode-avx1-i64x4-64bit.cpp builtins\util.m4;builtins\svml.m4;builtins\target-avx-common.ll;builtins\target-avx.ll;builtins\target-avx1-i64x4base.ll - Building gen-bitcode-avx1-i64x4-32bit.cpp + Building gen-bitcode-avx1-i64x4-32bit.cpp and gen-bitcode-avx1-i64x4-64bit.cpp @@ -226,7 +226,7 @@ m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS -DRUNTIME=64 builtins/target-avx11.ll | python bitcode2cpp.py builtins\target-avx11.ll 64bit > $(Configuration)/gen-bitcode-avx11-64bit.cpp $(Configuration)/gen-bitcode-avx11-32bit.cpp; $(Configuration)/gen-bitcode-avx11-64bit.cpp builtins\util.m4;builtins\svml.m4;builtins\target-avx-common.ll;builtins\target-avx.ll - Building gen-bitcode-avx11-32bit.cpp + Building gen-bitcode-avx11-32bit.cpp and gen-bitcode-avx11-64bit.cpp @@ -236,7 +236,7 @@ m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS -DRUNTIME=64 builtins/target-avx11-x2.ll | python bitcode2cpp.py builtins\target-avx11-x2.ll 64bit > $(Configuration)/gen-bitcode-avx11-x2-64bit.cpp $(Configuration)/gen-bitcode-avx11-x2-32bit.cpp; $(Configuration)/gen-bitcode-avx11-x2-64bit.cpp builtins\util.m4;builtins\svml.m4;builtins\target-avx-common.ll;builtins\target-avx-x2.ll - Building gen-bitcode-avx11-x2-32bit.cpp + Building gen-bitcode-avx11-x2-32bit.cpp and gen-bitcode-avx11-x2-64bit.cpp @@ -246,7 +246,7 @@ m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS -DRUNTIME=64 builtins/target-avx11-i64x4.ll | python bitcode2cpp.py builtins\target-avx11-i64x4.ll 64bit > $(Configuration)/gen-bitcode-avx11-i64x4-64bit.cpp $(Configuration)/gen-bitcode-avx11-i64x4-32bit.cpp; $(Configuration)/gen-bitcode-avx11-i64x4-64bit.cpp builtins\util.m4;builtins\svml.m4;builtins\target-avx-common.ll;builtins\target-avx.ll;builtins\target-avx1-i64x4base.ll - Building gen-bitcode-avx11-i64x4-32bit.cpp + Building gen-bitcode-avx11-i64x4-32bit.cpp and gen-bitcode-avx11-i64x4-64bit.cpp @@ -256,7 +256,7 @@ m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS -DRUNTIME=64 builtins/target-avx2.ll | python bitcode2cpp.py builtins\target-avx2.ll 64bit > $(Configuration)/gen-bitcode-avx2-64bit.cpp $(Configuration)/gen-bitcode-avx2-32bit.cpp; $(Configuration)/gen-bitcode-avx2-64bit.cpp builtins\util.m4;builtins\svml.m4;builtins\target-avx-common.ll;builtins\target-avx.ll - Building gen-bitcode-avx2-32bit.cpp + Building gen-bitcode-avx2-32bit.cpp and gen-bitcode-avx2-64bit.cpp @@ -266,7 +266,7 @@ m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS -DRUNTIME=64 builtins/target-avx2-x2.ll | python bitcode2cpp.py builtins\target-avx2-x2.ll 64bit > $(Configuration)/gen-bitcode-avx2-x2-64bit.cpp $(Configuration)/gen-bitcode-avx2-x2-32bit.cpp; $(Configuration)/gen-bitcode-avx2-x2-64bit.cpp builtins\util.m4;builtins\svml.m4;builtins\target-avx-common.ll;builtins\target-avx-x2.ll - Building gen-bitcode-avx2-x2-32bit.cpp + Building gen-bitcode-avx2-x2-32bit.cpp and gen-bitcode-avx2-x2-64bit.cpp @@ -276,7 +276,7 @@ m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS -DRUNTIME=64 builtins/target-avx2-i64x4.ll | python bitcode2cpp.py builtins\target-avx2-i64x4.ll 64bit > $(Configuration)/gen-bitcode-avx2-i64x4-64bit.cpp $(Configuration)/gen-bitcode-avx2-i64x4-32bit.cpp; $(Configuration)/gen-bitcode-avx2-i64x4-64bit.cpp builtins\util.m4;builtins\svml.m4;builtins\target-avx-common.ll;builtins\target-avx.ll;builtins\target-avx1-i64x4base.ll - Building gen-bitcode-avx2-i64x4-32bit.cpp + Building gen-bitcode-avx2-i64x4-32bit.cpp and gen-bitcode-avx2-i64x4-64bit.cpp @@ -286,7 +286,7 @@ m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS -DRUNTIME=64 builtins/target-generic-1.ll | python bitcode2cpp.py builtins\target-generic-1.ll 64bit > $(Configuration)/gen-bitcode-generic-1-64bit.cpp $(Configuration)/gen-bitcode-generic-1-32bit.cpp; $(Configuration)/gen-bitcode-generic-1-64bit.cpp builtins\util.m4;builtins\svml.m4;builtins\target-generic-common.ll - Building gen-bitcode-generic-1-32bit.cpp + Building gen-bitcode-generic-1-32bit.cpp and gen-bitcode-generic-1-64bit.cpp @@ -296,7 +296,7 @@ m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS -DRUNTIME=64 builtins/target-generic-4.ll | python bitcode2cpp.py builtins\target-generic-4.ll 64bit > $(Configuration)/gen-bitcode-generic-4-64bit.cpp $(Configuration)/gen-bitcode-generic-4-32bit.cpp; $(Configuration)/gen-bitcode-generic-4-64bit.cpp builtins\util.m4;builtins\svml.m4;builtins\target-generic-common.ll - Building gen-bitcode-generic-4-32bit.cpp + Building gen-bitcode-generic-4-32bit.cpp and gen-bitcode-generic-4-64bit.cpp @@ -306,7 +306,7 @@ m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS -DRUNTIME=64 builtins/target-generic-8.ll | python bitcode2cpp.py builtins\target-generic-8.ll 64bit > $(Configuration)/gen-bitcode-generic-8-64bit.cpp $(Configuration)/gen-bitcode-generic-8-32bit.cpp; $(Configuration)/gen-bitcode-generic-8-64bit.cpp builtins\util.m4;builtins\svml.m4;builtins\target-generic-common.ll - Building gen-bitcode-generic-8-32bit.cpp + Building gen-bitcode-generic-8-32bit.cpp and gen-bitcode-generic-8-64bit.cpp @@ -316,7 +316,7 @@ m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS -DRUNTIME=64 builtins/target-generic-16.ll | python bitcode2cpp.py builtins\target-generic-16.ll 64bit > $(Configuration)/gen-bitcode-generic-16-64bit.cpp $(Configuration)/gen-bitcode-generic-16-32bit.cpp; $(Configuration)/gen-bitcode-generic-16-64bit.cpp builtins\util.m4;builtins\svml.m4;builtins\target-generic-common.ll - Building gen-bitcode-generic-16-32bit.cpp + Building gen-bitcode-generic-16-32bit.cpp and gen-bitcode-generic-16-64bit.cpp @@ -326,7 +326,7 @@ m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS -DRUNTIME=64 builtins/target-generic-32.ll | python bitcode2cpp.py builtins\target-generic-32.ll 64bit > $(Configuration)/gen-bitcode-generic-32-64bit.cpp $(Configuration)/gen-bitcode-generic-32-32bit.cpp; $(Configuration)/gen-bitcode-generic-32-64bit.cpp builtins\util.m4;builtins\svml.m4;builtins\target-generic-common.ll - Building gen-bitcode-generic-32-32bit.cpp + Building gen-bitcode-generic-32-32bit.cpp and gen-bitcode-generic-32-64bit.cpp @@ -336,7 +336,7 @@ m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS -DRUNTIME=64 builtins/target-generic-64.ll | python bitcode2cpp.py builtins\target-generic-64.ll 64bit > $(Configuration)/gen-bitcode-generic-64-64bit.cpp $(Configuration)/gen-bitcode-generic-64-32bit.cpp; $(Configuration)/gen-bitcode-generic-64-64bit.cpp builtins\util.m4;builtins\svml.m4;builtins\target-generic-common.ll - Building gen-bitcode-generic-64-32bit.cpp + Building gen-bitcode-generic-64-32bit.cpp and gen-bitcode-generic-64-64bit.cpp From 31ee2951ce84cb1dc758ea4906df22cc9e9b6b01 Mon Sep 17 00:00:00 2001 From: Dmitry Babokin Date: Tue, 3 Dec 2013 19:40:30 +0400 Subject: [PATCH 13/21] Adding LLVM 3.4 definition to alloy.py --- alloy.py | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/alloy.py b/alloy.py index 525f90d0..657e67bf 100755 --- a/alloy.py +++ b/alloy.py @@ -88,6 +88,9 @@ def build_LLVM(version_LLVM, revision, folder, tarball, debug, selfbuild, extra, FOLDER_NAME=version_LLVM if version_LLVM == "trunk": SVN_PATH="trunk" + if version_LLVM == "3.4": + SVN_PATH="tags/RELEASE_34/rc1" + version_LLVM = "3_4" if version_LLVM == "3.3": SVN_PATH="tags/RELEASE_33/final" version_LLVM = "3_3" @@ -273,8 +276,10 @@ def build_ispc(version_LLVM, make): os.environ["LLVM_INSTALL_DIR"] = os.environ["LLVM_HOME"] + "\\bin-" + version_LLVM if version_LLVM == "3.3": temp = "3_3" - if version_LLVM == "trunk": + if version_LLVM == "3.4": temp = "3_4" + if version_LLVM == "trunk": + temp = "3_5" os.environ["LLVM_VERSION"] = "LLVM_" + temp try_do_LLVM("clean ISPC for building", "msbuild ispc.vcxproj /t:clean", True) try_do_LLVM("build ISPC with LLVM version " + version_LLVM + " ", "msbuild ispc.vcxproj /V:m /p:Platform=Win32 /p:Configuration=Release /t:rebuild", True) @@ -376,7 +381,7 @@ def validation_run(only, only_targets, reference_branch, number, notify, update, archs.append("x86-64") if "native" in only: sde_targets_t = [] - for i in ["3.1", "3.2", "3.3", "trunk"]: + for i in ["3.1", "3.2", "3.3", "3.4", "trunk"]: if i in only: LLVM.append(i) if "current" in only: @@ -676,7 +681,7 @@ if __name__ == '__main__': llvm_group = OptionGroup(parser, "Options for building LLVM", "These options must be used with -b option.") llvm_group.add_option('--version', dest='version', - help='version of llvm to build: 3.1 3.2 3.3 trunk. Default: trunk', default="trunk") + help='version of llvm to build: 3.1 3.2 3.3 3.4 trunk. Default: trunk', default="trunk") llvm_group.add_option('--revision', dest='revision', help='revision of llvm to build in format r172870', default="") llvm_group.add_option('--debug', dest='debug', @@ -711,7 +716,7 @@ if __name__ == '__main__': run_group.add_option('--only', dest='only', help='set types of tests. Possible values:\n' + '-O0, -O2, x86, x86-64, stability (test only stability), performance (test only performance)\n' + - 'build (only build with different LLVM), 3.1, 3.2, 3.3, trunk, native (do not use SDE), current (do not rebuild ISPC).', + 'build (only build with different LLVM), 3.1, 3.2, 3.3, 3.4, trunk, native (do not use SDE), current (do not rebuild ISPC).', default="") run_group.add_option('--perf_LLVM', dest='perf_llvm', help='compare LLVM 3.3 with "--compare-with", default trunk', default=False, action='store_true') From f61f1a20207eeefbde91359d279ea57a91c9e7f7 Mon Sep 17 00:00:00 2001 From: Dmitry Babokin Date: Tue, 3 Dec 2013 19:52:11 +0400 Subject: [PATCH 14/21] Fixing run_tests.py to understand LLVM 3.4 --- run_tests.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/run_tests.py b/run_tests.py index 3f03cc9b..e6429861 100755 --- a/run_tests.py +++ b/run_tests.py @@ -452,7 +452,7 @@ def verify(): f_lines = f.readlines() f.close() check = [["g++", "clang++", "cl"],["-O0", "-O2"],["x86","x86-64"], - ["Linux","Windows","Mac"],["LLVM 3.1","LLVM 3.2","LLVM 3.3","LLVM head"], + ["Linux","Windows","Mac"],["LLVM 3.1","LLVM 3.2","LLVM 3.3","LLVM 3.4","LLVM trunk"], ["sse2-i32x4", "sse2-i32x8", "sse4-i32x4", "sse4-i32x8", "sse4-i16x8", "sse4-i8x16", "avx1-i32x4" "avx1-i32x8", "avx1-i32x16", "avx1-i64x4", "avx1.1-i32x8", "avx1.1-i32x16", "avx1.1-i64x4", "avx2-i32x8", "avx2-i32x16", "avx2-i64x4", From 2d2d14744b2baa32c9129664c2f3816df5b915ff Mon Sep 17 00:00:00 2001 From: Dmitry Babokin Date: Wed, 4 Dec 2013 19:00:02 +0400 Subject: [PATCH 15/21] Fixing --opt=force-aligned-memory for LLVM 3.3+ --- ctx.cpp | 30 +++++++++++++++++++++++++----- ispc.cpp | 26 ++++++++++++++++++++++++++ ispc.h | 9 +++++++++ opt.cpp | 16 ++++++++++++---- 4 files changed, 72 insertions(+), 9 deletions(-) diff --git a/ctx.cpp b/ctx.cpp index c1a7e61a..e5c60363 100644 --- a/ctx.cpp +++ b/ctx.cpp @@ -316,7 +316,11 @@ FunctionEmitContext::FunctionEmitContext(Function *func, Symbol *funSym, llvm::BasicBlock *offBB = llvm::BasicBlock::Create(*g->ctx, "entry", (llvm::Function *)offFunc, 0); - new llvm::StoreInst(LLVMMaskAllOff, globalAllOnMaskPtr, offBB); + llvm::StoreInst *inst = + new llvm::StoreInst(LLVMMaskAllOff, globalAllOnMaskPtr, offBB); + if (g->opt.forceAlignedMemory) { + inst->setAlignment(g->target->getNativeVectorAlignment()); + } llvm::ReturnInst::Create(*g->ctx, offBB); } @@ -2437,7 +2441,13 @@ FunctionEmitContext::LoadInst(llvm::Value *ptr, const char *name) { if (name == NULL) name = LLVMGetName(ptr, "_load"); - llvm::Instruction *inst = new llvm::LoadInst(ptr, name, bblock); + llvm::LoadInst *inst = new llvm::LoadInst(ptr, name, bblock); + + if (g->opt.forceAlignedMemory && + llvm::dyn_cast(pt->getElementType())) { + inst->setAlignment(g->target->getNativeVectorAlignment()); + } + AddDebugPos(inst); return inst; } @@ -2719,7 +2729,7 @@ FunctionEmitContext::AllocaInst(llvm::Type *llvmType, inst = new llvm::AllocaInst(llvmType, name ? name : "", bblock); // If no alignment was specified but we have an array of a uniform - // type, then align it to 4 * the native vector width; it's not + // type, then align it to the native vector alignment; it's not // unlikely that this array will be loaded into varying variables with // what will be aligned accesses if the uniform -> varying load is done // in regular chunks. @@ -2727,7 +2737,7 @@ FunctionEmitContext::AllocaInst(llvm::Type *llvmType, llvm::dyn_cast(llvmType); if (align == 0 && arrayType != NULL && !llvm::isa(arrayType->getElementType())) - align = 4 * g->target->getNativeVectorWidth(); + align = g->target->getNativeVectorAlignment(); if (align != 0) inst->setAlignment(align); @@ -2986,7 +2996,17 @@ FunctionEmitContext::StoreInst(llvm::Value *value, llvm::Value *ptr) { return; } - llvm::Instruction *inst = new llvm::StoreInst(value, ptr, bblock); + llvm::PointerType *pt = + llvm::dyn_cast(ptr->getType()); + AssertPos(currentPos, pt != NULL); + + llvm::StoreInst *inst = new llvm::StoreInst(value, ptr, bblock); + + if (g->opt.forceAlignedMemory && + llvm::dyn_cast(pt->getElementType())) { + inst->setAlignment(g->target->getNativeVectorAlignment()); + } + AddDebugPos(inst); } diff --git a/ispc.cpp b/ispc.cpp index 36d31580..b1790dc3 100644 --- a/ispc.cpp +++ b/ispc.cpp @@ -191,6 +191,7 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) : m_tf_attributes(NULL), #endif m_nativeVectorWidth(-1), + m_nativeVectorAlignment(-1), m_dataTypeWidth(-1), m_vectorWidth(-1), m_generatePIC(pic), @@ -309,6 +310,7 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) : !strcasecmp(isa, "sse2-i32x4")) { this->m_isa = Target::SSE2; this->m_nativeVectorWidth = 4; + this->m_nativeVectorAlignment = 16; this->m_dataTypeWidth = 32; this->m_vectorWidth = 4; this->m_attributes = "+sse,+sse2,-sse3,-sse4a,-ssse3,-popcnt" @@ -325,6 +327,7 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) : !strcasecmp(isa, "sse2-i32x8")) { this->m_isa = Target::SSE2; this->m_nativeVectorWidth = 4; + this->m_nativeVectorAlignment = 16; this->m_dataTypeWidth = 32; this->m_vectorWidth = 8; this->m_attributes = "+sse,+sse2,-sse3,-sse4a,-ssse3,-popcnt" @@ -341,6 +344,7 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) : !strcasecmp(isa, "sse4-i32x4")) { this->m_isa = Target::SSE4; this->m_nativeVectorWidth = 4; + this->m_nativeVectorAlignment = 16; this->m_dataTypeWidth = 32; this->m_vectorWidth = 4; // TODO: why not sse42 and popcnt? @@ -359,6 +363,7 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) : !strcasecmp(isa, "sse4-i32x8")) { this->m_isa = Target::SSE4; this->m_nativeVectorWidth = 4; + this->m_nativeVectorAlignment = 16; this->m_dataTypeWidth = 32; this->m_vectorWidth = 8; this->m_attributes = "+sse,+sse2,+sse3,-sse4a,+ssse3,-popcnt,+cmov" @@ -374,6 +379,7 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) : else if (!strcasecmp(isa, "sse4-i8x16")) { this->m_isa = Target::SSE4; this->m_nativeVectorWidth = 16; + this->m_nativeVectorAlignment = 16; this->m_dataTypeWidth = 8; this->m_vectorWidth = 16; this->m_attributes = "+sse,+sse2,+sse3,-sse4a,+ssse3,-popcnt,+cmov" @@ -389,6 +395,7 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) : else if (!strcasecmp(isa, "sse4-i16x8")) { this->m_isa = Target::SSE4; this->m_nativeVectorWidth = 8; + this->m_nativeVectorAlignment = 16; this->m_dataTypeWidth = 16; this->m_vectorWidth = 8; this->m_attributes = "+sse,+sse2,+sse3,-sse4a,+ssse3,-popcnt,+cmov" @@ -405,6 +412,7 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) : !strcasecmp(isa, "generic-x4")) { this->m_isa = Target::GENERIC; this->m_nativeVectorWidth = 4; + this->m_nativeVectorAlignment = 16; this->m_vectorWidth = 4; this->m_maskingIsFree = true; this->m_maskBitCount = 1; @@ -416,6 +424,7 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) : !strcasecmp(isa, "generic-x8")) { this->m_isa = Target::GENERIC; this->m_nativeVectorWidth = 8; + this->m_nativeVectorAlignment = 32; this->m_vectorWidth = 8; this->m_maskingIsFree = true; this->m_maskBitCount = 1; @@ -427,6 +436,7 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) : !strcasecmp(isa, "generic-x16")) { this->m_isa = Target::GENERIC; this->m_nativeVectorWidth = 16; + this->m_nativeVectorAlignment = 64; this->m_vectorWidth = 16; this->m_maskingIsFree = true; this->m_maskBitCount = 1; @@ -438,6 +448,7 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) : !strcasecmp(isa, "generic-x32")) { this->m_isa = Target::GENERIC; this->m_nativeVectorWidth = 32; + this->m_nativeVectorAlignment = 64; this->m_vectorWidth = 32; this->m_maskingIsFree = true; this->m_maskBitCount = 1; @@ -449,6 +460,7 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) : !strcasecmp(isa, "generic-x64")) { this->m_isa = Target::GENERIC; this->m_nativeVectorWidth = 64; + this->m_nativeVectorAlignment = 64; this->m_vectorWidth = 64; this->m_maskingIsFree = true; this->m_maskBitCount = 1; @@ -460,6 +472,7 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) : !strcasecmp(isa, "generic-x1")) { this->m_isa = Target::GENERIC; this->m_nativeVectorWidth = 1; + this->m_nativeVectorAlignment = 16; this->m_vectorWidth = 1; this->m_maskingIsFree = false; this->m_maskBitCount = 32; @@ -467,6 +480,7 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) : else if (!strcasecmp(isa, "avx1-i32x4")) { this->m_isa = Target::AVX; this->m_nativeVectorWidth = 8; + this->m_nativeVectorAlignment = 32; this->m_dataTypeWidth = 32; this->m_vectorWidth = 4; this->m_attributes = "+avx,+popcnt,+cmov"; @@ -478,6 +492,7 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) : !strcasecmp(isa, "avx1-i32x8")) { this->m_isa = Target::AVX; this->m_nativeVectorWidth = 8; + this->m_nativeVectorAlignment = 32; this->m_dataTypeWidth = 32; this->m_vectorWidth = 8; this->m_attributes = "+avx,+popcnt,+cmov"; @@ -488,6 +503,7 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) : !strcasecmp(isa, "avx1-i64x4")) { this->m_isa = Target::AVX; this->m_nativeVectorWidth = 8; /* native vector width in terms of floats */ + this->m_nativeVectorAlignment = 32; this->m_dataTypeWidth = 64; this->m_vectorWidth = 4; this->m_attributes = "+avx,+popcnt,+cmov"; @@ -499,6 +515,7 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) : !strcasecmp(isa, "avx1-i32x16")) { this->m_isa = Target::AVX; this->m_nativeVectorWidth = 8; + this->m_nativeVectorAlignment = 32; this->m_dataTypeWidth = 32; this->m_vectorWidth = 16; this->m_attributes = "+avx,+popcnt,+cmov"; @@ -509,6 +526,7 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) : !strcasecmp(isa, "avx1.1-i32x8")) { this->m_isa = Target::AVX11; this->m_nativeVectorWidth = 8; + this->m_nativeVectorAlignment = 32; this->m_dataTypeWidth = 32; this->m_vectorWidth = 8; this->m_attributes = "+avx,+popcnt,+cmov,+f16c" @@ -530,6 +548,7 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) : !strcasecmp(isa, "avx1.1-i32x16")) { this->m_isa = Target::AVX11; this->m_nativeVectorWidth = 8; + this->m_nativeVectorAlignment = 32; this->m_dataTypeWidth = 32; this->m_vectorWidth = 16; this->m_attributes = "+avx,+popcnt,+cmov,+f16c" @@ -550,6 +569,7 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) : else if (!strcasecmp(isa, "avx1.1-i64x4")) { this->m_isa = Target::AVX11; this->m_nativeVectorWidth = 8; /* native vector width in terms of floats */ + this->m_nativeVectorAlignment = 32; this->m_dataTypeWidth = 64; this->m_vectorWidth = 4; this->m_attributes = "+avx,+popcnt,+cmov,+f16c" @@ -571,6 +591,7 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) : !strcasecmp(isa, "avx2-i32x8")) { this->m_isa = Target::AVX2; this->m_nativeVectorWidth = 8; + this->m_nativeVectorAlignment = 32; this->m_dataTypeWidth = 32; this->m_vectorWidth = 8; this->m_attributes = "+avx2,+popcnt,+cmov,+f16c" @@ -596,6 +617,7 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) : !strcasecmp(isa, "avx2-i32x16")) { this->m_isa = Target::AVX2; this->m_nativeVectorWidth = 16; + this->m_nativeVectorAlignment = 32; this->m_dataTypeWidth = 32; this->m_vectorWidth = 16; this->m_attributes = "+avx2,+popcnt,+cmov,+f16c" @@ -620,6 +642,7 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) : else if (!strcasecmp(isa, "avx2-i64x4")) { this->m_isa = Target::AVX2; this->m_nativeVectorWidth = 8; /* native vector width in terms of floats */ + this->m_nativeVectorAlignment = 32; this->m_dataTypeWidth = 64; this->m_vectorWidth = 4; this->m_attributes = "+avx2,+popcnt,+cmov,+f16c" @@ -645,6 +668,7 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) : else if (!strcasecmp(isa, "neon-i8x16")) { this->m_isa = Target::NEON8; this->m_nativeVectorWidth = 16; + this->m_nativeVectorAlignment = 16; this->m_dataTypeWidth = 8; this->m_vectorWidth = 16; this->m_attributes = "+neon,+fp16"; @@ -655,6 +679,7 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) : else if (!strcasecmp(isa, "neon-i16x8")) { this->m_isa = Target::NEON16; this->m_nativeVectorWidth = 8; + this->m_nativeVectorAlignment = 16; this->m_dataTypeWidth = 16; this->m_vectorWidth = 8; this->m_attributes = "+neon,+fp16"; @@ -666,6 +691,7 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) : !strcasecmp(isa, "neon-i32x4")) { this->m_isa = Target::NEON32; this->m_nativeVectorWidth = 4; + this->m_nativeVectorAlignment = 16; this->m_dataTypeWidth = 32; this->m_vectorWidth = 4; this->m_attributes = "+neon,+fp16"; diff --git a/ispc.h b/ispc.h index b319d656..4b333861 100644 --- a/ispc.h +++ b/ispc.h @@ -260,6 +260,8 @@ public: int getNativeVectorWidth() const {return m_nativeVectorWidth;} + int getNativeVectorAlignment() const {return m_nativeVectorAlignment;} + int getDataTypeWidth() const {return m_dataTypeWidth;} int getVectorWidth() const {return m_vectorWidth;} @@ -332,6 +334,13 @@ private: SSE, 8 for AVX, etc.) */ int m_nativeVectorWidth; + /** Native vector alignment in bytes. Theoretically this may be derived + from the vector size, but it's better to manage directly the alignement. + It allows easier experimenting and better fine tuning for particular + platform. This information is primatily used when + --opt=force-aligned-memory is used. */ + int m_nativeVectorAlignment; + /** Data type with in bits. Typically it's 32, but could be 8, 16 or 64. For generic it's -1, which means undefined. */ int m_dataTypeWidth; diff --git a/opt.cpp b/opt.cpp index 3e320b4b..9059c746 100644 --- a/opt.cpp +++ b/opt.cpp @@ -904,7 +904,7 @@ IntrinsicsOpt::runOnBasicBlock(llvm::BasicBlock &bb) { lCopyMetadata(castPtr, callInst); int align; if (g->opt.forceAlignedMemory) - align = 0; + align = g->target->getNativeVectorAlignment(); else align = callInst->getCalledFunction() == avxMaskedLoad32 ? 4 : 8; name = LLVMGetName(callInst->getArgOperand(0), "_load"); @@ -946,7 +946,7 @@ IntrinsicsOpt::runOnBasicBlock(llvm::BasicBlock &bb) { new llvm::StoreInst(rvalue, castPtr, (llvm::Instruction *)NULL); int align; if (g->opt.forceAlignedMemory) - align = 0; + align = g->target->getNativeVectorAlignment(); else align = callInst->getCalledFunction() == avxMaskedStore32 ? 4 : 8; storeInst->setAlignment(align); @@ -2758,7 +2758,8 @@ lImproveMaskedStore(llvm::CallInst *callInst) { lCopyMetadata(lvalue, callInst); llvm::Instruction *store = new llvm::StoreInst(rvalue, lvalue, false /* not volatile */, - g->opt.forceAlignedMemory ? 0 : info->align); + g->opt.forceAlignedMemory ? + g->target->getNativeVectorAlignment() : info->align); lCopyMetadata(store, callInst); llvm::ReplaceInstWithInst(callInst, store); return true; @@ -2821,7 +2822,8 @@ lImproveMaskedLoad(llvm::CallInst *callInst, callInst); llvm::Instruction *load = new llvm::LoadInst(ptr, callInst->getName(), false /* not volatile */, - g->opt.forceAlignedMemory ? 0 : info->align, + g->opt.forceAlignedMemory ? + g->target->getNativeVectorAlignment() : info->align, (llvm::Instruction *)NULL); lCopyMetadata(load, callInst); llvm::ReplaceInstWithInst(callInst, load); @@ -3226,6 +3228,9 @@ lEmitLoads(llvm::Value *basePtr, std::vector &loadOps, } case 4: { // 4-wide vector load + if (g->opt.forceAlignedMemory) { + align = g->target->getNativeVectorAlignment(); + } llvm::VectorType *vt = llvm::VectorType::get(LLVMTypes::Int32Type, 4); loadOps[i].load = lGEPAndLoad(basePtr, start, align, @@ -3234,6 +3239,9 @@ lEmitLoads(llvm::Value *basePtr, std::vector &loadOps, } case 8: { // 8-wide vector load + if (g->opt.forceAlignedMemory) { + align = g->target->getNativeVectorAlignment(); + } llvm::VectorType *vt = llvm::VectorType::get(LLVMTypes::Int32Type, 8); loadOps[i].load = lGEPAndLoad(basePtr, start, align, From 040ef0bc491a956dd3fbcae31f1f456f25e785ec Mon Sep 17 00:00:00 2001 From: Dmitry Babokin Date: Wed, 4 Dec 2013 16:29:15 +0400 Subject: [PATCH 16/21] Adding r196261 patch to previous vzeroupper fix for 3.3 --- ...93261_bug17631_196261_win_vzeroupper.patch | 115 ++++++++++++++++++ .../3_3_r193261_bug17631_win_vzeroupper.patch | 69 ----------- 2 files changed, 115 insertions(+), 69 deletions(-) create mode 100644 llvm_patches/3_3_r193261_bug17631_196261_win_vzeroupper.patch delete mode 100644 llvm_patches/3_3_r193261_bug17631_win_vzeroupper.patch diff --git a/llvm_patches/3_3_r193261_bug17631_196261_win_vzeroupper.patch b/llvm_patches/3_3_r193261_bug17631_196261_win_vzeroupper.patch new file mode 100644 index 00000000..8f0a790b --- /dev/null +++ b/llvm_patches/3_3_r193261_bug17631_196261_win_vzeroupper.patch @@ -0,0 +1,115 @@ +From b9b016cda57d8afc26a150de7ee329b54a994c85 Mon Sep 17 00:00:00 2001 +From: Michael Liao +Date: Mon, 21 Oct 2013 17:47:58 -0700 +Subject: [PATCH] Fix PR17631 + +- Skip instructions added in prolog. For specific targets, prolog may + insert helper function calls (e.g. _chkstk will be called when + there're more than 4K bytes allocated on stack). However, these + helpers don't use/def YMM/XMM registers. + It also include second fix for the problem: r196261+r196391. + +diff --git a/lib/Target/X86/X86VZeroUpper.cpp b/lib/Target/X86/X86VZeroUpper.cpp +index 477f75a..0d37a7d 100644 +--- lib/Target/X86/X86VZeroUpper.cpp ++++ lib/Target/X86/X86VZeroUpper.cpp +@@ -121,7 +121,7 @@ + } + + static bool clobbersAllYmmRegs(const MachineOperand &MO) { +- for (unsigned reg = X86::YMM0; reg < X86::YMM15; ++reg) { ++ for (unsigned reg = X86::YMM0; reg <= X86::YMM15; ++reg) { + if (!MO.clobbersPhysReg(reg)) + return false; + } +@@ -143,6 +143,21 @@ + return false; + } + ++/// clobbersAnyYmmReg() - Check if any YMM register will be clobbered by this ++/// instruction. ++static bool clobbersAnyYmmReg(MachineInstr *MI) { ++ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { ++ const MachineOperand &MO = MI->getOperand(i); ++ if (!MO.isRegMask()) ++ continue; ++ for (unsigned reg = X86::YMM0; reg <= X86::YMM15; ++reg) { ++ if (MO.clobbersPhysReg(reg)) ++ return true; ++ } ++ } ++ return false; ++} ++ + /// runOnMachineFunction - Loop over all of the basic blocks, inserting + /// vzero upper instructions before function calls. + bool VZeroUpperInserter::runOnMachineFunction(MachineFunction &MF) { +@@ -226,8 +241,9 @@ + bool BBHasCall = false; + + for (MachineBasicBlock::iterator I = BB.begin(); I != BB.end(); ++I) { ++ DebugLoc dl = I->getDebugLoc(); + MachineInstr *MI = I; +- DebugLoc dl = I->getDebugLoc(); ++ + bool isControlFlow = MI->isCall() || MI->isReturn(); + + // Shortcut: don't need to check regular instructions in dirty state. +@@ -246,6 +262,14 @@ + if (!isControlFlow) + continue; + ++ // If the call won't clobber any YMM register, skip it as well. It usually ++ // happens on helper function calls (such as '_chkstk', '_ftol2') where ++ // standard calling convention is not used (RegMask is not used to mark ++ // register clobbered and register usage (def/imp-def/use) is well-dfined ++ // and explicitly specified. ++ if (MI->isCall() && !clobbersAnyYmmReg(MI)) ++ continue; ++ + BBHasCall = true; + + // The VZEROUPPER instruction resets the upper 128 bits of all Intel AVX +diff --git a/test/CodeGen/X86/pr17631.ll b/test/CodeGen/X86/pr17631.ll +new file mode 100644 +index 0000000..a572ff2 +--- /dev/null ++++ test/CodeGen/X86/pr17631.ll +@@ -0,0 +1,34 @@ ++; RUN: llc < %s -mcpu=core-avx-i -mtriple=i386-pc-win32 | FileCheck %s ++ ++%struct_type = type { [64 x <8 x float>], <8 x float> } ++ ++; Function Attrs: nounwind readnone ++declare i32 @llvm.x86.avx.movmsk.ps.256(<8 x float>) ++ ++; Function Attrs: nounwind ++define i32 @equal(<8 x i32> %A) { ++allocas: ++ %first_alloc = alloca [64 x <8 x i32>] ++ %second_alloc = alloca %struct_type ++ ++ %A1 = bitcast <8 x i32> %A to <8 x float> ++ %A2 = call i32 @llvm.x86.avx.movmsk.ps.256(<8 x float> %A1) ++ ret i32 %A2 ++} ++ ++; CHECK: equal ++; CHECK-NOT: vzeroupper ++; CHECK: _chkstk ++; CHECK: ret ++ ++define <8 x float> @foo(<8 x float> %y, i64* %p, double %x) { ++ %i = fptoui double %x to i64 ++ store i64 %i, i64* %p ++ %ret = fadd <8 x float> %y, %y ++ ret <8 x float> %ret ++} ++ ++; CHECK: foo ++; CHECK-NOT: vzeroupper ++; CHECK: _ftol2 ++; CHECK: ret +-- +1.8.1.2 + diff --git a/llvm_patches/3_3_r193261_bug17631_win_vzeroupper.patch b/llvm_patches/3_3_r193261_bug17631_win_vzeroupper.patch deleted file mode 100644 index b6abb1d3..00000000 --- a/llvm_patches/3_3_r193261_bug17631_win_vzeroupper.patch +++ /dev/null @@ -1,69 +0,0 @@ -From b9b016cda57d8afc26a150de7ee329b54a994c85 Mon Sep 17 00:00:00 2001 -From: Michael Liao -Date: Mon, 21 Oct 2013 17:47:58 -0700 -Subject: [PATCH] Fix PR17631 - -- Skip instructions added in prolog. For specific targets, prolog may - insert helper function calls (e.g. _chkstk will be called when - there're more than 4K bytes allocated on stack). However, these - helpers don't use/def YMM/XMM registers. ---- - lib/Target/X86/X86VZeroUpper.cpp | 11 ++++++++++- - test/CodeGen/X86/pr17631.ll | 22 ++++++++++++++++++++++ - 2 files changed, 32 insertions(+), 1 deletion(-) - create mode 100644 test/CodeGen/X86/pr17631.ll - -diff --git a/lib/Target/X86/X86VZeroUpper.cpp b/lib/Target/X86/X86VZeroUpper.cpp -index 477f75a..0d37a7d 100644 ---- lib/Target/X86/X86VZeroUpper.cpp -+++ lib/Target/X86/X86VZeroUpper.cpp -@@ -231,8 +231,17 @@ bool VZeroUpperInserter::processBasicBlock(MachineFunction &MF, - bool BBHasCall = false; - - for (MachineBasicBlock::iterator I = BB.begin(); I != BB.end(); ++I) { -- MachineInstr *MI = I; - DebugLoc dl = I->getDebugLoc(); -+ MachineInstr *MI = I; -+ -+ // Don't need to check instructions added in prolog. -+ // In prolog, special function calls may be added for specific targets -+ // (e.g. on Windows, a prolog helper '_chkstk' is called when the local -+ // variables exceed 4K bytes on stack.) These helpers won't use/def YMM/XMM -+ // registers. -+ if (MI->getFlag(MachineInstr::FrameSetup)) -+ continue; -+ - bool isControlFlow = MI->isCall() || MI->isReturn(); - - // Shortcut: don't need to check regular instructions in dirty state. -diff --git a/test/CodeGen/X86/pr17631.ll b/test/CodeGen/X86/pr17631.ll -new file mode 100644 -index 0000000..a572ff2 ---- /dev/null -+++ test/CodeGen/X86/pr17631.ll -@@ -0,0 +1,22 @@ -+; RUN: llc < %s -mcpu=core-avx-i -mtriple=i386-pc-win32 | FileCheck %s -+ -+%struct_type = type { [64 x <8 x float>], <8 x float> } -+ -+; Function Attrs: nounwind readnone -+declare i32 @llvm.x86.avx.movmsk.ps.256(<8 x float>) -+ -+; Function Attrs: nounwind -+define i32 @equal(<8 x i32> %A) { -+allocas: -+ %first_alloc = alloca [64 x <8 x i32>] -+ %second_alloc = alloca %struct_type -+ -+ %A1 = bitcast <8 x i32> %A to <8 x float> -+ %A2 = call i32 @llvm.x86.avx.movmsk.ps.256(<8 x float> %A1) -+ ret i32 %A2 -+} -+ -+; CHECK: equal -+; CHECK-NOT: vzeroupper -+; CHECK: _chkstk -+; CHECK: ret --- -1.8.1.2 - From 5012ba34b4b5952866ca7d4fe512c7cbd77be3e4 Mon Sep 17 00:00:00 2001 From: Ilia Filippov Date: Thu, 5 Dec 2013 19:38:46 +0400 Subject: [PATCH 17/21] increase data for examples --- examples/mandelbrot_tasks/mandelbrot_tasks.cpp | 4 ++-- examples/noise/noise.cpp | 4 ++-- examples/stencil/stencil.cpp | 2 +- perf.ini | 4 ++-- 4 files changed, 7 insertions(+), 7 deletions(-) diff --git a/examples/mandelbrot_tasks/mandelbrot_tasks.cpp b/examples/mandelbrot_tasks/mandelbrot_tasks.cpp index 698daf0f..1c4d2ca5 100644 --- a/examples/mandelbrot_tasks/mandelbrot_tasks.cpp +++ b/examples/mandelbrot_tasks/mandelbrot_tasks.cpp @@ -74,8 +74,8 @@ static void usage() { } int main(int argc, char *argv[]) { - unsigned int width = 1536; - unsigned int height = 1024; + unsigned int width = 1536 * 8; + unsigned int height = 1024 * 8; float x0 = -2; float x1 = 1; float y0 = -1; diff --git a/examples/noise/noise.cpp b/examples/noise/noise.cpp index 123f98c7..86b4f761 100644 --- a/examples/noise/noise.cpp +++ b/examples/noise/noise.cpp @@ -66,8 +66,8 @@ writePPM(float *buf, int width, int height, const char *fn) { int main() { - unsigned int width = 768; - unsigned int height = 768; + unsigned int width = 768 * 4; + unsigned int height = 768 * 4; float x0 = -10; float x1 = 10; float y0 = -10; diff --git a/examples/stencil/stencil.cpp b/examples/stencil/stencil.cpp index 593d901f..9cd12674 100644 --- a/examples/stencil/stencil.cpp +++ b/examples/stencil/stencil.cpp @@ -67,7 +67,7 @@ void InitData(int Nx, int Ny, int Nz, float *A[2], float *vsq) { int main() { - int Nx = 256, Ny = 256, Nz = 256; + int Nx = 256 * 2, Ny = 256 * 2, Nz = 256 * 2; int width = 4; float *Aserial[2], *Aispc[2]; Aserial[0] = new float [Nx * Ny * Nz]; diff --git a/perf.ini b/perf.ini index 249c25f4..b44a2853 100755 --- a/perf.ini +++ b/perf.ini @@ -10,7 +10,7 @@ %**************************************************************************************************** AOBench aobench -10 512 512 +3 2048 2048 #*** Deferred Shading deferred @@ -41,7 +41,7 @@ options #*** Ray Tracer rt -sponza +sponza --scale=6.0 #*** 3D Stencil stencil From 98c56c214a389208d6f5bccfde331ccc37755daf Mon Sep 17 00:00:00 2001 From: Ilia Filippov Date: Fri, 6 Dec 2013 18:57:35 +0400 Subject: [PATCH 18/21] changing of examples --- examples/aobench/ao.cpp | 25 +++++++++----- examples/deferred/main.cpp | 22 +++++++++---- examples/mandelbrot/mandelbrot.cpp | 23 +++++++++++-- .../mandelbrot_tasks/mandelbrot_tasks.cpp | 27 ++++++++------- examples/noise/noise.cpp | 26 ++++++++++++--- examples/rt/rt.cpp | 33 ++++++++++--------- examples/stencil/stencil.cpp | 31 +++++++++++++---- examples/volume_rendering/volume.cpp | 19 ++++++++--- perf.ini | 14 +++++--- perf.py | 21 +++++++----- 10 files changed, 166 insertions(+), 75 deletions(-) diff --git a/examples/aobench/ao.cpp b/examples/aobench/ao.cpp index 2286316d..b4e2833d 100644 --- a/examples/aobench/ao.cpp +++ b/examples/aobench/ao.cpp @@ -60,7 +60,7 @@ using namespace ispc; extern void ao_serial(int w, int h, int nsubsamples, float image[]); -static unsigned int test_iterations; +static unsigned int test_iterations[] = {3, 7, 1}; static unsigned int width, height; static unsigned char *img; static float *fimg; @@ -106,16 +106,20 @@ savePPM(const char *fname, int w, int h) int main(int argc, char **argv) { - if (argc != 4) { + if (argc < 3) { printf ("%s\n", argv[0]); - printf ("Usage: ao [num test iterations] [width] [height]\n"); + printf ("Usage: ao [width] [height] [ispc iterations] [tasks iterations] [serial iterations]\n"); getchar(); exit(-1); } else { - test_iterations = atoi(argv[1]); - width = atoi (argv[2]); - height = atoi (argv[3]); + if (argc == 6) { + for (int i = 0; i < 3; i++) { + test_iterations[i] = atoi(argv[3 + i]); + } + } + width = atoi (argv[1]); + height = atoi (argv[2]); } // Allocate space for output images @@ -127,13 +131,14 @@ int main(int argc, char **argv) // time for any of them. // double minTimeISPC = 1e30; - for (unsigned int i = 0; i < test_iterations; i++) { + for (unsigned int i = 0; i < test_iterations[0]; i++) { memset((void *)fimg, 0, sizeof(float) * width * height * 3); assert(NSUBSAMPLES == 2); reset_and_start_timer(); ao_ispc(width, height, NSUBSAMPLES, fimg); double t = get_elapsed_mcycles(); + printf("@time of ISPC run:\t\t\t[%.3f] million cycles\n", t); minTimeISPC = std::min(minTimeISPC, t); } @@ -147,13 +152,14 @@ int main(int argc, char **argv) // minimum time for any of them. // double minTimeISPCTasks = 1e30; - for (unsigned int i = 0; i < test_iterations; i++) { + for (unsigned int i = 0; i < test_iterations[1]; i++) { memset((void *)fimg, 0, sizeof(float) * width * height * 3); assert(NSUBSAMPLES == 2); reset_and_start_timer(); ao_ispc_tasks(width, height, NSUBSAMPLES, fimg); double t = get_elapsed_mcycles(); + printf("@time of ISPC + TASKS run:\t\t\t[%.3f] million cycles\n", t); minTimeISPCTasks = std::min(minTimeISPCTasks, t); } @@ -167,11 +173,12 @@ int main(int argc, char **argv) // minimum time. // double minTimeSerial = 1e30; - for (unsigned int i = 0; i < test_iterations; i++) { + for (unsigned int i = 0; i < test_iterations[2]; i++) { memset((void *)fimg, 0, sizeof(float) * width * height * 3); reset_and_start_timer(); ao_serial(width, height, NSUBSAMPLES, fimg); double t = get_elapsed_mcycles(); + printf("@time of serial run:\t\t\t\t[%.3f] million cycles\n", t); minTimeSerial = std::min(minTimeSerial, t); } diff --git a/examples/deferred/main.cpp b/examples/deferred/main.cpp index 4f2be879..d7f62f50 100644 --- a/examples/deferred/main.cpp +++ b/examples/deferred/main.cpp @@ -62,10 +62,16 @@ /////////////////////////////////////////////////////////////////////////// int main(int argc, char** argv) { - if (argc != 2) { - printf("usage: deferred_shading \n"); + if (argc < 2) { + printf("usage: deferred_shading [tasks iterations] [serial iterations]\n"); return 1; } + static unsigned int test_iterations[] = {5, 3, 500}; //last value is for nframes, it is scale. + if (argc == 5) { + for (int i = 0; i < 3; i++) { + test_iterations[i] = atoi(argv[2 + i]); + } + } InputData *input = CreateInputDataFromFile(argv[1]); if (!input) { @@ -81,9 +87,9 @@ int main(int argc, char** argv) { InitDynamicCilk(input); #endif // __cilk - int nframes = 5; + int nframes = test_iterations[2]; double ispcCycles = 1e30; - for (int i = 0; i < 5; ++i) { + for (int i = 0; i < test_iterations[0]; ++i) { framebuffer.clear(); reset_and_start_timer(); for (int j = 0; j < nframes; ++j) @@ -91,6 +97,7 @@ int main(int argc, char** argv) { VISUALIZE_LIGHT_COUNT, framebuffer.r, framebuffer.g, framebuffer.b); double mcycles = get_elapsed_mcycles() / nframes; + printf("@time of ISPC + TASKS run:\t\t\t[%.3f] million cycles\n", mcycles); ispcCycles = std::min(ispcCycles, mcycles); } printf("[ispc static + tasks]:\t\t[%.3f] million cycles to render " @@ -98,14 +105,16 @@ int main(int argc, char** argv) { input->header.framebufferWidth, input->header.framebufferHeight); WriteFrame("deferred-ispc-static.ppm", input, framebuffer); + nframes = 3; #ifdef __cilk double dynamicCilkCycles = 1e30; - for (int i = 0; i < 5; ++i) { + for (int i = 0; i < test_iterations[1]; ++i) { framebuffer.clear(); reset_and_start_timer(); for (int j = 0; j < nframes; ++j) DispatchDynamicCilk(input, &framebuffer); double mcycles = get_elapsed_mcycles() / nframes; + printf("@time of serial run:\t\t\t[%.3f] million cycles\n", mcycles); dynamicCilkCycles = std::min(dynamicCilkCycles, mcycles); } printf("[ispc + Cilk dynamic]:\t\t[%.3f] million cycles to render image\n", @@ -114,12 +123,13 @@ int main(int argc, char** argv) { #endif // __cilk double serialCycles = 1e30; - for (int i = 0; i < 5; ++i) { + for (int i = 0; i < test_iterations[1]; ++i) { framebuffer.clear(); reset_and_start_timer(); for (int j = 0; j < nframes; ++j) DispatchDynamicC(input, &framebuffer); double mcycles = get_elapsed_mcycles() / nframes; + printf("@time of serial run:\t\t\t[%.3f] million cycles\n", mcycles); serialCycles = std::min(serialCycles, mcycles); } printf("[C++ serial dynamic, 1 core]:\t[%.3f] million cycles to render image\n", diff --git a/examples/mandelbrot/mandelbrot.cpp b/examples/mandelbrot/mandelbrot.cpp index d2bebb96..fafc00d0 100644 --- a/examples/mandelbrot/mandelbrot.cpp +++ b/examples/mandelbrot/mandelbrot.cpp @@ -42,6 +42,7 @@ #include #include "../timing.h" #include "mandelbrot_ispc.h" +#include using namespace ispc; extern void mandelbrot_serial(float x0, float y0, float x1, float y1, @@ -67,7 +68,8 @@ writePPM(int *buf, int width, int height, const char *fn) { } -int main() { +int main(int argc, char *argv[]) { + static unsigned int test_iterations[] = {3, 3}; unsigned int width = 768; unsigned int height = 512; float x0 = -2; @@ -75,6 +77,19 @@ int main() { float y0 = -1; float y1 = 1; + if (argc > 1) { + if (strncmp(argv[1], "--scale=", 8) == 0) { + float scale = atof(argv[1] + 8); + width *= scale; + height *= scale; + } + } + if ((argc == 3) || (argc == 4)) { + for (int i = 0; i < 2; i++) { + test_iterations[i] = atoi(argv[argc - 2 + i]); + } + } + int maxIterations = 256; int *buf = new int[width*height]; @@ -83,10 +98,11 @@ int main() { // time of three runs. // double minISPC = 1e30; - for (int i = 0; i < 3; ++i) { + for (int i = 0; i < test_iterations[0]; ++i) { reset_and_start_timer(); mandelbrot_ispc(x0, y0, x1, y1, width, height, maxIterations, buf); double dt = get_elapsed_mcycles(); + printf("@time of ISPC run:\t\t\t[%.3f] million cycles\n", dt); minISPC = std::min(minISPC, dt); } @@ -102,10 +118,11 @@ int main() { // minimum time. // double minSerial = 1e30; - for (int i = 0; i < 3; ++i) { + for (int i = 0; i < test_iterations[1]; ++i) { reset_and_start_timer(); mandelbrot_serial(x0, y0, x1, y1, width, height, maxIterations, buf); double dt = get_elapsed_mcycles(); + printf("@time of serial run:\t\t\t[%.3f] million cycles\n", dt); minSerial = std::min(minSerial, dt); } diff --git a/examples/mandelbrot_tasks/mandelbrot_tasks.cpp b/examples/mandelbrot_tasks/mandelbrot_tasks.cpp index 1c4d2ca5..32db45bc 100644 --- a/examples/mandelbrot_tasks/mandelbrot_tasks.cpp +++ b/examples/mandelbrot_tasks/mandelbrot_tasks.cpp @@ -69,21 +69,20 @@ writePPM(int *buf, int width, int height, const char *fn) { static void usage() { - fprintf(stderr, "usage: mandelbrot [--scale=]\n"); + fprintf(stderr, "usage: mandelbrot [--scale=] [tasks iterations] [serial iterations]\n"); exit(1); } int main(int argc, char *argv[]) { - unsigned int width = 1536 * 8; - unsigned int height = 1024 * 8; + static unsigned int test_iterations[] = {7, 1}; + unsigned int width = 1536; + unsigned int height = 1024; float x0 = -2; float x1 = 1; float y0 = -1; float y1 = 1; - if (argc == 1) - ; - else if (argc == 2) { + if (argc > 1) { if (strncmp(argv[1], "--scale=", 8) == 0) { float scale = atof(argv[1] + 8); if (scale == 0.f) @@ -94,11 +93,13 @@ int main(int argc, char *argv[]) { width = (width + 0xf) & ~0xf; height = (height + 0xf) & ~0xf; } - else - usage(); } - else - usage(); + if ((argc == 3) || (argc == 4)) { + for (int i = 0; i < 2; i++) { + test_iterations[i] = atoi(argv[argc - 2 + i]); + } + } + int maxIterations = 512; int *buf = new int[width*height]; @@ -108,13 +109,14 @@ int main(int argc, char *argv[]) { // time of three runs. // double minISPC = 1e30; - for (int i = 0; i < 3; ++i) { + for (int i = 0; i < test_iterations[0]; ++i) { // Clear out the buffer for (unsigned int i = 0; i < width * height; ++i) buf[i] = 0; reset_and_start_timer(); mandelbrot_ispc(x0, y0, x1, y1, width, height, maxIterations, buf); double dt = get_elapsed_mcycles(); + printf("@time of ISPC + TASKS run:\t\t\t[%.3f] million cycles\n", dt); minISPC = std::min(minISPC, dt); } @@ -127,13 +129,14 @@ int main(int argc, char *argv[]) { // minimum time. // double minSerial = 1e30; - for (int i = 0; i < 3; ++i) { + for (int i = 0; i < test_iterations[1]; ++i) { // Clear out the buffer for (unsigned int i = 0; i < width * height; ++i) buf[i] = 0; reset_and_start_timer(); mandelbrot_serial(x0, y0, x1, y1, width, height, maxIterations, buf); double dt = get_elapsed_mcycles(); + printf("@time of serial run:\t\t\t[%.3f] million cycles\n", dt); minSerial = std::min(minSerial, dt); } diff --git a/examples/noise/noise.cpp b/examples/noise/noise.cpp index 86b4f761..0664bbd9 100644 --- a/examples/noise/noise.cpp +++ b/examples/noise/noise.cpp @@ -42,6 +42,7 @@ #include #include "../timing.h" #include "noise_ispc.h" +#include using namespace ispc; extern void noise_serial(float x0, float y0, float x1, float y1, @@ -65,14 +66,27 @@ writePPM(float *buf, int width, int height, const char *fn) { } -int main() { - unsigned int width = 768 * 4; - unsigned int height = 768 * 4; +int main(int argc, char *argv[]) { + static unsigned int test_iterations[] = {3, 1}; + unsigned int width = 768; + unsigned int height = 768; float x0 = -10; float x1 = 10; float y0 = -10; float y1 = 10; + if (argc > 1) { + if (strncmp(argv[1], "--scale=", 8) == 0) { + float scale = atof(argv[1] + 8); + width *= scale; + height *= scale; + } + } + if ((argc == 3) || (argc == 4)) { + for (int i = 0; i < 2; i++) { + test_iterations[i] = atoi(argv[argc - 2 + i]); + } + } float *buf = new float[width*height]; // @@ -80,10 +94,11 @@ int main() { // time of three runs. // double minISPC = 1e30; - for (int i = 0; i < 3; ++i) { + for (int i = 0; i < test_iterations[0]; ++i) { reset_and_start_timer(); noise_ispc(x0, y0, x1, y1, width, height, buf); double dt = get_elapsed_mcycles(); + printf("@time of ISPC run:\t\t\t[%.3f] million cycles\n", dt); minISPC = std::min(minISPC, dt); } @@ -99,10 +114,11 @@ int main() { // minimum time. // double minSerial = 1e30; - for (int i = 0; i < 3; ++i) { + for (int i = 0; i < test_iterations[1]; ++i) { reset_and_start_timer(); noise_serial(x0, y0, x1, y1, width, height, buf); double dt = get_elapsed_mcycles(); + printf("@time of serial run:\t\t\t[%.3f] million cycles\n", dt); minSerial = std::min(minSerial, dt); } diff --git a/examples/rt/rt.cpp b/examples/rt/rt.cpp index 48bcc423..8f61656a 100644 --- a/examples/rt/rt.cpp +++ b/examples/rt/rt.cpp @@ -96,27 +96,27 @@ static void writeImage(int *idImage, float *depthImage, int width, int height, static void usage() { - fprintf(stderr, "rt [--scale=] \n"); + fprintf(stderr, "rt [--scale=] [ispc iterations] [tasks iterations] [serial iterations]\n"); exit(1); } int main(int argc, char *argv[]) { + static unsigned int test_iterations[] = {3, 7, 1}; float scale = 1.f; const char *filename = NULL; - for (int i = 1; i < argc; ++i) { - if (strncmp(argv[i], "--scale=", 8) == 0) { - scale = atof(argv[i] + 8); - if (scale == 0.f) - usage(); + if (argc < 2) usage(); + filename = argv[1]; + if (argc > 2) { + if (strncmp(argv[2], "--scale=", 8) == 0) { + scale = atof(argv[2] + 8); + } + } + if ((argc == 6) || (argc == 5)) { + for (int i = 0; i < 3; i++) { + test_iterations[i] = atoi(argv[argc - 3 + i]); } - else if (filename != NULL) - usage(); - else - filename = argv[i]; } - if (filename == NULL) - usage(); #define READ(var, n) \ if (fread(&(var), sizeof(var), n, f) != (unsigned int)n) { \ @@ -211,11 +211,12 @@ int main(int argc, char *argv[]) { // Run 3 iterations with ispc + 1 core, record the minimum time // double minTimeISPC = 1e30; - for (int i = 0; i < 3; ++i) { + for (int i = 0; i < test_iterations[0]; ++i) { reset_and_start_timer(); raytrace_ispc(width, height, baseWidth, baseHeight, raster2camera, camera2world, image, id, nodes, triangles); double dt = get_elapsed_mcycles(); + printf("@time of ISPC run:\t\t\t[%.3f] million cycles\n", dt); minTimeISPC = std::min(dt, minTimeISPC); } printf("[rt ispc, 1 core]:\t\t[%.3f] million cycles for %d x %d image\n", @@ -230,11 +231,12 @@ int main(int argc, char *argv[]) { // Run 3 iterations with ispc + 1 core, record the minimum time // double minTimeISPCtasks = 1e30; - for (int i = 0; i < 3; ++i) { + for (int i = 0; i < test_iterations[1]; ++i) { reset_and_start_timer(); raytrace_ispc_tasks(width, height, baseWidth, baseHeight, raster2camera, camera2world, image, id, nodes, triangles); double dt = get_elapsed_mcycles(); + printf("@time of ISPC + TASKS run:\t\t\t[%.3f] million cycles\n", dt); minTimeISPCtasks = std::min(dt, minTimeISPCtasks); } printf("[rt ispc + tasks]:\t\t[%.3f] million cycles for %d x %d image\n", @@ -250,11 +252,12 @@ int main(int argc, char *argv[]) { // minimum time. // double minTimeSerial = 1e30; - for (int i = 0; i < 3; ++i) { + for (int i = 0; i < test_iterations[2]; ++i) { reset_and_start_timer(); raytrace_serial(width, height, baseWidth, baseHeight, raster2camera, camera2world, image, id, nodes, triangles); double dt = get_elapsed_mcycles(); + printf("@time of serial run:\t\t\t[%.3f] million cycles\n", dt); minTimeSerial = std::min(dt, minTimeSerial); } printf("[rt serial]:\t\t\t[%.3f] million cycles for %d x %d image\n", diff --git a/examples/stencil/stencil.cpp b/examples/stencil/stencil.cpp index 9cd12674..33abc85c 100644 --- a/examples/stencil/stencil.cpp +++ b/examples/stencil/stencil.cpp @@ -40,6 +40,7 @@ #include #include +#include #include #include "../timing.h" #include "stencil_ispc.h" @@ -66,9 +67,25 @@ void InitData(int Nx, int Ny, int Nz, float *A[2], float *vsq) { } -int main() { - int Nx = 256 * 2, Ny = 256 * 2, Nz = 256 * 2; +int main(int argc, char *argv[]) { + static unsigned int test_iterations[] = {3, 3, 3};//the last two numbers must be equal here + int Nx = 256, Ny = 256, Nz = 256; int width = 4; + + if (argc > 1) { + if (strncmp(argv[1], "--scale=", 8) == 0) { + float scale = atof(argv[1] + 8); + Nx *= scale; + Ny *= scale; + Nz *= scale; + } + } + if ((argc == 4) || (argc == 5)) { + for (int i = 0; i < 3; i++) { + test_iterations[i] = atoi(argv[argc - 3 + i]); + } + } + float *Aserial[2], *Aispc[2]; Aserial[0] = new float [Nx * Ny * Nz]; Aserial[1] = new float [Nx * Ny * Nz]; @@ -79,18 +96,18 @@ int main() { float coeff[4] = { 0.5, -.25, .125, -.0625 }; InitData(Nx, Ny, Nz, Aispc, vsq); - // // Compute the image using the ispc implementation on one core; report // the minimum time of three runs. // double minTimeISPC = 1e30; - for (int i = 0; i < 3; ++i) { + for (int i = 0; i < test_iterations[0]; ++i) { reset_and_start_timer(); loop_stencil_ispc(0, 6, width, Nx - width, width, Ny - width, width, Nz - width, Nx, Ny, Nz, coeff, vsq, Aispc[0], Aispc[1]); double dt = get_elapsed_mcycles(); + printf("@time of ISPC run:\t\t\t[%.3f] million cycles\n", dt); minTimeISPC = std::min(minTimeISPC, dt); } @@ -103,12 +120,13 @@ int main() { // the minimum time of three runs. // double minTimeISPCTasks = 1e30; - for (int i = 0; i < 3; ++i) { + for (int i = 0; i < test_iterations[1]; ++i) { reset_and_start_timer(); loop_stencil_ispc_tasks(0, 6, width, Nx - width, width, Ny - width, width, Nz - width, Nx, Ny, Nz, coeff, vsq, Aispc[0], Aispc[1]); double dt = get_elapsed_mcycles(); + printf("@time of ISPC + TASKS run:\t\t\t[%.3f] million cycles\n", dt); minTimeISPCTasks = std::min(minTimeISPCTasks, dt); } @@ -121,12 +139,13 @@ int main() { // minimum time. // double minTimeSerial = 1e30; - for (int i = 0; i < 3; ++i) { + for (int i = 0; i < test_iterations[2]; ++i) { reset_and_start_timer(); loop_stencil_serial(0, 6, width, Nx-width, width, Ny - width, width, Nz - width, Nx, Ny, Nz, coeff, vsq, Aserial[0], Aserial[1]); double dt = get_elapsed_mcycles(); + printf("@time of serial run:\t\t\t[%.3f] million cycles\n", dt); minTimeSerial = std::min(minTimeSerial, dt); } diff --git a/examples/volume_rendering/volume.cpp b/examples/volume_rendering/volume.cpp index 458cd407..b6eda986 100644 --- a/examples/volume_rendering/volume.cpp +++ b/examples/volume_rendering/volume.cpp @@ -135,10 +135,16 @@ loadVolume(const char *fn, int n[3]) { int main(int argc, char *argv[]) { - if (argc != 3) { - fprintf(stderr, "usage: volume \n"); + static unsigned int test_iterations[] = {3, 7, 1}; + if (argc < 3) { + fprintf(stderr, "usage: volume [ispc iterations] [tasks iterations] [serial iterations]\n"); return 1; } + if (argc == 6) { + for (int i = 0; i < 3; i++) { + test_iterations[i] = atoi(argv[3 + i]); + } + } // // Load viewing data and the volume density data @@ -156,11 +162,12 @@ int main(int argc, char *argv[]) { // time of three runs. // double minISPC = 1e30; - for (int i = 0; i < 3; ++i) { + for (int i = 0; i < test_iterations[0]; ++i) { reset_and_start_timer(); volume_ispc(density, n, raster2camera, camera2world, width, height, image); double dt = get_elapsed_mcycles(); + printf("@time of ISPC run:\t\t\t[%.3f] million cycles\n", dt); minISPC = std::min(minISPC, dt); } @@ -176,11 +183,12 @@ int main(int argc, char *argv[]) { // tasks; report the minimum time of three runs. // double minISPCtasks = 1e30; - for (int i = 0; i < 3; ++i) { + for (int i = 0; i < test_iterations[1]; ++i) { reset_and_start_timer(); volume_ispc_tasks(density, n, raster2camera, camera2world, width, height, image); double dt = get_elapsed_mcycles(); + printf("@time of ISPC + TASKS run:\t\t\t[%.3f] million cycles\n", dt); minISPCtasks = std::min(minISPCtasks, dt); } @@ -196,11 +204,12 @@ int main(int argc, char *argv[]) { // minimum time. // double minSerial = 1e30; - for (int i = 0; i < 3; ++i) { + for (int i = 0; i < test_iterations[2]; ++i) { reset_and_start_timer(); volume_serial(density, n, raster2camera, camera2world, width, height, image); double dt = get_elapsed_mcycles(); + printf("@time of serial run:\t\t\t[%.3f] million cycles\n", dt); minSerial = std::min(minSerial, dt); } diff --git a/perf.ini b/perf.ini index b44a2853..eea017de 100755 --- a/perf.ini +++ b/perf.ini @@ -8,26 +8,29 @@ % #*** % [% comment] %**************************************************************************************************** +% All parameters of iteration number must be at the end of command string. Now all of the, are default (3 7 1). AOBench aobench -3 2048 2048 +% --scale= from parameters +2048 2048 #*** Deferred Shading deferred +% --scale= from data and third parameter data/pp1280x720.bin #*** Mandelbrot Set mandelbrot - +--scale=1.0 #*** Mandelbrot Set mandelbrot_tasks - +--scale=8.0 ^ #*** Perlin Noise Function noise - +--scale=4.0 #*** Binomial Options options @@ -45,10 +48,11 @@ sponza --scale=6.0 #*** 3D Stencil stencil - +--scale=2.0 #*** Volume Rendering volume_rendering +% --scale= from data camera.dat density_highres.vol #*** Sort diff --git a/perf.py b/perf.py index d1134990..65895335 100755 --- a/perf.py +++ b/perf.py @@ -99,16 +99,19 @@ def analyse_test(c1, c2, test, b_serial, perf_temp_n): j+=1 if "million cycles" in line: if j == c1: - line = line.replace("]","[") - line = line.split("[") - number = float(line[3]) - if "tasks" in line[1]: - absolute_tasks.append(number) + if line[0] == '@': + print_debug(line, True, perf_log) else: - if "ispc" in line[1]: - absolute_ispc.append(number) - if "serial" in line[1]: - serial.append(number) + line = line.replace("]","[") + line = line.split("[") + number = float(line[3]) + if "tasks" in line[1]: + absolute_tasks.append(number) + else: + if "ispc" in line[1]: + absolute_ispc.append(number) + if "serial" in line[1]: + serial.append(number) if len(ispc) != 0: if len(tasks) != 0: From be21d190e2d35767557382088a3fc3f5547e800a Mon Sep 17 00:00:00 2001 From: Dmitry Babokin Date: Tue, 10 Dec 2013 15:26:30 +0400 Subject: [PATCH 19/21] Update of 3.4 branch to rc2 --- alloy.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/alloy.py b/alloy.py index 657e67bf..83296b46 100755 --- a/alloy.py +++ b/alloy.py @@ -89,7 +89,7 @@ def build_LLVM(version_LLVM, revision, folder, tarball, debug, selfbuild, extra, if version_LLVM == "trunk": SVN_PATH="trunk" if version_LLVM == "3.4": - SVN_PATH="tags/RELEASE_34/rc1" + SVN_PATH="tags/RELEASE_34/rc2" version_LLVM = "3_4" if version_LLVM == "3.3": SVN_PATH="tags/RELEASE_33/final" From e618b1e14bed17d95d576847dbc4c383a7d3f4dd Mon Sep 17 00:00:00 2001 From: Dmitry Babokin Date: Tue, 10 Dec 2013 16:11:48 +0400 Subject: [PATCH 20/21] Removing patch witch already presents in 3.4/rc2 --- .../3_4_r195476_r195779_i16_sext.patch | 57 ------------------- 1 file changed, 57 deletions(-) delete mode 100644 llvm_patches/3_4_r195476_r195779_i16_sext.patch diff --git a/llvm_patches/3_4_r195476_r195779_i16_sext.patch b/llvm_patches/3_4_r195476_r195779_i16_sext.patch deleted file mode 100644 index 4e2c0f6b..00000000 --- a/llvm_patches/3_4_r195476_r195779_i16_sext.patch +++ /dev/null @@ -1,57 +0,0 @@ -Two stability patches affecting sse4-i16x8 and sse4-i8x16 targets. See PR18014 and PR18054 for more details. - -Index: lib/Target/X86/X86ISelLowering.cpp -=================================================================== ---- lib/Target/X86/X86ISelLowering.cpp (revision 195863) -+++ lib/Target/X86/X86ISelLowering.cpp (working copy) -@@ -13120,19 +13120,27 @@ - // fall through - case MVT::v4i32: - case MVT::v8i16: { -- // (sext (vzext x)) -> (vsext x) - SDValue Op0 = Op.getOperand(0); - SDValue Op00 = Op0.getOperand(0); - SDValue Tmp1; - // Hopefully, this VECTOR_SHUFFLE is just a VZEXT. - if (Op0.getOpcode() == ISD::BITCAST && -- Op00.getOpcode() == ISD::VECTOR_SHUFFLE) -+ Op00.getOpcode() == ISD::VECTOR_SHUFFLE) { -+ // (sext (vzext x)) -> (vsext x) - Tmp1 = LowerVectorIntExtend(Op00, Subtarget, DAG); -- if (Tmp1.getNode()) { -- SDValue Tmp1Op0 = Tmp1.getOperand(0); -- assert(Tmp1Op0.getOpcode() == X86ISD::VZEXT && -- "This optimization is invalid without a VZEXT."); -- return DAG.getNode(X86ISD::VSEXT, dl, VT, Tmp1Op0.getOperand(0)); -+ if (Tmp1.getNode()) { -+ EVT ExtraEltVT = ExtraVT.getVectorElementType(); -+ // This folding is only valid when the in-reg type is a vector of i8, -+ // i16, or i32. -+ if (ExtraEltVT == MVT::i8 || ExtraEltVT == MVT::i16 || -+ ExtraEltVT == MVT::i32) { -+ SDValue Tmp1Op0 = Tmp1.getOperand(0); -+ assert(Tmp1Op0.getOpcode() == X86ISD::VZEXT && -+ "This optimization is invalid without a VZEXT."); -+ return DAG.getNode(X86ISD::VSEXT, dl, VT, Tmp1Op0.getOperand(0)); -+ } -+ Op0 = Tmp1; -+ } - } - - // If the above didn't work, then just use Shift-Left + Shift-Right. -@@ -17007,6 +17015,15 @@ - if (BitWidth == 1) - return SDValue(); - -+ // Check all uses of that condition operand to check whether it will be -+ // consumed by non-BLEND instructions, which may depend on all bits are set -+ // properly. -+ for (SDNode::use_iterator I = Cond->use_begin(), -+ E = Cond->use_end(); I != E; ++I) -+ if (I->getOpcode() != ISD::VSELECT) -+ // TODO: Add other opcodes eventually lowered into BLEND. -+ return SDValue(); -+ - assert(BitWidth >= 8 && BitWidth <= 64 && "Invalid mask size"); - APInt DemandedMask = APInt::getHighBitsSet(BitWidth, 1); - From b19937c4dcb408d246d49ee897fb935674d94998 Mon Sep 17 00:00:00 2001 From: Ilia Filippov Date: Thu, 12 Dec 2013 19:25:02 +0400 Subject: [PATCH 21/21] deleting isPrimitiveType() --- cbackend.cpp | 6 +++--- ispc.cpp | 3 ++- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/cbackend.cpp b/cbackend.cpp index 40f87074..3db2d504 100644 --- a/cbackend.cpp +++ b/cbackend.cpp @@ -660,7 +660,7 @@ void CWriter::printStructReturnPointerFunctionType(llvm::raw_ostream &Out, llvm::raw_ostream & CWriter::printSimpleType(llvm::raw_ostream &Out, llvm::Type *Ty, bool isSigned, const std::string &NameSoFar) { - assert((Ty->isPrimitiveType() || Ty->isIntegerTy() || Ty->isVectorTy()) && + assert((Ty->isFloatingPointTy() || Ty->isX86_MMXTy() || Ty->isIntegerTy() || Ty->isVectorTy() || Ty->isVoidTy()) && "Invalid type for printSimpleType"); switch (Ty->getTypeID()) { case llvm::Type::VoidTyID: return Out << "void " << NameSoFar; @@ -756,7 +756,7 @@ llvm::raw_ostream &CWriter::printType(llvm::raw_ostream &Out, llvm::Type *Ty, #endif ) { - if (Ty->isPrimitiveType() || Ty->isIntegerTy() || Ty->isVectorTy()) { + if (Ty->isFloatingPointTy() || Ty->isX86_MMXTy() || Ty->isIntegerTy() || Ty->isVectorTy() || Ty->isVoidTy()) { printSimpleType(Out, Ty, isSigned, NameSoFar); return Out; } @@ -2737,7 +2737,7 @@ void CWriter::printModuleTypes() { void CWriter::printContainedStructs(llvm::Type *Ty, llvm::SmallPtrSet &Printed) { // Don't walk through pointers. - if (Ty->isPointerTy() || Ty->isPrimitiveType() || Ty->isIntegerTy()) + if (!(Ty->isStructTy() || Ty->isArrayTy())) return; // Print all contained types first. diff --git a/ispc.cpp b/ispc.cpp index b1790dc3..ed326b14 100644 --- a/ispc.cpp +++ b/ispc.cpp @@ -944,7 +944,8 @@ Target::GetISATargetString() const { static bool lGenericTypeLayoutIndeterminate(llvm::Type *type) { - if (type->isPrimitiveType() || type->isIntegerTy()) + if (type->isFloatingPointTy() || type->isX86_MMXTy() || type->isVoidTy() || + type->isIntegerTy() || type->isLabelTy() || type->isMetadataTy()) return false; if (type == LLVMTypes::BoolVectorType ||