From bedaec22954e770ec717b5318d2ce09ebe66cc85 Mon Sep 17 00:00:00 2001 From: Matt Pharr Date: Tue, 4 Oct 2011 15:56:34 -0700 Subject: [PATCH] Update examples for multi-target compilation. Makefile and vcxproj file updates. Also modified vcxproj files so that the various files ispc generates go into $(TargetDir), not the current directory. Modified the ray tracer example to not have uniform short-vector types in its app-visible datatypes (these are laid out differently on SSE vs AVX); there was an existing lurking bug in the way this was done before. --- examples/aobench/Makefile | 12 ++++++---- examples/aobench/aobench.vcxproj | 20 ++++++++++------- examples/aobench_instrumented/Makefile | 2 +- .../aobench_instrumented.vcxproj | 22 +++++++++++-------- examples/deferred/Makefile | 8 ++++--- examples/deferred/deferred_shading.vcxproj | 20 ++++++++++------- examples/deferred/kernels.ispc | 2 -- examples/mandelbrot/Makefile | 12 ++++++---- examples/mandelbrot/mandelbrot.vcxproj | 22 +++++++++++-------- examples/mandelbrot_tasks/Makefile | 12 ++++++---- .../mandelbrot_tasks/mandelbrot_tasks.vcxproj | 20 ++++++++++------- examples/noise/Makefile | 11 ++++++---- examples/noise/noise.vcxproj | 22 +++++++++++-------- examples/options/Makefile | 12 ++++++---- examples/options/options.vcxproj | 22 +++++++++++-------- examples/rt/Makefile | 11 ++++++---- examples/rt/rt.cpp | 18 +++++++-------- examples/rt/rt.ispc | 22 ++++++++++++------- examples/rt/rt.vcxproj | 20 ++++++++++------- examples/rt/rt_serial.cpp | 22 ++++++++++++------- examples/simple/Makefile | 2 +- examples/simple/simple.vcxproj | 22 +++++++++++-------- examples/stencil/Makefile | 12 ++++++---- examples/stencil/stencil.vcxproj | 20 ++++++++++------- examples/volume_rendering/Makefile | 11 ++++++---- examples/volume_rendering/volume.vcxproj | 20 ++++++++++------- 26 files changed, 242 insertions(+), 157 deletions(-) diff --git a/examples/aobench/Makefile b/examples/aobench/Makefile index e8fce406..66fde5d3 100644 --- a/examples/aobench/Makefile +++ b/examples/aobench/Makefile @@ -8,7 +8,11 @@ TASK_OBJ=$(addprefix objs/, $(subst ../,, $(TASK_CXX:.cpp=.o))) CXX=g++ CXXFLAGS=-Iobjs/ -O3 -Wall -m64 ISPC=ispc -ISPCFLAGS=-O2 --target=sse4 --arch=x86-64 +ISPCFLAGS=-O2 --target=sse2,sse4,avx --arch=x86-64 + +ISPC_OBJS=objs/ao_ispc.o objs/ao_ispc_sse2.o objs/ao_ispc_sse4.o \ + objs/ao_ispc_avx.o +OBJS=objs/ao.o objs/ao_serial.o $(ISPC_OBJS) $(TASK_OBJ) default: ao @@ -20,8 +24,8 @@ dirs: clean: /bin/rm -rf objs *~ ao -ao: dirs objs/ao.o objs/ao_serial.o objs/ao_ispc.o $(TASK_OBJ) - $(CXX) $(CXXFLAGS) -o $@ objs/ao.o objs/ao_ispc.o objs/ao_serial.o $(TASK_OBJ) -lm $(TASK_LIB) +ao: dirs $(OBJS) $(TASK_OBJ) + $(CXX) $(CXXFLAGS) -o $@ $(OBJS) -lm $(TASK_LIB) objs/%.o: %.cpp $(CXX) $< $(CXXFLAGS) -c -o $@ @@ -31,5 +35,5 @@ objs/%.o: ../%.cpp objs/ao.o: objs/ao_ispc.h -objs/%_ispc.h objs/%_ispc.o: %.ispc +objs/%_ispc.h objs/%_ispc.o objs/%_ispc_sse2.o objs/%_ispc_sse4.o objs/%_ispc_avx.o: %.ispc $(ISPC) $(ISPCFLAGS) $< -o objs/$*_ispc.o -h objs/$*_ispc.h diff --git a/examples/aobench/aobench.vcxproj b/examples/aobench/aobench.vcxproj index af754e26..5d61f0bb 100644 --- a/examples/aobench/aobench.vcxproj +++ b/examples/aobench/aobench.vcxproj @@ -26,18 +26,18 @@ Document - ispc -O2 %(Filename).ispc -o %(Filename).obj -h %(Filename)_ispc.h --arch=x86 + ispc -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --arch=x86 --target=sse2,sse4,avx - ispc -O2 %(Filename).ispc -o %(Filename).obj -h %(Filename)_ispc.h + ispc -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --target=sse2,sse4,avx - %(Filename).obj - %(Filename).obj - ispc -O2 %(Filename).ispc -o %(Filename).obj -h %(Filename)_ispc.h --arch=x86 + $(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_avx.obj;$(TargetDir)%(Filename)_ispc.h + $(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_avx.obj;$(TargetDir)%(Filename)_ispc.h + ispc -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --arch=x86 --target=sse2,sse4,avx - ispc -O2 %(Filename).ispc -o %(Filename).obj -h %(Filename)_ispc.h + ispc -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --target=sse2,sse4,avx - %(Filename).obj - %(Filename).obj + $(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_avx.obj;$(TargetDir)%(Filename)_ispc.h + $(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_avx.obj;$(TargetDir)%(Filename)_ispc.h @@ -103,6 +103,7 @@ Level3 Disabled WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) + $(TargetDir) true Fast @@ -118,6 +119,7 @@ Level3 Disabled WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) + $(TargetDir) true Fast @@ -135,6 +137,7 @@ true true WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) + $(TargetDir) Fast @@ -153,6 +156,7 @@ true true WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) + $(TargetDir) Fast diff --git a/examples/aobench_instrumented/Makefile b/examples/aobench_instrumented/Makefile index 20a5a9b9..bfcdfc48 100644 --- a/examples/aobench_instrumented/Makefile +++ b/examples/aobench_instrumented/Makefile @@ -2,7 +2,7 @@ CXX=g++ -m64 CXXFLAGS=-Iobjs/ -g3 -Wall ISPC=ispc -ISPCFLAGS=-O2 --instrument --arch=x86-64 +ISPCFLAGS=-O2 --instrument --arch=x86-64 --target=sse2 default: ao diff --git a/examples/aobench_instrumented/aobench_instrumented.vcxproj b/examples/aobench_instrumented/aobench_instrumented.vcxproj index 3dec13e1..3012e236 100644 --- a/examples/aobench_instrumented/aobench_instrumented.vcxproj +++ b/examples/aobench_instrumented/aobench_instrumented.vcxproj @@ -1,4 +1,4 @@ - + @@ -25,18 +25,18 @@ Document - ispc -O2 %(Filename).ispc -o %(Filename).obj -h %(Filename)_ispc.h --arch=x86 --instrument + ispc -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --arch=x86 --instrument --target=sse2 - ispc -O2 %(Filename).ispc -o %(Filename).obj -h %(Filename)_ispc.h --instrument + ispc -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --instrument --target=sse2 - %(Filename).obj - %(Filename).obj - ispc -O2 %(Filename).ispc -o %(Filename).obj -h %(Filename)_ispc.h --arch=x86 --instrument + $(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_ispc.h + $(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_ispc.h + ispc -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --arch=x86 --instrument --target=sse2 - ispc -O2 %(Filename).ispc -o %(Filename).obj -h %(Filename)_ispc.h --instrument + ispc -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --instrument --target=sse2 - %(Filename).obj - %(Filename).obj + $(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_ispc.h + $(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_ispc.h @@ -102,6 +102,7 @@ Level3 Disabled WIN32;_DEBUG;_CONSOLE;_CRT_SECURE_NO_WARNINGS;%(PreprocessorDefinitions) + $(TargetDir) Console @@ -115,6 +116,7 @@ Level3 Disabled WIN32;_DEBUG;_CONSOLE;_CRT_SECURE_NO_WARNINGS;%(PreprocessorDefinitions) + $(TargetDir) Console @@ -130,6 +132,7 @@ true true WIN32;NDEBUG;_CONSOLE;_CRT_SECURE_NO_WARNINGS;%(PreprocessorDefinitions) + $(TargetDir) Console @@ -147,6 +150,7 @@ true true WIN32;NDEBUG;_CONSOLE;_CRT_SECURE_NO_WARNINGS;%(PreprocessorDefinitions) + $(TargetDir) Console diff --git a/examples/deferred/Makefile b/examples/deferred/Makefile index 840af3de..a6bc5e53 100644 --- a/examples/deferred/Makefile +++ b/examples/deferred/Makefile @@ -8,9 +8,11 @@ TASK_OBJ=$(addprefix objs/, $(subst ../,, $(TASK_CXX:.cpp=.o))) CXX=g++ CXXFLAGS=-Iobjs/ -O3 -Wall -m64 ISPC=ispc -ISPCFLAGS=-O2 --target=sse4x2 --arch=x86-64 --math-lib=fast +ISPCFLAGS=-O2 --target=sse2,sse4-x2,avx-x2 --arch=x86-64 --math-lib=fast -OBJS=objs/main.o objs/common.o objs/kernels_ispc.o objs/dynamic_c.o objs/dynamic_cilk.o +OBJS=objs/main.o objs/common.o objs/kernels_ispc.o objs/kernels_ispc_sse2.o \ + objs/kernels_ispc_sse4.o objs/kernels_ispc_avx.o \ + objs/dynamic_c.o objs/dynamic_cilk.o default: deferred_shading @@ -32,5 +34,5 @@ objs/%.o: %.cpp objs/kernels_ispc.h deferred.h objs/%.o: ../%.cpp $(CXX) $< $(CXXFLAGS) -c -o $@ -objs/%_ispc.h objs/%_ispc.o: %.ispc +objs/%_ispc.h objs/%_ispc.o objs/%_ispc_sse2.o objs/%_ispc_sse4.o objs/%_ispc_avx.o: %.ispc $(ISPC) $(ISPCFLAGS) $< -o objs/$*_ispc.o -h objs/$*_ispc.h diff --git a/examples/deferred/deferred_shading.vcxproj b/examples/deferred/deferred_shading.vcxproj index b87bec51..9b429120 100755 --- a/examples/deferred/deferred_shading.vcxproj +++ b/examples/deferred/deferred_shading.vcxproj @@ -81,6 +81,7 @@ Level3 Disabled WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) + $(TargetDir) true Fast @@ -96,6 +97,7 @@ Level3 Disabled WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) + $(TargetDir) true Fast @@ -113,6 +115,7 @@ true true WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) + $(TargetDir) Fast @@ -131,6 +134,7 @@ true true WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) + $(TargetDir) Fast @@ -150,18 +154,18 @@ Document - ispc -O2 %(Filename).ispc -o %(Filename).obj -h %(Filename)_ispc.h --arch=x86 --target=sse4x2 + ispc -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --arch=x86 --target=sse2,sse4-x2,avx-x2 - ispc -O2 %(Filename).ispc -o %(Filename).obj -h %(Filename)_ispc.h --target=sse4x2 + ispc -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --target=sse2,sse4-x2,avx-x2 - %(Filename).obj;%(Filename)_ispc.h - %(Filename).obj;%(Filename)_ispc.h - ispc -O2 %(Filename).ispc -o %(Filename).obj -h %(Filename)_ispc.h --arch=x86 --target=sse4x2 + $(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_avx.obj;$(TargetDir)%(Filename)_ispc.h + $(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_avx.obj;$(TargetDir)%(Filename)_ispc.h + ispc -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --arch=x86 --target=sse2,sse4-x2,avx-x2 - ispc -O2 %(Filename).ispc -o %(Filename).obj -h %(Filename)_ispc.h --target=sse4x2 + ispc -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --target=sse2,sse4-x2,avx-x2 - %(Filename).obj;%(Filename)_ispc.h - %(Filename).obj;%(Filename)_ispc.h + $(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_avx.obj;$(TargetDir)%(Filename)_ispc.h + $(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_avx.obj;$(TargetDir)%(Filename)_ispc.h diff --git a/examples/deferred/kernels.ispc b/examples/deferred/kernels.ispc index 7fc046e4..65fa1547 100644 --- a/examples/deferred/kernels.ispc +++ b/examples/deferred/kernels.ispc @@ -66,8 +66,6 @@ struct InputHeader uniform int32 inputDataArrayOffsets[idaNum]; }; -export void foo(reference InputHeader h) { } - /////////////////////////////////////////////////////////////////////////// // Common utility routines diff --git a/examples/mandelbrot/Makefile b/examples/mandelbrot/Makefile index a78e4743..256500c9 100644 --- a/examples/mandelbrot/Makefile +++ b/examples/mandelbrot/Makefile @@ -2,7 +2,7 @@ CXX=g++ -m64 CXXFLAGS=-Iobjs/ -O3 -Wall ISPC=ispc -ISPCFLAGS=-O2 --target=sse4x2 --arch=x86-64 +ISPCFLAGS=-O2 --target=sse2,sse4-x2,avx-x2 --arch=x86-64 default: mandelbrot @@ -14,13 +14,17 @@ dirs: clean: /bin/rm -rf objs *~ mandelbrot -mandelbrot: dirs objs/mandelbrot.o objs/mandelbrot_serial.o objs/mandelbrot_ispc.o - $(CXX) $(CXXFLAGS) -o $@ objs/mandelbrot.o objs/mandelbrot_ispc.o objs/mandelbrot_serial.o -lm +OBJS=objs/mandelbrot.o objs/mandelbrot_serial.o objs/mandelbrot_ispc_sse2.o \ + objs/mandelbrot_ispc_sse4.o objs/mandelbrot_ispc_avx.o \ + objs/mandelbrot_ispc.o + +mandelbrot: dirs $(OBJS) + $(CXX) $(CXXFLAGS) -o $@ $(OBJS) -lm objs/%.o: %.cpp $(CXX) $< $(CXXFLAGS) -c -o $@ objs/mandelbrot.o: objs/mandelbrot_ispc.h -objs/%_ispc.h objs/%_ispc.o: %.ispc +objs/%_ispc.h objs/%_ispc.o objs/%_ispc_sse2.o objs/%_ispc_sse4.o objs/%_ispc_avx.o: %.ispc $(ISPC) $(ISPCFLAGS) $< -o objs/$*_ispc.o -h objs/$*_ispc.h diff --git a/examples/mandelbrot/mandelbrot.vcxproj b/examples/mandelbrot/mandelbrot.vcxproj index d37f210f..2365f5d8 100644 --- a/examples/mandelbrot/mandelbrot.vcxproj +++ b/examples/mandelbrot/mandelbrot.vcxproj @@ -1,4 +1,4 @@ - + @@ -81,6 +81,7 @@ Level3 Disabled WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) + $(TargetDir) true Fast @@ -96,6 +97,7 @@ Level3 Disabled WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) + $(TargetDir) true Fast @@ -113,6 +115,7 @@ true true WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) + $(TargetDir) Fast @@ -131,6 +134,7 @@ true true WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) + $(TargetDir) Fast @@ -147,18 +151,18 @@ Document - ispc -O2 %(Filename).ispc -o %(Filename).obj -h %(Filename)_ispc.h --arch=x86 --target=sse4x2 + ispc -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --arch=x86 --target=sse2,sse4-x2,avx-x2 - ispc -O2 %(Filename).ispc -o %(Filename).obj -h %(Filename)_ispc.h --target=sse4x2 + ispc -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --target=sse2,sse4-x2,avx-x2 - %(Filename).obj;%(Filename)_ispc.h - %(Filename).obj;%(Filename)_ispc.h - ispc -O2 %(Filename).ispc -o %(Filename).obj -h %(Filename)_ispc.h --arch=x86 --target=sse4x2 + $(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_avx.obj;$(TargetDir)%(Filename)_ispc.h + $(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_avx.obj;$(TargetDir)%(Filename)_ispc.h + ispc -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --arch=x86 --target=sse2,sse4-x2,avx-x2 - ispc -O2 %(Filename).ispc -o %(Filename).obj -h %(Filename)_ispc.h --target=sse4x2 + ispc -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --target=sse2,sse4-x2,avx-x2 - %(Filename).obj;%(Filename)_ispc.h - %(Filename).obj;%(Filename)_ispc.h + $(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_avx.obj;$(TargetDir)%(Filename)_ispc.h + $(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_avx.obj;$(TargetDir)%(Filename)_ispc.h diff --git a/examples/mandelbrot_tasks/Makefile b/examples/mandelbrot_tasks/Makefile index 55b8b03f..10e1e3d9 100644 --- a/examples/mandelbrot_tasks/Makefile +++ b/examples/mandelbrot_tasks/Makefile @@ -8,7 +8,11 @@ TASK_OBJ=$(addprefix objs/, $(subst ../,, $(TASK_CXX:.cpp=.o))) CXX=g++ CXXFLAGS=-Iobjs/ -O3 -Wall -m64 ISPC=ispc -ISPCFLAGS=-O2 --target=sse4x2 --arch=x86-64 +ISPCFLAGS=-O2 --target=sse2,sse4-x2,avx-x2 --arch=x86-64 + +OBJS=objs/mandelbrot.o objs/mandelbrot_serial.o $(TASK_OBJ) \ + objs/mandelbrot_ispc.o objs/mandelbrot_ispc_sse2.o \ + objs/mandelbrot_ispc_sse4.o objs/mandelbrot_ispc_avx.o default: mandelbrot @@ -20,8 +24,8 @@ dirs: clean: /bin/rm -rf objs *~ mandelbrot -mandelbrot: dirs objs/mandelbrot.o objs/mandelbrot_serial.o objs/mandelbrot_ispc.o $(TASK_OBJ) - $(CXX) $(CXXFLAGS) -o $@ objs/mandelbrot.o objs/mandelbrot_ispc.o objs/mandelbrot_serial.o $(TASK_OBJ) -lm $(TASK_LIB) +mandelbrot: dirs $(OBJS) + $(CXX) $(CXXFLAGS) -o $@ $(OBJS) -lm $(TASK_LIB) objs/%.o: %.cpp $(CXX) $< $(CXXFLAGS) -c -o $@ @@ -31,5 +35,5 @@ objs/%.o: ../%.cpp objs/mandelbrot.o: objs/mandelbrot_ispc.h -objs/%_ispc.h objs/%_ispc.o: %.ispc +objs/%_ispc.h objs/%_ispc.o objs/%_ispc_sse2.o objs/%_ispc_sse4.o objs/%_ispc_avx.o: %.ispc $(ISPC) $(ISPCFLAGS) $< -o objs/$*_ispc.o -h objs/$*_ispc.h diff --git a/examples/mandelbrot_tasks/mandelbrot_tasks.vcxproj b/examples/mandelbrot_tasks/mandelbrot_tasks.vcxproj index bc247f4e..86bf7292 100644 --- a/examples/mandelbrot_tasks/mandelbrot_tasks.vcxproj +++ b/examples/mandelbrot_tasks/mandelbrot_tasks.vcxproj @@ -81,6 +81,7 @@ Level3 Disabled WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) + $(TargetDir) true Fast @@ -96,6 +97,7 @@ Level3 Disabled WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) + $(TargetDir) true Fast @@ -113,6 +115,7 @@ true true WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) + $(TargetDir) Fast @@ -131,6 +134,7 @@ true true WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) + $(TargetDir) Fast @@ -148,18 +152,18 @@ Document - ispc -O2 %(Filename).ispc -o %(Filename).obj -h %(Filename)_ispc.h --arch=x86 --target=sse4x2 + ispc -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --arch=x86 --target=sse2,sse4-x2,avx-x2 - ispc -O2 %(Filename).ispc -o %(Filename).obj -h %(Filename)_ispc.h --target=sse4x2 + ispc -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --target=sse2,sse4-x2,avx-x2 - %(Filename).obj;%(Filename)_ispc.h - %(Filename).obj;%(Filename)_ispc.h - ispc -O2 %(Filename).ispc -o %(Filename).obj -h %(Filename)_ispc.h --arch=x86 --target=sse4x2 + $(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_avx.obj;$(TargetDir)%(Filename)_ispc.h + $(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_avx.obj;$(TargetDir)%(Filename)_ispc.h + ispc -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --arch=x86 --target=sse2,sse4-x2,avx-x2 - ispc -O2 %(Filename).ispc -o %(Filename).obj -h %(Filename)_ispc.h --target=sse4x2 + ispc -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --target=sse2,sse4-x2,avx-x2 - %(Filename).obj;%(Filename)_ispc.h - %(Filename).obj;%(Filename)_ispc.h + $(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_avx.obj;$(TargetDir)%(Filename)_ispc.h + $(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_avx.obj;$(TargetDir)%(Filename)_ispc.h diff --git a/examples/noise/Makefile b/examples/noise/Makefile index 66f4401d..f2158742 100644 --- a/examples/noise/Makefile +++ b/examples/noise/Makefile @@ -2,7 +2,10 @@ CXX=g++ -m64 CXXFLAGS=-Iobjs/ -O3 -Wall ISPC=ispc -ISPCFLAGS=-O2 --target=sse4 --arch=x86-64 +ISPCFLAGS=-O2 --target=sse2,sse4,avx-x2 --arch=x86-64 + +OBJS=objs/noise.o objs/noise_serial.o objs/noise_ispc.o objs/noise_ispc_sse2.o \ + objs/noise_ispc_sse4.o objs/noise_ispc_avx.o default: noise @@ -14,13 +17,13 @@ dirs: clean: /bin/rm -rf objs *~ noise -noise: dirs objs/noise.o objs/noise_serial.o objs/noise_ispc.o - $(CXX) $(CXXFLAGS) -o $@ objs/noise.o objs/noise_ispc.o objs/noise_serial.o -lm +noise: dirs $(OBJS) + $(CXX) $(CXXFLAGS) -o $@ $(OBJS) -lm objs/%.o: %.cpp $(CXX) $< $(CXXFLAGS) -c -o $@ objs/noise.o: objs/noise_ispc.h -objs/%_ispc.h objs/%_ispc.o: %.ispc +objs/%_ispc.h objs/%_ispc.o objs/%_ispc_sse2.o objs/%_ispc_sse4.o objs/%_ispc_avx.o: %.ispc $(ISPC) $(ISPCFLAGS) $< -o objs/$*_ispc.o -h objs/$*_ispc.h diff --git a/examples/noise/noise.vcxproj b/examples/noise/noise.vcxproj index 03f47da6..4ccbe588 100644 --- a/examples/noise/noise.vcxproj +++ b/examples/noise/noise.vcxproj @@ -1,4 +1,4 @@ - + @@ -81,6 +81,7 @@ Level3 Disabled WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) + $(TargetDir) true Fast @@ -96,6 +97,7 @@ Level3 Disabled WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) + $(TargetDir) true Fast @@ -113,6 +115,7 @@ true true WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) + $(TargetDir) Fast @@ -131,6 +134,7 @@ true true WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) + $(TargetDir) Fast @@ -147,18 +151,18 @@ Document - ispc -O2 %(Filename).ispc -o %(Filename).obj -h %(Filename)_ispc.h --arch=x86 --target=sse4 + ispc -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --arch=x86 --target=sse2,sse4,avx-x2 - ispc -O2 %(Filename).ispc -o %(Filename).obj -h %(Filename)_ispc.h --target=sse4 + ispc -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --target=sse2,sse4,avx-x2 - %(Filename).obj;%(Filename)_ispc.h - %(Filename).obj;%(Filename)_ispc.h - ispc -O2 %(Filename).ispc -o %(Filename).obj -h %(Filename)_ispc.h --arch=x86 --target=sse4 + $(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_avx.obj;$(TargetDir)%(Filename)_ispc.h + $(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_avx.obj;$(TargetDir)%(Filename)_ispc.h + ispc -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --arch=x86 --target=sse2,sse4,avx-x2 - ispc -O2 %(Filename).ispc -o %(Filename).obj -h %(Filename)_ispc.h --target=sse4 + ispc -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --target=sse2,sse4,avx-x2 - %(Filename).obj;%(Filename)_ispc.h - %(Filename).obj;%(Filename)_ispc.h + $(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_avx.obj;$(TargetDir)%(Filename)_ispc.h + $(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_avx.obj;$(TargetDir)%(Filename)_ispc.h diff --git a/examples/options/Makefile b/examples/options/Makefile index d5f519f6..4163ba3a 100644 --- a/examples/options/Makefile +++ b/examples/options/Makefile @@ -2,7 +2,11 @@ CXX=g++ -m64 CXXFLAGS=-Iobjs/ -g -Wall ISPC=ispc -ISPCFLAGS=-O2 --target=sse4x2 --arch=x86-64 +ISPCFLAGS=-O2 --target=sse2,sse4-x2,avx-x2 --arch=x86-64 + +OBJS=objs/options.o objs/options_serial.o objs/options_ispc.o \ + objs/options_ispc_sse2.o objs/options_ispc_sse4.o \ + objs/options_ispc_avx.o default: options @@ -14,13 +18,13 @@ dirs: clean: /bin/rm -rf objs *~ options -options: dirs objs/options.o objs/options_serial.o objs/options_ispc.o - $(CXX) $(CXXFLAGS) -o $@ objs/options.o objs/options_ispc.o objs/options_serial.o -lm +options: dirs $(OBJS) + $(CXX) $(CXXFLAGS) -o $@ $(OBJS) -lm objs/%.o: %.cpp $(CXX) $< $(CXXFLAGS) -c -o $@ objs/options.o: objs/options_ispc.h options_defs.h -objs/%_ispc.h objs/%_ispc.o: %.ispc options_defs.h +objs/%_ispc.h objs/%_ispc.o objs/%_ispc_sse2.o objs/%_ispc_sse4.o objs/%_ispc_avx.o: %.ispc options_defs.h $(ISPC) $(ISPCFLAGS) $< -o objs/$*_ispc.o -h objs/$*_ispc.h diff --git a/examples/options/options.vcxproj b/examples/options/options.vcxproj index 88fe2efb..e776dc98 100644 --- a/examples/options/options.vcxproj +++ b/examples/options/options.vcxproj @@ -1,4 +1,4 @@ - + @@ -81,6 +81,7 @@ Level3 Disabled WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) + $(TargetDir) 4305 true Fast @@ -97,6 +98,7 @@ Level3 Disabled WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) + $(TargetDir) 4305 true Fast @@ -115,6 +117,7 @@ true true WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) + $(TargetDir) 4305 Fast @@ -134,6 +137,7 @@ true true WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) + $(TargetDir) 4305 Fast @@ -151,18 +155,18 @@ Document - ispc -O2 %(Filename).ispc -o %(Filename).obj -h %(Filename)_ispc.h --arch=x86 --target=sse4x2 + ispc -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --arch=x86 --target=sse2,sse4-x2,avx-x2 - ispc -O2 %(Filename).ispc -o %(Filename).obj -h %(Filename)_ispc.h --target=sse4x2 + ispc -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --target=sse2,sse4-x2,avx-x2 - %(Filename).obj;%(Filename)_ispc.h - %(Filename).obj;%(Filename)_ispc.h - ispc -O2 %(Filename).ispc -o %(Filename).obj -h %(Filename)_ispc.h --arch=x86 --target=sse4x2 + $(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_avx.obj;$(TargetDir)%(Filename)_ispc.h + $(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_avx.obj;$(TargetDir)%(Filename)_ispc.h + ispc -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --arch=x86 --target=sse2,sse4-x2,avx-x2 - ispc -O2 %(Filename).ispc -o %(Filename).obj -h %(Filename)_ispc.h --target=sse4x2 + ispc -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --target=sse2,sse4-x2,avx-x2 - %(Filename).obj;%(Filename)_ispc.h - %(Filename).obj;%(Filename)_ispc.h + $(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_avx.obj;$(TargetDir)%(Filename)_ispc.h + $(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_avx.obj;$(TargetDir)%(Filename)_ispc.h diff --git a/examples/rt/Makefile b/examples/rt/Makefile index 6d5b1e1c..f5f2318f 100644 --- a/examples/rt/Makefile +++ b/examples/rt/Makefile @@ -8,7 +8,10 @@ TASK_OBJ=$(addprefix objs/, $(subst ../,, $(TASK_CXX:.cpp=.o))) CXX=g++ CXXFLAGS=-Iobjs/ -O3 -Wall -m64 ISPC=ispc -ISPCFLAGS=-O2 --target=sse4x2 --arch=x86-64 +ISPCFLAGS=-O2 --target=sse2,sse4-x2,avx --arch=x86-64 + +OBJS=objs/rt.o objs/rt_serial.o $(TASK_OBJ) objs/rt_ispc.o objs/rt_ispc_sse2.o \ + objs/rt_ispc_sse4.o objs/rt_ispc_avx.o default: rt @@ -20,8 +23,8 @@ dirs: clean: /bin/rm -rf objs *~ rt -rt: dirs objs/rt.o objs/rt_serial.o objs/rt_ispc.o $(TASK_OBJ) - $(CXX) $(CXXFLAGS) -o $@ objs/rt.o objs/rt_ispc.o objs/rt_serial.o $(TASK_OBJ) -lm $(TASK_LIB) +rt: dirs $(OBJS) + $(CXX) $(CXXFLAGS) -o $@ $(OBJS) -lm $(TASK_LIB) objs/%.o: %.cpp $(CXX) $< $(CXXFLAGS) -c -o $@ @@ -31,5 +34,5 @@ objs/%.o: ../%.cpp objs/rt.o: objs/rt_ispc.h -objs/%_ispc.h objs/%_ispc.o: %.ispc +objs/%_ispc.h objs/%_ispc.o objs/%_ispc_sse2.o objs/%_ispc_sse4.o objs/%_ispc_avx.o: %.ispc $(ISPC) $(ISPCFLAGS) $< -o objs/$*_ispc.o -h objs/$*_ispc.h diff --git a/examples/rt/rt.cpp b/examples/rt/rt.cpp index 4745f01d..4bc07c5e 100644 --- a/examples/rt/rt.cpp +++ b/examples/rt/rt.cpp @@ -168,12 +168,12 @@ int main(int argc, char *argv[]) { // of node, the total number of int it if a leaf node, etc. float b[6]; READ(b[0], 6); - nodes[i].bounds[0].v[0] = b[0]; - nodes[i].bounds[0].v[1] = b[1]; - nodes[i].bounds[0].v[2] = b[2]; - nodes[i].bounds[1].v[0] = b[3]; - nodes[i].bounds[1].v[1] = b[4]; - nodes[i].bounds[1].v[2] = b[5]; + nodes[i].bounds[0][0] = b[0]; + nodes[i].bounds[0][1] = b[1]; + nodes[i].bounds[0][2] = b[2]; + nodes[i].bounds[1][0] = b[3]; + nodes[i].bounds[1][1] = b[4]; + nodes[i].bounds[1][2] = b[5]; READ(nodes[i].offset, 1); READ(nodes[i].nPrimitives, 1); READ(nodes[i].splitAxis, 1); @@ -190,9 +190,9 @@ int main(int argc, char *argv[]) { READ(v[0], 9); float *vp = v; for (int j = 0; j < 3; ++j) { - triangles[i].p[j].v[0] = *vp++; - triangles[i].p[j].v[1] = *vp++; - triangles[i].p[j].v[2] = *vp++; + triangles[i].p[j][0] = *vp++; + triangles[i].p[j][1] = *vp++; + triangles[i].p[j][2] = *vp++; } // And create an object id triangles[i].id = i+1; diff --git a/examples/rt/rt.ispc b/examples/rt/rt.ispc index 9f57ec14..88a4a7f6 100644 --- a/examples/rt/rt.ispc +++ b/examples/rt/rt.ispc @@ -43,12 +43,13 @@ struct Ray { }; struct Triangle { - uniform float3 p[3]; + uniform float p[3][4]; uniform int id; + uniform int pad[3]; }; struct LinearBVHNode { - uniform float3 bounds[2]; + uniform float bounds[2][3]; uniform unsigned int offset; // num primitives for leaf, second child for interior uniform unsigned int8 nPrimitives; uniform unsigned int8 splitAxis; @@ -103,14 +104,16 @@ static void generateRay(uniform const float raster2camera[4][4], } -static inline bool BBoxIntersect(const reference uniform float3 bounds[2], +static inline bool BBoxIntersect(const uniform float bounds[2][3], const reference Ray ray) { + uniform float3 bounds0 = { bounds[0][0], bounds[0][1], bounds[0][2] }; + uniform float3 bounds1 = { bounds[1][0], bounds[1][1], bounds[1][2] }; float t0 = ray.mint, t1 = ray.maxt; // Check all three axis-aligned slabs. Don't try to early out; it's // not worth the trouble - float3 tNear = (bounds[0] - ray.origin) * ray.invDir; - float3 tFar = (bounds[1] - ray.origin) * ray.invDir; + float3 tNear = (bounds0 - ray.origin) * ray.invDir; + float3 tFar = (bounds1 - ray.origin) * ray.invDir; if (tNear.x > tFar.x) { float tmp = tNear.x; tNear.x = tFar.x; @@ -141,8 +144,11 @@ static inline bool BBoxIntersect(const reference uniform float3 bounds[2], static inline bool TriIntersect(const reference Triangle tri, reference Ray ray) { - uniform float3 e1 = tri.p[1] - tri.p[0]; - uniform float3 e2 = tri.p[2] - tri.p[0]; + uniform float3 p0 = { tri.p[0][0], tri.p[0][1], tri.p[0][2] }; + uniform float3 p1 = { tri.p[1][0], tri.p[1][1], tri.p[1][2] }; + uniform float3 p2 = { tri.p[2][0], tri.p[2][1], tri.p[2][2] }; + uniform float3 e1 = p1 - p0; + uniform float3 e2 = p2 - p0; float3 s1 = Cross(ray.dir, e2); float divisor = Dot(s1, e1); @@ -153,7 +159,7 @@ static inline bool TriIntersect(const reference Triangle tri, reference Ray ray) float invDivisor = 1.f / divisor; // Compute first barycentric coordinate - float3 d = ray.origin - tri.p[0]; + float3 d = ray.origin - p0; float b1 = Dot(d, s1) * invDivisor; if (b1 < 0. || b1 > 1.) hit = false; diff --git a/examples/rt/rt.vcxproj b/examples/rt/rt.vcxproj index ebd3ae28..d7f2f775 100644 --- a/examples/rt/rt.vcxproj +++ b/examples/rt/rt.vcxproj @@ -81,6 +81,7 @@ Level3 Disabled WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) + $(TargetDir) true Fast @@ -96,6 +97,7 @@ Level3 Disabled WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) + $(TargetDir) true Fast @@ -113,6 +115,7 @@ true true WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) + $(TargetDir) Fast @@ -131,6 +134,7 @@ true true WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) + $(TargetDir) Fast @@ -144,21 +148,21 @@ Document -ispc -O2 %(Filename).ispc -o %(Filename).obj -h %(Filename)_ispc.h --arch=x86 +ispc -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --arch=x86 --target=sse2,sse4-x2,avx -ispc -O2 %(Filename).ispc -o %(Filename).obj -h %(Filename)_ispc.h +ispc -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --target=sse2,sse4-x2,avx - %(Filename).obj - %(Filename).obj + $(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_avx.obj;$(TargetDir)%(Filename)_ispc.h + $(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_avx.obj;$(TargetDir)%(Filename)_ispc.h -ispc -O2 %(Filename).ispc -o %(Filename).obj -h %(Filename)_ispc.h --arch=x86 +ispc -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --arch=x86 --target=sse2,sse4-x2,avx -ispc -O2 %(Filename).ispc -o %(Filename).obj -h %(Filename)_ispc.h +ispc -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --target=sse2,sse4-x2,avx - %(Filename).obj - %(Filename).obj + $(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_avx.obj;$(TargetDir)%(Filename)_ispc.h + $(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_avx.obj;$(TargetDir)%(Filename)_ispc.h diff --git a/examples/rt/rt_serial.cpp b/examples/rt/rt_serial.cpp index 6ce1a34b..cc413dea 100644 --- a/examples/rt/rt_serial.cpp +++ b/examples/rt/rt_serial.cpp @@ -75,12 +75,13 @@ struct Ray { // Declare these in a namespace so the mangling matches namespace ispc { struct Triangle { - float3 p[3]; + float p[3][4]; // extra float pad after each vertex int32_t id; + int32_t pad[3]; // make 16 x 32-bits }; struct LinearBVHNode { - float3 bounds[2]; + float bounds[2][3]; int32_t offset; // primitives for leaf, second child for interior uint8_t nPrimitives; uint8_t splitAxis; @@ -140,12 +141,14 @@ static void generateRay(const float raster2camera[4][4], } -static inline bool BBoxIntersect(const float3 bounds[2], +static inline bool BBoxIntersect(const float bounds[2][3], const Ray &ray) { + float3 bounds0(bounds[0][0], bounds[0][1], bounds[0][2]); + float3 bounds1(bounds[1][0], bounds[1][1], bounds[1][2]); float t0 = ray.mint, t1 = ray.maxt; - float3 tNear = (bounds[0] - ray.origin) * ray.invDir; - float3 tFar = (bounds[1] - ray.origin) * ray.invDir; + float3 tNear = (bounds0 - ray.origin) * ray.invDir; + float3 tFar = (bounds1 - ray.origin) * ray.invDir; if (tNear.x > tFar.x) { float tmp = tNear.x; tNear.x = tFar.x; @@ -176,8 +179,11 @@ static inline bool BBoxIntersect(const float3 bounds[2], inline bool TriIntersect(const Triangle &tri, Ray &ray) { - float3 e1 = tri.p[1] - tri.p[0]; - float3 e2 = tri.p[2] - tri.p[0]; + float3 p0(tri.p[0][0], tri.p[0][1], tri.p[0][2]); + float3 p1(tri.p[1][0], tri.p[1][1], tri.p[1][2]); + float3 p2(tri.p[2][0], tri.p[2][1], tri.p[2][2]); + float3 e1 = p1 - p0; + float3 e2 = p2 - p0; float3 s1 = Cross(ray.dir, e2); float divisor = Dot(s1, e1); @@ -187,7 +193,7 @@ inline bool TriIntersect(const Triangle &tri, Ray &ray) { float invDivisor = 1.f / divisor; // Compute first barycentric coordinate - float3 d = ray.origin - tri.p[0]; + float3 d = ray.origin - p0; float b1 = Dot(d, s1) * invDivisor; if (b1 < 0. || b1 > 1.) return false; diff --git a/examples/simple/Makefile b/examples/simple/Makefile index 207369e3..80f09193 100644 --- a/examples/simple/Makefile +++ b/examples/simple/Makefile @@ -2,7 +2,7 @@ CXX=g++ -m64 CXXFLAGS=-Iobjs/ -O3 -Wall ISPC=ispc -ISPCFLAGS=-O2 --arch=x86-64 +ISPCFLAGS=-O2 --arch=x86-64 --target=sse2 default: simple diff --git a/examples/simple/simple.vcxproj b/examples/simple/simple.vcxproj index 2c9caf88..162b6c45 100644 --- a/examples/simple/simple.vcxproj +++ b/examples/simple/simple.vcxproj @@ -25,21 +25,21 @@ Document -ispc -O2 %(Filename).ispc -o %(Filename).obj -h %(Filename)_ispc.h --arch=x86 +ispc -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --arch=x86 --target=sse2 -ispc -O2 %(Filename).ispc -o %(Filename).obj -h %(Filename)_ispc.h +ispc -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --target=sse2 - %(Filename).obj - %(Filename).obj + $(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_ispc.h + $(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_ispc.h -ispc -O2 %(Filename).ispc -o %(Filename).obj -h %(Filename)_ispc.h --arch=x86 +ispc -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --arch=x86 --target=sse2 -ispc -O2 %(Filename).ispc -o %(Filename).obj -h %(Filename)_ispc.h +ispc -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --target=sse2 - %(Filename).obj - %(Filename).obj + $(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_ispc.h + $(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_ispc.h @@ -105,6 +105,7 @@ ispc -O2 %(Filename).ispc -o %(Filename).obj -h %(Filename)_ispc.h Level3 Disabled WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) + $(TargetDir) Console @@ -118,6 +119,7 @@ ispc -O2 %(Filename).ispc -o %(Filename).obj -h %(Filename)_ispc.h Level3 Disabled WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) + $(TargetDir) Console @@ -133,6 +135,7 @@ ispc -O2 %(Filename).ispc -o %(Filename).obj -h %(Filename)_ispc.h true true WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) + $(TargetDir) Console @@ -150,6 +153,7 @@ ispc -O2 %(Filename).ispc -o %(Filename).obj -h %(Filename)_ispc.h true true WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) + $(TargetDir) Console @@ -161,4 +165,4 @@ ispc -O2 %(Filename).ispc -o %(Filename).obj -h %(Filename)_ispc.h - + diff --git a/examples/stencil/Makefile b/examples/stencil/Makefile index 7caf4e62..b479cc32 100644 --- a/examples/stencil/Makefile +++ b/examples/stencil/Makefile @@ -8,7 +8,11 @@ TASK_OBJ=$(addprefix objs/, $(subst ../,, $(TASK_CXX:.cpp=.o))) CXX=g++ CXXFLAGS=-Iobjs/ -O3 -Wall -m64 ISPC=ispc -ISPCFLAGS=-O2 --target=sse4x2 --arch=x86-64 +ISPCFLAGS=-O2 --target=sse2,sse4-x2,avx --arch=x86-64 + +OBJS=objs/stencil.o objs/stencil_serial.o $(TASK_OBJ) objs/stencil_ispc.o \ + objs/stencil_ispc_sse2.o objs/stencil_ispc_sse4.o \ + objs/stencil_ispc_avx.o default: stencil @@ -20,8 +24,8 @@ dirs: clean: /bin/rm -rf objs *~ stencil -stencil: dirs objs/stencil.o objs/stencil_serial.o objs/stencil_ispc.o $(TASK_OBJ) - $(CXX) $(CXXFLAGS) -o $@ objs/stencil.o objs/stencil_ispc.o objs/stencil_serial.o $(TASK_OBJ) -lm $(TASK_LIB) +stencil: dirs $(OBJS) + $(CXX) $(CXXFLAGS) -o $@ $(OBJS) -lm $(TASK_LIB) objs/%.o: %.cpp $(CXX) $< $(CXXFLAGS) -c -o $@ @@ -31,5 +35,5 @@ objs/%.o: ../%.cpp objs/stencil.o: objs/stencil_ispc.h -objs/%_ispc.h objs/%_ispc.o: %.ispc +objs/%_ispc.h objs/%_ispc.o objs/%_ispc_sse2.o objs/%_ispc_sse4.o objs/%_ispc_avx.o: %.ispc $(ISPC) $(ISPCFLAGS) $< -o objs/$*_ispc.o -h objs/$*_ispc.h diff --git a/examples/stencil/stencil.vcxproj b/examples/stencil/stencil.vcxproj index d6d130b9..0213c492 100644 --- a/examples/stencil/stencil.vcxproj +++ b/examples/stencil/stencil.vcxproj @@ -81,6 +81,7 @@ Level3 Disabled WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) + $(TargetDir) true Fast @@ -96,6 +97,7 @@ Level3 Disabled WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) + $(TargetDir) true Fast @@ -113,6 +115,7 @@ true true WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) + $(TargetDir) Fast @@ -131,6 +134,7 @@ true true WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) + $(TargetDir) Fast @@ -144,21 +148,21 @@ Document -ispc -O2 %(Filename).ispc -o %(Filename).obj -h %(Filename)_ispc.h --arch=x86 +ispc -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --arch=x86 --target=sse2,sse4-x2,avx -ispc -O2 %(Filename).ispc -o %(Filename).obj -h %(Filename)_ispc.h +ispc -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --target=sse2,sse4-x2,avx - %(Filename).obj - %(Filename).obj + $(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_avx.obj;$(TargetDir)%(Filename)_ispc.h + $(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_avx.obj;$(TargetDir)%(Filename)_ispc.h -ispc -O2 %(Filename).ispc -o %(Filename).obj -h %(Filename)_ispc.h --arch=x86 +ispc -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --arch=x86 --target=sse2,sse4-x2,avx -ispc -O2 %(Filename).ispc -o %(Filename).obj -h %(Filename)_ispc.h +ispc -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --target=sse2,sse4-x2,avx - %(Filename).obj - %(Filename).obj + $(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_avx.obj;$(TargetDir)%(Filename)_ispc.h + $(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_avx.obj;$(TargetDir)%(Filename)_ispc.h diff --git a/examples/volume_rendering/Makefile b/examples/volume_rendering/Makefile index 0458c017..fa8ff753 100644 --- a/examples/volume_rendering/Makefile +++ b/examples/volume_rendering/Makefile @@ -8,7 +8,10 @@ TASK_OBJ=$(addprefix objs/, $(subst ../,, $(TASK_CXX:.cpp=.o))) CXX=g++ CXXFLAGS=-Iobjs/ -O3 -Wall -m64 ISPC=ispc -ISPCFLAGS=-O2 --target=sse4x2 --arch=x86-64 +ISPCFLAGS=-O2 --target=sse2,sse4-x2 --arch=x86-64 + +OBJS=objs/volume.o objs/volume_serial.o $(TASK_OBJ) objs/volume_ispc.o \ + objs/volume_ispc_sse2.o objs/volume_ispc_sse4.o default: volume @@ -20,8 +23,8 @@ dirs: clean: /bin/rm -rf objs *~ volume -volume: dirs objs/volume.o objs/volume_serial.o objs/volume_ispc.o $(TASK_OBJ) - $(CXX) $(CXXFLAGS) -o $@ objs/volume.o objs/volume_ispc.o objs/volume_serial.o $(TASK_OBJ) -lm $(TASK_LIB) +volume: dirs $(OBJS) + $(CXX) $(CXXFLAGS) -o $@ $(OBJS) -lm $(TASK_LIB) objs/%.o: %.cpp $(CXX) $< $(CXXFLAGS) -c -o $@ @@ -31,5 +34,5 @@ objs/%.o: ../%.cpp objs/volume.o: objs/volume_ispc.h -objs/%_ispc.h objs/%_ispc.o: %.ispc +objs/%_ispc.h objs/%_ispc.o objs/%_ispc_sse2.o objs/%_ispc_sse4.o: %.ispc $(ISPC) $(ISPCFLAGS) $< -o objs/$*_ispc.o -h objs/$*_ispc.h diff --git a/examples/volume_rendering/volume.vcxproj b/examples/volume_rendering/volume.vcxproj index 540c8421..1531d4e5 100644 --- a/examples/volume_rendering/volume.vcxproj +++ b/examples/volume_rendering/volume.vcxproj @@ -81,6 +81,7 @@ Level3 Disabled WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) + $(TargetDir) true Fast @@ -96,6 +97,7 @@ Level3 Disabled WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) + $(TargetDir) true Fast @@ -113,6 +115,7 @@ true true WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) + $(TargetDir) Fast @@ -131,6 +134,7 @@ true true WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) + $(TargetDir) Fast @@ -148,18 +152,18 @@ Document - ispc -O2 %(Filename).ispc -o %(Filename).obj -h %(Filename)_ispc.h --arch=x86 --target=sse4x2 + ispc -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --arch=x86 --target=sse2,sse4-x2 - ispc -O2 %(Filename).ispc -o %(Filename).obj -h %(Filename)_ispc.h --target=sse4x2 + ispc -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --target=sse2,sse4-x2 - %(Filename).obj;%(Filename)_ispc.h - %(Filename).obj;%(Filename)_ispc.h - ispc -O2 %(Filename).ispc -o %(Filename).obj -h %(Filename)_ispc.h --arch=x86 --target=sse4x2 + $(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_ispc.h + $(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_ispc.h + ispc -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --arch=x86 --target=sse2,sse4-x2 - ispc -O2 %(Filename).ispc -o %(Filename).obj -h %(Filename)_ispc.h --target=sse4x2 + ispc -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --target=sse2,sse4-x2 - %(Filename).obj;%(Filename)_ispc.h - %(Filename).obj;%(Filename)_ispc.h + $(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_ispc.h + $(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_ispc.h