diff --git a/examples/aobench/Makefile b/examples/aobench/Makefile
index e8fce406..66fde5d3 100644
--- a/examples/aobench/Makefile
+++ b/examples/aobench/Makefile
@@ -8,7 +8,11 @@ TASK_OBJ=$(addprefix objs/, $(subst ../,, $(TASK_CXX:.cpp=.o)))
CXX=g++
CXXFLAGS=-Iobjs/ -O3 -Wall -m64
ISPC=ispc
-ISPCFLAGS=-O2 --target=sse4 --arch=x86-64
+ISPCFLAGS=-O2 --target=sse2,sse4,avx --arch=x86-64
+
+ISPC_OBJS=objs/ao_ispc.o objs/ao_ispc_sse2.o objs/ao_ispc_sse4.o \
+ objs/ao_ispc_avx.o
+OBJS=objs/ao.o objs/ao_serial.o $(ISPC_OBJS) $(TASK_OBJ)
default: ao
@@ -20,8 +24,8 @@ dirs:
clean:
/bin/rm -rf objs *~ ao
-ao: dirs objs/ao.o objs/ao_serial.o objs/ao_ispc.o $(TASK_OBJ)
- $(CXX) $(CXXFLAGS) -o $@ objs/ao.o objs/ao_ispc.o objs/ao_serial.o $(TASK_OBJ) -lm $(TASK_LIB)
+ao: dirs $(OBJS)  # OBJS already contains $(TASK_OBJ); same form as the other example Makefiles
+ $(CXX) $(CXXFLAGS) -o $@ $(OBJS) -lm $(TASK_LIB)
objs/%.o: %.cpp
$(CXX) $< $(CXXFLAGS) -c -o $@
@@ -31,5 +35,5 @@ objs/%.o: ../%.cpp
objs/ao.o: objs/ao_ispc.h
-objs/%_ispc.h objs/%_ispc.o: %.ispc
+objs/%_ispc.h objs/%_ispc.o objs/%_ispc_sse2.o objs/%_ispc_sse4.o objs/%_ispc_avx.o: %.ispc
$(ISPC) $(ISPCFLAGS) $< -o objs/$*_ispc.o -h objs/$*_ispc.h
diff --git a/examples/aobench/aobench.vcxproj b/examples/aobench/aobench.vcxproj
index af754e26..5d61f0bb 100644
--- a/examples/aobench/aobench.vcxproj
+++ b/examples/aobench/aobench.vcxproj
@@ -26,18 +26,18 @@
Document
- ispc -O2 %(Filename).ispc -o %(Filename).obj -h %(Filename)_ispc.h --arch=x86
+ ispc -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --arch=x86 --target=sse2,sse4,avx
- ispc -O2 %(Filename).ispc -o %(Filename).obj -h %(Filename)_ispc.h
+ ispc -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --target=sse2,sse4,avx
- %(Filename).obj
- %(Filename).obj
- ispc -O2 %(Filename).ispc -o %(Filename).obj -h %(Filename)_ispc.h --arch=x86
+ $(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_avx.obj;$(TargetDir)%(Filename)_ispc.h
+ $(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_avx.obj;$(TargetDir)%(Filename)_ispc.h
+ ispc -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --arch=x86 --target=sse2,sse4,avx
- ispc -O2 %(Filename).ispc -o %(Filename).obj -h %(Filename)_ispc.h
+ ispc -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --target=sse2,sse4,avx
- %(Filename).obj
- %(Filename).obj
+ $(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_avx.obj;$(TargetDir)%(Filename)_ispc.h
+ $(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_avx.obj;$(TargetDir)%(Filename)_ispc.h
@@ -103,6 +103,7 @@
Level3
Disabled
WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)
+ $(TargetDir)
true
Fast
@@ -118,6 +119,7 @@
Level3
Disabled
WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)
+ $(TargetDir)
true
Fast
@@ -135,6 +137,7 @@
true
true
WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)
+ $(TargetDir)
Fast
@@ -153,6 +156,7 @@
true
true
WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)
+ $(TargetDir)
Fast
diff --git a/examples/aobench_instrumented/Makefile b/examples/aobench_instrumented/Makefile
index 20a5a9b9..bfcdfc48 100644
--- a/examples/aobench_instrumented/Makefile
+++ b/examples/aobench_instrumented/Makefile
@@ -2,7 +2,7 @@
CXX=g++ -m64
CXXFLAGS=-Iobjs/ -g3 -Wall
ISPC=ispc
-ISPCFLAGS=-O2 --instrument --arch=x86-64
+ISPCFLAGS=-O2 --instrument --arch=x86-64 --target=sse2
default: ao
diff --git a/examples/aobench_instrumented/aobench_instrumented.vcxproj b/examples/aobench_instrumented/aobench_instrumented.vcxproj
index 3dec13e1..3012e236 100644
--- a/examples/aobench_instrumented/aobench_instrumented.vcxproj
+++ b/examples/aobench_instrumented/aobench_instrumented.vcxproj
@@ -1,4 +1,4 @@
-
+
@@ -25,18 +25,18 @@
Document
- ispc -O2 %(Filename).ispc -o %(Filename).obj -h %(Filename)_ispc.h --arch=x86 --instrument
+ ispc -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --arch=x86 --instrument --target=sse2
- ispc -O2 %(Filename).ispc -o %(Filename).obj -h %(Filename)_ispc.h --instrument
+ ispc -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --instrument --target=sse2
- %(Filename).obj
- %(Filename).obj
- ispc -O2 %(Filename).ispc -o %(Filename).obj -h %(Filename)_ispc.h --arch=x86 --instrument
+ $(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_ispc.h
+ $(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_ispc.h
+ ispc -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --arch=x86 --instrument --target=sse2
- ispc -O2 %(Filename).ispc -o %(Filename).obj -h %(Filename)_ispc.h --instrument
+ ispc -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --instrument --target=sse2
- %(Filename).obj
- %(Filename).obj
+ $(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_ispc.h
+ $(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_ispc.h
@@ -102,6 +102,7 @@
Level3
Disabled
WIN32;_DEBUG;_CONSOLE;_CRT_SECURE_NO_WARNINGS;%(PreprocessorDefinitions)
+ $(TargetDir)
Console
@@ -115,6 +116,7 @@
Level3
Disabled
WIN32;_DEBUG;_CONSOLE;_CRT_SECURE_NO_WARNINGS;%(PreprocessorDefinitions)
+ $(TargetDir)
Console
@@ -130,6 +132,7 @@
true
true
WIN32;NDEBUG;_CONSOLE;_CRT_SECURE_NO_WARNINGS;%(PreprocessorDefinitions)
+ $(TargetDir)
Console
@@ -147,6 +150,7 @@
true
true
WIN32;NDEBUG;_CONSOLE;_CRT_SECURE_NO_WARNINGS;%(PreprocessorDefinitions)
+ $(TargetDir)
Console
diff --git a/examples/deferred/Makefile b/examples/deferred/Makefile
index 840af3de..a6bc5e53 100644
--- a/examples/deferred/Makefile
+++ b/examples/deferred/Makefile
@@ -8,9 +8,11 @@ TASK_OBJ=$(addprefix objs/, $(subst ../,, $(TASK_CXX:.cpp=.o)))
CXX=g++
CXXFLAGS=-Iobjs/ -O3 -Wall -m64
ISPC=ispc
-ISPCFLAGS=-O2 --target=sse4x2 --arch=x86-64 --math-lib=fast
+ISPCFLAGS=-O2 --target=sse2,sse4-x2,avx-x2 --arch=x86-64 --math-lib=fast
-OBJS=objs/main.o objs/common.o objs/kernels_ispc.o objs/dynamic_c.o objs/dynamic_cilk.o
+OBJS=objs/main.o objs/common.o objs/kernels_ispc.o objs/kernels_ispc_sse2.o \
+ objs/kernels_ispc_sse4.o objs/kernels_ispc_avx.o \
+ objs/dynamic_c.o objs/dynamic_cilk.o
default: deferred_shading
@@ -32,5 +34,5 @@ objs/%.o: %.cpp objs/kernels_ispc.h deferred.h
objs/%.o: ../%.cpp
$(CXX) $< $(CXXFLAGS) -c -o $@
-objs/%_ispc.h objs/%_ispc.o: %.ispc
+objs/%_ispc.h objs/%_ispc.o objs/%_ispc_sse2.o objs/%_ispc_sse4.o objs/%_ispc_avx.o: %.ispc
$(ISPC) $(ISPCFLAGS) $< -o objs/$*_ispc.o -h objs/$*_ispc.h
diff --git a/examples/deferred/deferred_shading.vcxproj b/examples/deferred/deferred_shading.vcxproj
index b87bec51..9b429120 100755
--- a/examples/deferred/deferred_shading.vcxproj
+++ b/examples/deferred/deferred_shading.vcxproj
@@ -81,6 +81,7 @@
Level3
Disabled
WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)
+ $(TargetDir)
true
Fast
@@ -96,6 +97,7 @@
Level3
Disabled
WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)
+ $(TargetDir)
true
Fast
@@ -113,6 +115,7 @@
true
true
WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)
+ $(TargetDir)
Fast
@@ -131,6 +134,7 @@
true
true
WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)
+ $(TargetDir)
Fast
@@ -150,18 +154,18 @@
Document
- ispc -O2 %(Filename).ispc -o %(Filename).obj -h %(Filename)_ispc.h --arch=x86 --target=sse4x2
+ ispc -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --arch=x86 --target=sse2,sse4-x2,avx-x2
- ispc -O2 %(Filename).ispc -o %(Filename).obj -h %(Filename)_ispc.h --target=sse4x2
+ ispc -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --target=sse2,sse4-x2,avx-x2
- %(Filename).obj;%(Filename)_ispc.h
- %(Filename).obj;%(Filename)_ispc.h
- ispc -O2 %(Filename).ispc -o %(Filename).obj -h %(Filename)_ispc.h --arch=x86 --target=sse4x2
+ $(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_avx.obj;$(TargetDir)%(Filename)_ispc.h
+ $(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_avx.obj;$(TargetDir)%(Filename)_ispc.h
+ ispc -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --arch=x86 --target=sse2,sse4-x2,avx-x2
- ispc -O2 %(Filename).ispc -o %(Filename).obj -h %(Filename)_ispc.h --target=sse4x2
+ ispc -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --target=sse2,sse4-x2,avx-x2
- %(Filename).obj;%(Filename)_ispc.h
- %(Filename).obj;%(Filename)_ispc.h
+ $(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_avx.obj;$(TargetDir)%(Filename)_ispc.h
+ $(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_avx.obj;$(TargetDir)%(Filename)_ispc.h
diff --git a/examples/deferred/kernels.ispc b/examples/deferred/kernels.ispc
index 7fc046e4..65fa1547 100644
--- a/examples/deferred/kernels.ispc
+++ b/examples/deferred/kernels.ispc
@@ -66,8 +66,6 @@ struct InputHeader
uniform int32 inputDataArrayOffsets[idaNum];
};
-export void foo(reference InputHeader h) { }
-
///////////////////////////////////////////////////////////////////////////
// Common utility routines
diff --git a/examples/mandelbrot/Makefile b/examples/mandelbrot/Makefile
index a78e4743..256500c9 100644
--- a/examples/mandelbrot/Makefile
+++ b/examples/mandelbrot/Makefile
@@ -2,7 +2,7 @@
CXX=g++ -m64
CXXFLAGS=-Iobjs/ -O3 -Wall
ISPC=ispc
-ISPCFLAGS=-O2 --target=sse4x2 --arch=x86-64
+ISPCFLAGS=-O2 --target=sse2,sse4-x2,avx-x2 --arch=x86-64
default: mandelbrot
@@ -14,13 +14,17 @@ dirs:
clean:
/bin/rm -rf objs *~ mandelbrot
-mandelbrot: dirs objs/mandelbrot.o objs/mandelbrot_serial.o objs/mandelbrot_ispc.o
- $(CXX) $(CXXFLAGS) -o $@ objs/mandelbrot.o objs/mandelbrot_ispc.o objs/mandelbrot_serial.o -lm
+OBJS=objs/mandelbrot.o objs/mandelbrot_serial.o objs/mandelbrot_ispc_sse2.o \
+ objs/mandelbrot_ispc_sse4.o objs/mandelbrot_ispc_avx.o \
+ objs/mandelbrot_ispc.o
+
+mandelbrot: dirs $(OBJS)
+ $(CXX) $(CXXFLAGS) -o $@ $(OBJS) -lm
objs/%.o: %.cpp
$(CXX) $< $(CXXFLAGS) -c -o $@
objs/mandelbrot.o: objs/mandelbrot_ispc.h
-objs/%_ispc.h objs/%_ispc.o: %.ispc
+objs/%_ispc.h objs/%_ispc.o objs/%_ispc_sse2.o objs/%_ispc_sse4.o objs/%_ispc_avx.o: %.ispc
$(ISPC) $(ISPCFLAGS) $< -o objs/$*_ispc.o -h objs/$*_ispc.h
diff --git a/examples/mandelbrot/mandelbrot.vcxproj b/examples/mandelbrot/mandelbrot.vcxproj
index d37f210f..2365f5d8 100644
--- a/examples/mandelbrot/mandelbrot.vcxproj
+++ b/examples/mandelbrot/mandelbrot.vcxproj
@@ -1,4 +1,4 @@
-
+
@@ -81,6 +81,7 @@
Level3
Disabled
WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)
+ $(TargetDir)
true
Fast
@@ -96,6 +97,7 @@
Level3
Disabled
WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)
+ $(TargetDir)
true
Fast
@@ -113,6 +115,7 @@
true
true
WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)
+ $(TargetDir)
Fast
@@ -131,6 +134,7 @@
true
true
WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)
+ $(TargetDir)
Fast
@@ -147,18 +151,18 @@
Document
- ispc -O2 %(Filename).ispc -o %(Filename).obj -h %(Filename)_ispc.h --arch=x86 --target=sse4x2
+ ispc -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --arch=x86 --target=sse2,sse4-x2,avx-x2
- ispc -O2 %(Filename).ispc -o %(Filename).obj -h %(Filename)_ispc.h --target=sse4x2
+ ispc -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --target=sse2,sse4-x2,avx-x2
- %(Filename).obj;%(Filename)_ispc.h
- %(Filename).obj;%(Filename)_ispc.h
- ispc -O2 %(Filename).ispc -o %(Filename).obj -h %(Filename)_ispc.h --arch=x86 --target=sse4x2
+ $(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_avx.obj;$(TargetDir)%(Filename)_ispc.h
+ $(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_avx.obj;$(TargetDir)%(Filename)_ispc.h
+ ispc -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --arch=x86 --target=sse2,sse4-x2,avx-x2
- ispc -O2 %(Filename).ispc -o %(Filename).obj -h %(Filename)_ispc.h --target=sse4x2
+ ispc -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --target=sse2,sse4-x2,avx-x2
- %(Filename).obj;%(Filename)_ispc.h
- %(Filename).obj;%(Filename)_ispc.h
+ $(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_avx.obj;$(TargetDir)%(Filename)_ispc.h
+ $(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_avx.obj;$(TargetDir)%(Filename)_ispc.h
diff --git a/examples/mandelbrot_tasks/Makefile b/examples/mandelbrot_tasks/Makefile
index 55b8b03f..10e1e3d9 100644
--- a/examples/mandelbrot_tasks/Makefile
+++ b/examples/mandelbrot_tasks/Makefile
@@ -8,7 +8,11 @@ TASK_OBJ=$(addprefix objs/, $(subst ../,, $(TASK_CXX:.cpp=.o)))
CXX=g++
CXXFLAGS=-Iobjs/ -O3 -Wall -m64
ISPC=ispc
-ISPCFLAGS=-O2 --target=sse4x2 --arch=x86-64
+ISPCFLAGS=-O2 --target=sse2,sse4-x2,avx-x2 --arch=x86-64
+
+OBJS=objs/mandelbrot.o objs/mandelbrot_serial.o $(TASK_OBJ) \
+ objs/mandelbrot_ispc.o objs/mandelbrot_ispc_sse2.o \
+ objs/mandelbrot_ispc_sse4.o objs/mandelbrot_ispc_avx.o
default: mandelbrot
@@ -20,8 +24,8 @@ dirs:
clean:
/bin/rm -rf objs *~ mandelbrot
-mandelbrot: dirs objs/mandelbrot.o objs/mandelbrot_serial.o objs/mandelbrot_ispc.o $(TASK_OBJ)
- $(CXX) $(CXXFLAGS) -o $@ objs/mandelbrot.o objs/mandelbrot_ispc.o objs/mandelbrot_serial.o $(TASK_OBJ) -lm $(TASK_LIB)
+mandelbrot: dirs $(OBJS)
+ $(CXX) $(CXXFLAGS) -o $@ $(OBJS) -lm $(TASK_LIB)
objs/%.o: %.cpp
$(CXX) $< $(CXXFLAGS) -c -o $@
@@ -31,5 +35,5 @@ objs/%.o: ../%.cpp
objs/mandelbrot.o: objs/mandelbrot_ispc.h
-objs/%_ispc.h objs/%_ispc.o: %.ispc
+objs/%_ispc.h objs/%_ispc.o objs/%_ispc_sse2.o objs/%_ispc_sse4.o objs/%_ispc_avx.o: %.ispc
$(ISPC) $(ISPCFLAGS) $< -o objs/$*_ispc.o -h objs/$*_ispc.h
diff --git a/examples/mandelbrot_tasks/mandelbrot_tasks.vcxproj b/examples/mandelbrot_tasks/mandelbrot_tasks.vcxproj
index bc247f4e..86bf7292 100644
--- a/examples/mandelbrot_tasks/mandelbrot_tasks.vcxproj
+++ b/examples/mandelbrot_tasks/mandelbrot_tasks.vcxproj
@@ -81,6 +81,7 @@
Level3
Disabled
WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)
+ $(TargetDir)
true
Fast
@@ -96,6 +97,7 @@
Level3
Disabled
WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)
+ $(TargetDir)
true
Fast
@@ -113,6 +115,7 @@
true
true
WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)
+ $(TargetDir)
Fast
@@ -131,6 +134,7 @@
true
true
WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)
+ $(TargetDir)
Fast
@@ -148,18 +152,18 @@
Document
- ispc -O2 %(Filename).ispc -o %(Filename).obj -h %(Filename)_ispc.h --arch=x86 --target=sse4x2
+ ispc -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --arch=x86 --target=sse2,sse4-x2,avx-x2
- ispc -O2 %(Filename).ispc -o %(Filename).obj -h %(Filename)_ispc.h --target=sse4x2
+ ispc -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --target=sse2,sse4-x2,avx-x2
- %(Filename).obj;%(Filename)_ispc.h
- %(Filename).obj;%(Filename)_ispc.h
- ispc -O2 %(Filename).ispc -o %(Filename).obj -h %(Filename)_ispc.h --arch=x86 --target=sse4x2
+ $(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_avx.obj;$(TargetDir)%(Filename)_ispc.h
+ $(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_avx.obj;$(TargetDir)%(Filename)_ispc.h
+ ispc -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --arch=x86 --target=sse2,sse4-x2,avx-x2
- ispc -O2 %(Filename).ispc -o %(Filename).obj -h %(Filename)_ispc.h --target=sse4x2
+ ispc -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --target=sse2,sse4-x2,avx-x2
- %(Filename).obj;%(Filename)_ispc.h
- %(Filename).obj;%(Filename)_ispc.h
+ $(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_avx.obj;$(TargetDir)%(Filename)_ispc.h
+ $(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_avx.obj;$(TargetDir)%(Filename)_ispc.h
diff --git a/examples/noise/Makefile b/examples/noise/Makefile
index 66f4401d..f2158742 100644
--- a/examples/noise/Makefile
+++ b/examples/noise/Makefile
@@ -2,7 +2,10 @@
CXX=g++ -m64
CXXFLAGS=-Iobjs/ -O3 -Wall
ISPC=ispc
-ISPCFLAGS=-O2 --target=sse4 --arch=x86-64
+ISPCFLAGS=-O2 --target=sse2,sse4,avx-x2 --arch=x86-64
+
+OBJS=objs/noise.o objs/noise_serial.o objs/noise_ispc.o objs/noise_ispc_sse2.o \
+ objs/noise_ispc_sse4.o objs/noise_ispc_avx.o
default: noise
@@ -14,13 +17,13 @@ dirs:
clean:
/bin/rm -rf objs *~ noise
-noise: dirs objs/noise.o objs/noise_serial.o objs/noise_ispc.o
- $(CXX) $(CXXFLAGS) -o $@ objs/noise.o objs/noise_ispc.o objs/noise_serial.o -lm
+noise: dirs $(OBJS)
+ $(CXX) $(CXXFLAGS) -o $@ $(OBJS) -lm
objs/%.o: %.cpp
$(CXX) $< $(CXXFLAGS) -c -o $@
objs/noise.o: objs/noise_ispc.h
-objs/%_ispc.h objs/%_ispc.o: %.ispc
+objs/%_ispc.h objs/%_ispc.o objs/%_ispc_sse2.o objs/%_ispc_sse4.o objs/%_ispc_avx.o: %.ispc
$(ISPC) $(ISPCFLAGS) $< -o objs/$*_ispc.o -h objs/$*_ispc.h
diff --git a/examples/noise/noise.vcxproj b/examples/noise/noise.vcxproj
index 03f47da6..4ccbe588 100644
--- a/examples/noise/noise.vcxproj
+++ b/examples/noise/noise.vcxproj
@@ -1,4 +1,4 @@
-
+
@@ -81,6 +81,7 @@
Level3
Disabled
WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)
+ $(TargetDir)
true
Fast
@@ -96,6 +97,7 @@
Level3
Disabled
WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)
+ $(TargetDir)
true
Fast
@@ -113,6 +115,7 @@
true
true
WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)
+ $(TargetDir)
Fast
@@ -131,6 +134,7 @@
true
true
WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)
+ $(TargetDir)
Fast
@@ -147,18 +151,18 @@
Document
- ispc -O2 %(Filename).ispc -o %(Filename).obj -h %(Filename)_ispc.h --arch=x86 --target=sse4
+ ispc -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --arch=x86 --target=sse2,sse4,avx-x2
- ispc -O2 %(Filename).ispc -o %(Filename).obj -h %(Filename)_ispc.h --target=sse4
+ ispc -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --target=sse2,sse4,avx-x2
- %(Filename).obj;%(Filename)_ispc.h
- %(Filename).obj;%(Filename)_ispc.h
- ispc -O2 %(Filename).ispc -o %(Filename).obj -h %(Filename)_ispc.h --arch=x86 --target=sse4
+ $(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_avx.obj;$(TargetDir)%(Filename)_ispc.h
+ $(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_avx.obj;$(TargetDir)%(Filename)_ispc.h
+ ispc -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --arch=x86 --target=sse2,sse4,avx-x2
- ispc -O2 %(Filename).ispc -o %(Filename).obj -h %(Filename)_ispc.h --target=sse4
+ ispc -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --target=sse2,sse4,avx-x2
- %(Filename).obj;%(Filename)_ispc.h
- %(Filename).obj;%(Filename)_ispc.h
+ $(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_avx.obj;$(TargetDir)%(Filename)_ispc.h
+ $(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_avx.obj;$(TargetDir)%(Filename)_ispc.h
diff --git a/examples/options/Makefile b/examples/options/Makefile
index d5f519f6..4163ba3a 100644
--- a/examples/options/Makefile
+++ b/examples/options/Makefile
@@ -2,7 +2,11 @@
CXX=g++ -m64
CXXFLAGS=-Iobjs/ -g -Wall
ISPC=ispc
-ISPCFLAGS=-O2 --target=sse4x2 --arch=x86-64
+ISPCFLAGS=-O2 --target=sse2,sse4-x2,avx-x2 --arch=x86-64
+
+OBJS=objs/options.o objs/options_serial.o objs/options_ispc.o \
+ objs/options_ispc_sse2.o objs/options_ispc_sse4.o \
+ objs/options_ispc_avx.o
default: options
@@ -14,13 +18,13 @@ dirs:
clean:
/bin/rm -rf objs *~ options
-options: dirs objs/options.o objs/options_serial.o objs/options_ispc.o
- $(CXX) $(CXXFLAGS) -o $@ objs/options.o objs/options_ispc.o objs/options_serial.o -lm
+options: dirs $(OBJS)
+ $(CXX) $(CXXFLAGS) -o $@ $(OBJS) -lm
objs/%.o: %.cpp
$(CXX) $< $(CXXFLAGS) -c -o $@
objs/options.o: objs/options_ispc.h options_defs.h
-objs/%_ispc.h objs/%_ispc.o: %.ispc options_defs.h
+objs/%_ispc.h objs/%_ispc.o objs/%_ispc_sse2.o objs/%_ispc_sse4.o objs/%_ispc_avx.o: %.ispc options_defs.h
$(ISPC) $(ISPCFLAGS) $< -o objs/$*_ispc.o -h objs/$*_ispc.h
diff --git a/examples/options/options.vcxproj b/examples/options/options.vcxproj
index 88fe2efb..e776dc98 100644
--- a/examples/options/options.vcxproj
+++ b/examples/options/options.vcxproj
@@ -1,4 +1,4 @@
-
+
@@ -81,6 +81,7 @@
Level3
Disabled
WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)
+ $(TargetDir)
4305
true
Fast
@@ -97,6 +98,7 @@
Level3
Disabled
WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)
+ $(TargetDir)
4305
true
Fast
@@ -115,6 +117,7 @@
true
true
WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)
+ $(TargetDir)
4305
Fast
@@ -134,6 +137,7 @@
true
true
WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)
+ $(TargetDir)
4305
Fast
@@ -151,18 +155,18 @@
Document
- ispc -O2 %(Filename).ispc -o %(Filename).obj -h %(Filename)_ispc.h --arch=x86 --target=sse4x2
+ ispc -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --arch=x86 --target=sse2,sse4-x2,avx-x2
- ispc -O2 %(Filename).ispc -o %(Filename).obj -h %(Filename)_ispc.h --target=sse4x2
+ ispc -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --target=sse2,sse4-x2,avx-x2
- %(Filename).obj;%(Filename)_ispc.h
- %(Filename).obj;%(Filename)_ispc.h
- ispc -O2 %(Filename).ispc -o %(Filename).obj -h %(Filename)_ispc.h --arch=x86 --target=sse4x2
+ $(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_avx.obj;$(TargetDir)%(Filename)_ispc.h
+ $(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_avx.obj;$(TargetDir)%(Filename)_ispc.h
+ ispc -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --arch=x86 --target=sse2,sse4-x2,avx-x2
- ispc -O2 %(Filename).ispc -o %(Filename).obj -h %(Filename)_ispc.h --target=sse4x2
+ ispc -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --target=sse2,sse4-x2,avx-x2
- %(Filename).obj;%(Filename)_ispc.h
- %(Filename).obj;%(Filename)_ispc.h
+ $(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_avx.obj;$(TargetDir)%(Filename)_ispc.h
+ $(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_avx.obj;$(TargetDir)%(Filename)_ispc.h
diff --git a/examples/rt/Makefile b/examples/rt/Makefile
index 6d5b1e1c..f5f2318f 100644
--- a/examples/rt/Makefile
+++ b/examples/rt/Makefile
@@ -8,7 +8,10 @@ TASK_OBJ=$(addprefix objs/, $(subst ../,, $(TASK_CXX:.cpp=.o)))
CXX=g++
CXXFLAGS=-Iobjs/ -O3 -Wall -m64
ISPC=ispc
-ISPCFLAGS=-O2 --target=sse4x2 --arch=x86-64
+ISPCFLAGS=-O2 --target=sse2,sse4-x2,avx --arch=x86-64
+
+OBJS=objs/rt.o objs/rt_serial.o $(TASK_OBJ) objs/rt_ispc.o objs/rt_ispc_sse2.o \
+ objs/rt_ispc_sse4.o objs/rt_ispc_avx.o
default: rt
@@ -20,8 +23,8 @@ dirs:
clean:
/bin/rm -rf objs *~ rt
-rt: dirs objs/rt.o objs/rt_serial.o objs/rt_ispc.o $(TASK_OBJ)
- $(CXX) $(CXXFLAGS) -o $@ objs/rt.o objs/rt_ispc.o objs/rt_serial.o $(TASK_OBJ) -lm $(TASK_LIB)
+rt: dirs $(OBJS)
+ $(CXX) $(CXXFLAGS) -o $@ $(OBJS) -lm $(TASK_LIB)
objs/%.o: %.cpp
$(CXX) $< $(CXXFLAGS) -c -o $@
@@ -31,5 +34,5 @@ objs/%.o: ../%.cpp
objs/rt.o: objs/rt_ispc.h
-objs/%_ispc.h objs/%_ispc.o: %.ispc
+objs/%_ispc.h objs/%_ispc.o objs/%_ispc_sse2.o objs/%_ispc_sse4.o objs/%_ispc_avx.o: %.ispc
$(ISPC) $(ISPCFLAGS) $< -o objs/$*_ispc.o -h objs/$*_ispc.h
diff --git a/examples/rt/rt.cpp b/examples/rt/rt.cpp
index 4745f01d..4bc07c5e 100644
--- a/examples/rt/rt.cpp
+++ b/examples/rt/rt.cpp
@@ -168,12 +168,12 @@ int main(int argc, char *argv[]) {
// of node, the total number of int it if a leaf node, etc.
float b[6];
READ(b[0], 6);
- nodes[i].bounds[0].v[0] = b[0];
- nodes[i].bounds[0].v[1] = b[1];
- nodes[i].bounds[0].v[2] = b[2];
- nodes[i].bounds[1].v[0] = b[3];
- nodes[i].bounds[1].v[1] = b[4];
- nodes[i].bounds[1].v[2] = b[5];
+ nodes[i].bounds[0][0] = b[0];
+ nodes[i].bounds[0][1] = b[1];
+ nodes[i].bounds[0][2] = b[2];
+ nodes[i].bounds[1][0] = b[3];
+ nodes[i].bounds[1][1] = b[4];
+ nodes[i].bounds[1][2] = b[5];
READ(nodes[i].offset, 1);
READ(nodes[i].nPrimitives, 1);
READ(nodes[i].splitAxis, 1);
@@ -190,9 +190,9 @@ int main(int argc, char *argv[]) {
READ(v[0], 9);
float *vp = v;
for (int j = 0; j < 3; ++j) {
- triangles[i].p[j].v[0] = *vp++;
- triangles[i].p[j].v[1] = *vp++;
- triangles[i].p[j].v[2] = *vp++;
+ triangles[i].p[j][0] = *vp++;
+ triangles[i].p[j][1] = *vp++;
+ triangles[i].p[j][2] = *vp++;
}
// And create an object id
triangles[i].id = i+1;
diff --git a/examples/rt/rt.ispc b/examples/rt/rt.ispc
index 9f57ec14..88a4a7f6 100644
--- a/examples/rt/rt.ispc
+++ b/examples/rt/rt.ispc
@@ -43,12 +43,13 @@ struct Ray {
};
struct Triangle {
- uniform float3 p[3];
+ uniform float p[3][4]; // xyz per vertex in [0..2]; 4th float is padding (mirrors rt_serial.cpp)
uniform int id;
+ uniform int pad[3]; // pads the struct to 16 32-bit words (12 floats + id + 3)
};
struct LinearBVHNode {
- uniform float3 bounds[2];
+ uniform float bounds[2][3];
uniform unsigned int offset; // num primitives for leaf, second child for interior
uniform unsigned int8 nPrimitives;
uniform unsigned int8 splitAxis;
@@ -103,14 +104,16 @@ static void generateRay(uniform const float raster2camera[4][4],
}
-static inline bool BBoxIntersect(const reference uniform float3 bounds[2],
+static inline bool BBoxIntersect(const uniform float bounds[2][3],
const reference Ray ray) {
+ uniform float3 bounds0 = { bounds[0][0], bounds[0][1], bounds[0][2] };
+ uniform float3 bounds1 = { bounds[1][0], bounds[1][1], bounds[1][2] };
float t0 = ray.mint, t1 = ray.maxt;
// Check all three axis-aligned slabs. Don't try to early out; it's
// not worth the trouble
- float3 tNear = (bounds[0] - ray.origin) * ray.invDir;
- float3 tFar = (bounds[1] - ray.origin) * ray.invDir;
+ float3 tNear = (bounds0 - ray.origin) * ray.invDir;
+ float3 tFar = (bounds1 - ray.origin) * ray.invDir;
if (tNear.x > tFar.x) {
float tmp = tNear.x;
tNear.x = tFar.x;
@@ -141,8 +144,11 @@ static inline bool BBoxIntersect(const reference uniform float3 bounds[2],
static inline bool TriIntersect(const reference Triangle tri, reference Ray ray) {
- uniform float3 e1 = tri.p[1] - tri.p[0];
- uniform float3 e2 = tri.p[2] - tri.p[0];
+ uniform float3 p0 = { tri.p[0][0], tri.p[0][1], tri.p[0][2] };
+ uniform float3 p1 = { tri.p[1][0], tri.p[1][1], tri.p[1][2] };
+ uniform float3 p2 = { tri.p[2][0], tri.p[2][1], tri.p[2][2] };
+ uniform float3 e1 = p1 - p0;
+ uniform float3 e2 = p2 - p0;
float3 s1 = Cross(ray.dir, e2);
float divisor = Dot(s1, e1);
@@ -153,7 +159,7 @@ static inline bool TriIntersect(const reference Triangle tri, reference Ray ray)
float invDivisor = 1.f / divisor;
// Compute first barycentric coordinate
- float3 d = ray.origin - tri.p[0];
+ float3 d = ray.origin - p0;
float b1 = Dot(d, s1) * invDivisor;
if (b1 < 0. || b1 > 1.)
hit = false;
diff --git a/examples/rt/rt.vcxproj b/examples/rt/rt.vcxproj
index ebd3ae28..d7f2f775 100644
--- a/examples/rt/rt.vcxproj
+++ b/examples/rt/rt.vcxproj
@@ -81,6 +81,7 @@
Level3
Disabled
WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)
+ $(TargetDir)
true
Fast
@@ -96,6 +97,7 @@
Level3
Disabled
WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)
+ $(TargetDir)
true
Fast
@@ -113,6 +115,7 @@
true
true
WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)
+ $(TargetDir)
Fast
@@ -131,6 +134,7 @@
true
true
WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)
+ $(TargetDir)
Fast
@@ -144,21 +148,21 @@
Document
-ispc -O2 %(Filename).ispc -o %(Filename).obj -h %(Filename)_ispc.h --arch=x86
+ispc -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --arch=x86 --target=sse2,sse4-x2,avx
-ispc -O2 %(Filename).ispc -o %(Filename).obj -h %(Filename)_ispc.h
+ispc -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --target=sse2,sse4-x2,avx
- %(Filename).obj
- %(Filename).obj
+ $(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_avx.obj;$(TargetDir)%(Filename)_ispc.h
+ $(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_avx.obj;$(TargetDir)%(Filename)_ispc.h
-ispc -O2 %(Filename).ispc -o %(Filename).obj -h %(Filename)_ispc.h --arch=x86
+ispc -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --arch=x86 --target=sse2,sse4-x2,avx
-ispc -O2 %(Filename).ispc -o %(Filename).obj -h %(Filename)_ispc.h
+ispc -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --target=sse2,sse4-x2,avx
- %(Filename).obj
- %(Filename).obj
+ $(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_avx.obj;$(TargetDir)%(Filename)_ispc.h
+ $(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_avx.obj;$(TargetDir)%(Filename)_ispc.h
diff --git a/examples/rt/rt_serial.cpp b/examples/rt/rt_serial.cpp
index 6ce1a34b..cc413dea 100644
--- a/examples/rt/rt_serial.cpp
+++ b/examples/rt/rt_serial.cpp
@@ -75,12 +75,13 @@ struct Ray {
// Declare these in a namespace so the mangling matches
namespace ispc {
struct Triangle {
- float3 p[3];
+ float p[3][4]; // extra float pad after each vertex
int32_t id;
+ int32_t pad[3]; // make 16 x 32-bits
};
struct LinearBVHNode {
- float3 bounds[2];
+ float bounds[2][3];
int32_t offset; // primitives for leaf, second child for interior
uint8_t nPrimitives;
uint8_t splitAxis;
@@ -140,12 +141,14 @@ static void generateRay(const float raster2camera[4][4],
}
-static inline bool BBoxIntersect(const float3 bounds[2],
+static inline bool BBoxIntersect(const float bounds[2][3],
const Ray &ray) {
+ float3 bounds0(bounds[0][0], bounds[0][1], bounds[0][2]);
+ float3 bounds1(bounds[1][0], bounds[1][1], bounds[1][2]);
float t0 = ray.mint, t1 = ray.maxt;
- float3 tNear = (bounds[0] - ray.origin) * ray.invDir;
- float3 tFar = (bounds[1] - ray.origin) * ray.invDir;
+ float3 tNear = (bounds0 - ray.origin) * ray.invDir;
+ float3 tFar = (bounds1 - ray.origin) * ray.invDir;
if (tNear.x > tFar.x) {
float tmp = tNear.x;
tNear.x = tFar.x;
@@ -176,8 +179,11 @@ static inline bool BBoxIntersect(const float3 bounds[2],
inline bool TriIntersect(const Triangle &tri, Ray &ray) {
- float3 e1 = tri.p[1] - tri.p[0];
- float3 e2 = tri.p[2] - tri.p[0];
+ float3 p0(tri.p[0][0], tri.p[0][1], tri.p[0][2]);
+ float3 p1(tri.p[1][0], tri.p[1][1], tri.p[1][2]);
+ float3 p2(tri.p[2][0], tri.p[2][1], tri.p[2][2]);
+ float3 e1 = p1 - p0;
+ float3 e2 = p2 - p0;
float3 s1 = Cross(ray.dir, e2);
float divisor = Dot(s1, e1);
@@ -187,7 +193,7 @@ inline bool TriIntersect(const Triangle &tri, Ray &ray) {
float invDivisor = 1.f / divisor;
// Compute first barycentric coordinate
- float3 d = ray.origin - tri.p[0];
+ float3 d = ray.origin - p0;
float b1 = Dot(d, s1) * invDivisor;
if (b1 < 0. || b1 > 1.)
return false;
diff --git a/examples/simple/Makefile b/examples/simple/Makefile
index 207369e3..80f09193 100644
--- a/examples/simple/Makefile
+++ b/examples/simple/Makefile
@@ -2,7 +2,7 @@
CXX=g++ -m64
CXXFLAGS=-Iobjs/ -O3 -Wall
ISPC=ispc
-ISPCFLAGS=-O2 --arch=x86-64
+ISPCFLAGS=-O2 --arch=x86-64 --target=sse2
default: simple
diff --git a/examples/simple/simple.vcxproj b/examples/simple/simple.vcxproj
index 2c9caf88..162b6c45 100644
--- a/examples/simple/simple.vcxproj
+++ b/examples/simple/simple.vcxproj
@@ -25,21 +25,21 @@
Document
-ispc -O2 %(Filename).ispc -o %(Filename).obj -h %(Filename)_ispc.h --arch=x86
+ispc -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --arch=x86 --target=sse2
-ispc -O2 %(Filename).ispc -o %(Filename).obj -h %(Filename)_ispc.h
+ispc -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --target=sse2
- %(Filename).obj
- %(Filename).obj
+ $(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_ispc.h
+ $(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_ispc.h
-ispc -O2 %(Filename).ispc -o %(Filename).obj -h %(Filename)_ispc.h --arch=x86
+ispc -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --arch=x86 --target=sse2
-ispc -O2 %(Filename).ispc -o %(Filename).obj -h %(Filename)_ispc.h
+ispc -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --target=sse2
- %(Filename).obj
- %(Filename).obj
+ $(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_ispc.h
+ $(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_ispc.h
@@ -105,6 +105,7 @@ ispc -O2 %(Filename).ispc -o %(Filename).obj -h %(Filename)_ispc.h
Level3
Disabled
WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)
+ $(TargetDir)
Console
@@ -118,6 +119,7 @@ ispc -O2 %(Filename).ispc -o %(Filename).obj -h %(Filename)_ispc.h
Level3
Disabled
WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)
+ $(TargetDir)
Console
@@ -133,6 +135,7 @@ ispc -O2 %(Filename).ispc -o %(Filename).obj -h %(Filename)_ispc.h
true
true
WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)
+ $(TargetDir)
Console
@@ -150,6 +153,7 @@ ispc -O2 %(Filename).ispc -o %(Filename).obj -h %(Filename)_ispc.h
true
true
WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)
+ $(TargetDir)
Console
@@ -161,4 +165,4 @@ ispc -O2 %(Filename).ispc -o %(Filename).obj -h %(Filename)_ispc.h
-
+
diff --git a/examples/stencil/Makefile b/examples/stencil/Makefile
index 7caf4e62..b479cc32 100644
--- a/examples/stencil/Makefile
+++ b/examples/stencil/Makefile
@@ -8,7 +8,11 @@ TASK_OBJ=$(addprefix objs/, $(subst ../,, $(TASK_CXX:.cpp=.o)))
CXX=g++
CXXFLAGS=-Iobjs/ -O3 -Wall -m64
ISPC=ispc
-ISPCFLAGS=-O2 --target=sse4x2 --arch=x86-64
+ISPCFLAGS=-O2 --target=sse2,sse4-x2,avx --arch=x86-64
+
+OBJS=objs/stencil.o objs/stencil_serial.o $(TASK_OBJ) objs/stencil_ispc.o \
+ objs/stencil_ispc_sse2.o objs/stencil_ispc_sse4.o \
+ objs/stencil_ispc_avx.o
default: stencil
@@ -20,8 +24,8 @@ dirs:
clean:
/bin/rm -rf objs *~ stencil
-stencil: dirs objs/stencil.o objs/stencil_serial.o objs/stencil_ispc.o $(TASK_OBJ)
- $(CXX) $(CXXFLAGS) -o $@ objs/stencil.o objs/stencil_ispc.o objs/stencil_serial.o $(TASK_OBJ) -lm $(TASK_LIB)
+stencil: dirs $(OBJS)
+ $(CXX) $(CXXFLAGS) -o $@ $(OBJS) -lm $(TASK_LIB)
objs/%.o: %.cpp
$(CXX) $< $(CXXFLAGS) -c -o $@
@@ -31,5 +35,5 @@ objs/%.o: ../%.cpp
objs/stencil.o: objs/stencil_ispc.h
-objs/%_ispc.h objs/%_ispc.o: %.ispc
+objs/%_ispc.h objs/%_ispc.o objs/%_ispc_sse2.o objs/%_ispc_sse4.o objs/%_ispc_avx.o: %.ispc
$(ISPC) $(ISPCFLAGS) $< -o objs/$*_ispc.o -h objs/$*_ispc.h
diff --git a/examples/stencil/stencil.vcxproj b/examples/stencil/stencil.vcxproj
index d6d130b9..0213c492 100644
--- a/examples/stencil/stencil.vcxproj
+++ b/examples/stencil/stencil.vcxproj
@@ -81,6 +81,7 @@
Level3
Disabled
WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)
+ $(TargetDir)
true
Fast
@@ -96,6 +97,7 @@
Level3
Disabled
WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)
+ $(TargetDir)
true
Fast
@@ -113,6 +115,7 @@
true
true
WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)
+ $(TargetDir)
Fast
@@ -131,6 +134,7 @@
true
true
WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)
+ $(TargetDir)
Fast
@@ -144,21 +148,21 @@
Document
-ispc -O2 %(Filename).ispc -o %(Filename).obj -h %(Filename)_ispc.h --arch=x86
+ispc -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --arch=x86 --target=sse2,sse4-x2,avx
-ispc -O2 %(Filename).ispc -o %(Filename).obj -h %(Filename)_ispc.h
+ispc -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --target=sse2,sse4-x2,avx
- %(Filename).obj
- %(Filename).obj
+ $(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_avx.obj;$(TargetDir)%(Filename)_ispc.h
+ $(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_avx.obj;$(TargetDir)%(Filename)_ispc.h
-ispc -O2 %(Filename).ispc -o %(Filename).obj -h %(Filename)_ispc.h --arch=x86
+ispc -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --arch=x86 --target=sse2,sse4-x2,avx
-ispc -O2 %(Filename).ispc -o %(Filename).obj -h %(Filename)_ispc.h
+ispc -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --target=sse2,sse4-x2,avx
- %(Filename).obj
- %(Filename).obj
+ $(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_avx.obj;$(TargetDir)%(Filename)_ispc.h
+ $(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_avx.obj;$(TargetDir)%(Filename)_ispc.h
diff --git a/examples/volume_rendering/Makefile b/examples/volume_rendering/Makefile
index 0458c017..fa8ff753 100644
--- a/examples/volume_rendering/Makefile
+++ b/examples/volume_rendering/Makefile
@@ -8,7 +8,10 @@ TASK_OBJ=$(addprefix objs/, $(subst ../,, $(TASK_CXX:.cpp=.o)))
CXX=g++
CXXFLAGS=-Iobjs/ -O3 -Wall -m64
ISPC=ispc
-ISPCFLAGS=-O2 --target=sse4x2 --arch=x86-64
+ISPCFLAGS=-O2 --target=sse2,sse4-x2 --arch=x86-64
+
+OBJS=objs/volume.o objs/volume_serial.o $(TASK_OBJ) objs/volume_ispc.o \
+ objs/volume_ispc_sse2.o objs/volume_ispc_sse4.o
default: volume
@@ -20,8 +23,8 @@ dirs:
clean:
/bin/rm -rf objs *~ volume
-volume: dirs objs/volume.o objs/volume_serial.o objs/volume_ispc.o $(TASK_OBJ)
- $(CXX) $(CXXFLAGS) -o $@ objs/volume.o objs/volume_ispc.o objs/volume_serial.o $(TASK_OBJ) -lm $(TASK_LIB)
+volume: dirs $(OBJS)
+ $(CXX) $(CXXFLAGS) -o $@ $(OBJS) -lm $(TASK_LIB)
objs/%.o: %.cpp
$(CXX) $< $(CXXFLAGS) -c -o $@
@@ -31,5 +34,5 @@ objs/%.o: ../%.cpp
objs/volume.o: objs/volume_ispc.h
-objs/%_ispc.h objs/%_ispc.o: %.ispc
+objs/%_ispc.h objs/%_ispc.o objs/%_ispc_sse2.o objs/%_ispc_sse4.o: %.ispc
$(ISPC) $(ISPCFLAGS) $< -o objs/$*_ispc.o -h objs/$*_ispc.h
diff --git a/examples/volume_rendering/volume.vcxproj b/examples/volume_rendering/volume.vcxproj
index 540c8421..1531d4e5 100644
--- a/examples/volume_rendering/volume.vcxproj
+++ b/examples/volume_rendering/volume.vcxproj
@@ -81,6 +81,7 @@
Level3
Disabled
WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)
+ $(TargetDir)
true
Fast
@@ -96,6 +97,7 @@
Level3
Disabled
WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)
+ $(TargetDir)
true
Fast
@@ -113,6 +115,7 @@
true
true
WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)
+ $(TargetDir)
Fast
@@ -131,6 +134,7 @@
true
true
WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)
+ $(TargetDir)
Fast
@@ -148,18 +152,18 @@
Document
- ispc -O2 %(Filename).ispc -o %(Filename).obj -h %(Filename)_ispc.h --arch=x86 --target=sse4x2
+ ispc -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --arch=x86 --target=sse2,sse4-x2
- ispc -O2 %(Filename).ispc -o %(Filename).obj -h %(Filename)_ispc.h --target=sse4x2
+ ispc -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --target=sse2,sse4-x2
- %(Filename).obj;%(Filename)_ispc.h
- %(Filename).obj;%(Filename)_ispc.h
- ispc -O2 %(Filename).ispc -o %(Filename).obj -h %(Filename)_ispc.h --arch=x86 --target=sse4x2
+ $(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_ispc.h
+ $(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_ispc.h
+ ispc -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --arch=x86 --target=sse2,sse4-x2
- ispc -O2 %(Filename).ispc -o %(Filename).obj -h %(Filename)_ispc.h --target=sse4x2
+ ispc -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --target=sse2,sse4-x2
- %(Filename).obj;%(Filename)_ispc.h
- %(Filename).obj;%(Filename)_ispc.h
+ $(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_ispc.h
+ $(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_ispc.h