Update volume rendering workload: add an AVX target to the ISPC build and remove the reduce_equal() path.
Both of these changes improved performance.
This commit is contained in:
@@ -8,10 +8,10 @@ TASK_OBJ=$(addprefix objs/, $(subst ../,, $(TASK_CXX:.cpp=.o)))
|
||||
CXX=g++
|
||||
CXXFLAGS=-Iobjs/ -O3 -Wall -m64
|
||||
ISPC=ispc
|
||||
ISPCFLAGS=-O2 --target=sse2,sse4-x2 --arch=x86-64
|
||||
ISPCFLAGS=-O2 --target=sse2,sse4-x2,avx --arch=x86-64
|
||||
|
||||
OBJS=objs/volume.o objs/volume_serial.o $(TASK_OBJ) objs/volume_ispc.o \
|
||||
objs/volume_ispc_sse2.o objs/volume_ispc_sse4.o
|
||||
objs/volume_ispc_sse2.o objs/volume_ispc_sse4.o objs/volume_ispc_avx.o
|
||||
|
||||
default: volume
|
||||
|
||||
@@ -34,5 +34,5 @@ objs/%.o: ../%.cpp
|
||||
|
||||
objs/volume.o: objs/volume_ispc.h
|
||||
|
||||
objs/%_ispc.h objs/%_ispc.o objs/%_ispc_sse2.o objs/%_ispc_sse4.o: %.ispc
|
||||
objs/%_ispc.h objs/%_ispc.o objs/%_ispc_sse2.o objs/%_ispc_sse4.o objs/%_ispc_avx.o: %.ispc
|
||||
$(ISPC) $(ISPCFLAGS) $< -o objs/$*_ispc.o -h objs/$*_ispc.h
|
||||
|
||||
Reference in New Issue
Block a user