diff --git a/examples/aobench/Makefile b/examples/aobench/Makefile index 60394e2b..207badbc 100644 --- a/examples/aobench/Makefile +++ b/examples/aobench/Makefile @@ -1,8 +1,20 @@ -CXX=g++ -m64 -CXXFLAGS=-Iobjs/ -O3 -Wall +ARCH = $(shell uname) + +TASK_CXX=../tasks_pthreads.cpp +TASK_LIB=-lpthread + +ifeq ($(ARCH), Darwin) + TASK_CXX=../tasks_gcd.cpp + TASK_LIB= +endif + +TASK_OBJ=$(addprefix objs/, $(subst ../,, $(TASK_CXX:.cpp=.o))) + +CXX=g++ +CXXFLAGS=-Iobjs/ -O3 -Wall -m64 ISPC=ispc -ISPCFLAGS=-O2 --fast-math --arch=x86-64 +ISPCFLAGS=-O2 --target=sse4 --arch=x86-64 default: ao @@ -14,12 +26,15 @@ dirs: clean: /bin/rm -rf objs *~ ao -ao: dirs objs/ao.o objs/ao_serial.o objs/ao_ispc.o - $(CXX) $(CXXFLAGS) -o $@ objs/ao.o objs/ao_ispc.o objs/ao_serial.o -lm -lpthread +ao: dirs objs/ao.o objs/ao_serial.o objs/ao_ispc.o $(TASK_OBJ) + $(CXX) $(CXXFLAGS) -o $@ objs/ao.o objs/ao_ispc.o objs/ao_serial.o $(TASK_OBJ) -lm $(TASK_LIB) objs/%.o: %.cpp $(CXX) $< $(CXXFLAGS) -c -o $@ +objs/%.o: ../%.cpp + $(CXX) $< $(CXXFLAGS) -c -o $@ + objs/ao.o: objs/ao_ispc.h objs/%_ispc.h objs/%_ispc.o: %.ispc diff --git a/examples/aobench/ao.cpp b/examples/aobench/ao.cpp index 0f7f7a12..4d7de414 100644 --- a/examples/aobench/ao.cpp +++ b/examples/aobench/ao.cpp @@ -173,10 +173,30 @@ int main(int argc, char **argv) } // Report results and save image - printf("[aobench ispc]:\t\t\t[%.3f] M cycles (%d x %d image)\n", minTimeISPC, - width, height); + printf("[aobench ispc]:\t\t\t[%.3f] M cycles (%d x %d image)\n", + minTimeISPC, width, height); savePPM("ao-ispc.ppm", width, height); + // + // Run the ispc + tasks path, test_iterations times, and report the + // minimum time for any of them. + // + double minTimeISPCTasks = 1e30; + for (unsigned int i = 0; i < test_iterations; i++) { + memset((void *)fimg, 0, sizeof(float) * width * height * 3); + assert(NSUBSAMPLES == 2); + + reset_and_start_timer(); + ao_ispc_tasks(width, height, NSUBSAMPLES, fimg); + double t = get_elapsed_mcycles(); + minTimeISPCTasks = std::min(minTimeISPCTasks, t); + } + + // Report results and save image + printf("[aobench ispc + tasks]:\t\t[%.3f] M cycles (%d x %d image)\n", + minTimeISPCTasks, width, height); + savePPM("ao-ispc-tasks.ppm", width, height); + // // Run the serial path, again test_iteration times, and report the // minimum time. @@ -193,7 +213,8 @@ int main(int argc, char **argv) // Report more results, save another image... printf("[aobench serial]:\t\t[%.3f] M cycles (%d x %d image)\n", minTimeSerial, width, height); - printf("\t\t\t\t(%.2fx speedup from ISPC)\n", minTimeSerial / minTimeISPC); + printf("\t\t\t\t(%.2fx speedup from ISPC, %.2fx speedup from ISPC + tasks)\n", + minTimeSerial / minTimeISPC, minTimeSerial / minTimeISPCTasks); savePPM("ao-serial.ppm", width, height); return 0; diff --git a/examples/aobench/ao.ispc b/examples/aobench/ao.ispc index 192e0666..7987056b 100644 --- a/examples/aobench/ao.ispc +++ b/examples/aobench/ao.ispc @@ -203,8 +203,9 @@ ambient_occlusion(reference Isect isect, reference Plane plane, /* Compute the image for the scanlines from [y0,y1), for an overall image of width w and height h. */ -void ao_scanlines(uniform int y0, uniform int y1, uniform int w, uniform int h, - uniform int nsubsamples, reference uniform float image[]) { +static void ao_scanlines(uniform int y0, uniform int y1, uniform int w, + uniform int h, uniform int nsubsamples, + reference uniform float image[]) { static Plane plane = { { 0.0f, -0.5f, 0.0f }, { 0.f, 1.f, 0.f } }; static Sphere spheres[3] = { { { -2.0f, 0.0f, -3.5f }, 0.5f }, @@ -315,3 +316,18 @@ export void ao_ispc(uniform int w, uniform int h, uniform int nsubsamples, uniform float image[]) { ao_scanlines(0, h, w, h, nsubsamples, image); } + + +static void task ao_task(uniform int y0, uniform int y1, uniform int width, + uniform int height, uniform int nsubsamples, + uniform float image[]) { + ao_scanlines(y0, y1, width, height, nsubsamples, image); +} + + +export void ao_ispc_tasks(uniform int w, uniform int h, uniform int nsubsamples, + uniform float image[]) { + uniform int dy = 1; + for (uniform int y = 0; y < h; y += dy) + launch < ao_task(y, y+dy, w, h, nsubsamples, image) >; +} diff --git a/examples/aobench/aobench.vcxproj b/examples/aobench/aobench.vcxproj index d1b3237f..085c2f3e 100755 --- a/examples/aobench/aobench.vcxproj +++ b/examples/aobench/aobench.vcxproj @@ -1,4 +1,4 @@ - + @@ -21,6 +21,7 @@ +