From 93849370b1814550f339ced66570aa74830fc606 Mon Sep 17 00:00:00 2001 From: Evghenii Date: Thu, 20 Feb 2014 12:57:50 +0100 Subject: [PATCH] added cpu compilation path --- examples/portable/aobench/Makefile_cpu | 2 +- examples/portable/aobench/Makefile_gpu | 2 +- examples/portable/aobench/ao.cpp | 34 +------ examples/portable/common_cpu.mk | 120 +++++++++++++++++++++++++ examples/tasksys.cpp | 13 ++- 5 files changed, 134 insertions(+), 37 deletions(-) create mode 100644 examples/portable/common_cpu.mk diff --git a/examples/portable/aobench/Makefile_cpu b/examples/portable/aobench/Makefile_cpu index d5fd50eb..6dd16131 100644 --- a/examples/portable/aobench/Makefile_cpu +++ b/examples/portable/aobench/Makefile_cpu @@ -5,4 +5,4 @@ ISPC_SRC=ao.ispc ISPC_IA_TARGETS=avx1-i32x8 ISPC_ARM_TARGETS=neon -include ../common.mk +include ../common_cpu.mk diff --git a/examples/portable/aobench/Makefile_gpu b/examples/portable/aobench/Makefile_gpu index 86efc73a..db3afc57 100644 --- a/examples/portable/aobench/Makefile_gpu +++ b/examples/portable/aobench/Makefile_gpu @@ -1,7 +1,7 @@ PROG=ao ISPC_SRC=ao.ispc CU_SRC=ao.cu -CXX_SRC=ao.cpp ao_serial.cpp +CXX_SRC=ao.cpp PTXCC_REGMAX=64 #ISPC_FLAGS= --opt=disable-uniform-control-flow diff --git a/examples/portable/aobench/ao.cpp b/examples/portable/aobench/ao.cpp index 932e1d00..39b75ee9 100644 --- a/examples/portable/aobench/ao.cpp +++ b/examples/portable/aobench/ao.cpp @@ -58,8 +58,6 @@ #define NSUBSAMPLES 2 -extern void ao_serial(int w, int h, int nsubsamples, float image[]); - static unsigned int test_iterations[] = {3, 7, 1}; static unsigned int width, height; static unsigned char *img; @@ -123,13 +121,8 @@ int main(int argc, char **argv) } // Allocate space for output images -#if 0 img = new unsigned char[width * height * 3]; fimg = new float[width * height * 3]; -#else - ispc_malloc((void**) &img, sizeof(unsigned char)*width*height*3); - ispc_malloc((void**)&fimg, sizeof( float)*width*height*3); -#endif // // Run the ispc + tasks 
path, test_iterations times, and report the @@ -137,7 +130,7 @@ int main(int argc, char **argv) // double minTimeISPCTasks = 1e30; for (unsigned int i = 0; i < test_iterations[1]; i++) { - ispc_memset((void *)fimg, 0, sizeof(float) * width * height * 3); + ispc_memset(fimg, 0, sizeof(float) * width * height * 3); assert(NSUBSAMPLES == 2); reset_and_start_timer(); @@ -152,29 +145,8 @@ int main(int argc, char **argv) minTimeISPCTasks, width, height); savePPM("ao-ispc-tasks.ppm", width, height); - // - // Run the serial path, again test_iteration times, and report the - // minimum time. - // - double minTimeSerial = 1e30; - for (unsigned int i = 0; i < test_iterations[2]; i++) { - ispc_memset((void *)fimg, 0, sizeof(float) * width * height * 3); - reset_and_start_timer(); - ao_serial(width, height, NSUBSAMPLES, fimg); - double t = get_elapsed_msec(); - printf("@time of serial run:\t\t\t\t[%.3f] msec\n", t); - minTimeSerial = std::min(minTimeSerial, t); - } - - // Report more results, save another image... 
- printf("[aobench serial]:\t\t[%.3f] msec (%d x %d image)\n", minTimeSerial, - width, height); - printf("\t\t\t\t(%.2fx speedup from ISPC + tasks)\n", - minTimeSerial / minTimeISPCTasks); - savePPM("ao-serial.ppm", width, height); - - ispc_free(img); - ispc_free(fimg); + delete[] img; + delete[] fimg; return 0; } diff --git a/examples/portable/common_cpu.mk b/examples/portable/common_cpu.mk new file mode 100644 index 00000000..06d58e7a --- /dev/null +++ b/examples/portable/common_cpu.mk @@ -0,0 +1,120 @@ + +TASK_CXX=../../tasksys.cpp ../../util/ispc_malloc.cpp +TASK_LIB=-lpthread +TASK_OBJ=objs/tasksys.o objs/ispc_malloc.o + +CXX=clang++ +CXX=icc -openmp +CXXFLAGS+=-Iobjs/ -O2 -I../../ -I../../util +CXXFLAGS+=-DISPC_USE_OMP +CC=clang +CC=icc -openmp +CCFLAGS+=-Iobjs/ -O2 -I../../ -I../../util +CCFLAGS+=-DISPC_USE_OMP + +LIBS=-lm $(TASK_LIB) -lstdc++ +ISPC=ispc +ISPC_FLAGS+=-O2 +ISPC_HEADER=objs/$(ISPC_SRC:.ispc=_ispc.h) + +ARCH:=$(shell uname -m | sed -e s/x86_64/x86/ -e s/i686/x86/ -e s/arm.*/arm/ -e s/sa110/arm/) + +ifeq ($(ARCH),x86) + ISPC_OBJS=$(addprefix objs/, $(ISPC_SRC:.ispc=)_ispc.o) + COMMA=, + ifneq (,$(findstring $(COMMA),$(ISPC_IA_TARGETS))) + #$(info multi-target detected: $(ISPC_IA_TARGETS)) + ifneq (,$(findstring sse2,$(ISPC_IA_TARGETS))) + ISPC_OBJS+=$(addprefix objs/, $(ISPC_SRC:.ispc=)_ispc_sse2.o) + endif + ifneq (,$(findstring sse4,$(ISPC_IA_TARGETS))) + ISPC_OBJS+=$(addprefix objs/, $(ISPC_SRC:.ispc=)_ispc_sse4.o) + endif + ifneq (,$(findstring avx1-,$(ISPC_IA_TARGETS))) + ISPC_OBJS+=$(addprefix objs/, $(ISPC_SRC:.ispc=)_ispc_avx.o) + endif + ifneq (,$(findstring avx1.1,$(ISPC_IA_TARGETS))) + ISPC_OBJS+=$(addprefix objs/, $(ISPC_SRC:.ispc=)_ispc_avx11.o) + endif + ifneq (,$(findstring avx2,$(ISPC_IA_TARGETS))) + ISPC_OBJS+=$(addprefix objs/, $(ISPC_SRC:.ispc=)_ispc_avx2.o) + endif + endif + ISPC_TARGETS=$(ISPC_IA_TARGETS) + ARCH_BIT:=$(shell getconf LONG_BIT) + ifeq ($(ARCH_BIT),32) + ISPC_FLAGS += --arch=x86 + CXXFLAGS += -m32 + CCFLAGS += 
-m32 + else + ISPC_FLAGS += --arch=x86-64 + CXXFLAGS += -m64 + CCFLAGS += -m64 + endif +else ifeq ($(ARCH),arm) + ISPC_OBJS=$(addprefix objs/, $(ISPC_SRC:.ispc=_ispc.o)) + ISPC_TARGETS=$(ISPC_ARM_TARGETS) +else + $(error Unknown architecture $(ARCH) from uname -m) +endif + +CPP_OBJS=$(addprefix objs/, $(CPP_SRC:.cpp=.o)) +CC_OBJS=$(addprefix objs/, $(CC_SRC:.c=.o)) +OBJS=$(CPP_OBJS) $(CC_OBJS) $(TASK_OBJ) $(ISPC_OBJS) + +default: $(EXAMPLE) + +all: $(EXAMPLE) $(EXAMPLE)-sse4 $(EXAMPLE)-generic16 $(EXAMPLE)-scalar + +.PHONY: dirs clean + +dirs: + /bin/mkdir -p objs/ + +objs/%.cpp objs/%.o objs/%.h: dirs + +clean: + /bin/rm -rf objs *~ $(EXAMPLE) $(EXAMPLE)-sse4 $(EXAMPLE)-generic16 $(EXAMPLE)-scalar ref test + +$(EXAMPLE): $(OBJS) + $(CXX) $(CXXFLAGS) -o $@ $^ $(LIBS) + +objs/%.o: %.cpp dirs $(ISPC_HEADER) + $(CXX) $< $(CXXFLAGS) -c -o $@ + +objs/%.o: %.c dirs $(ISPC_HEADER) + $(CC) $< $(CCFLAGS) -c -o $@ + +objs/%.o: ../../%.cpp dirs + $(CXX) $< $(CXXFLAGS) -c -o $@ +objs/%.o: ../../util/%.cpp dirs + $(CXX) $< $(CXXFLAGS) -c -o $@ + +objs/$(EXAMPLE).o: objs/$(EXAMPLE)_ispc.h dirs + +objs/%_ispc.h objs/%_ispc.o objs/%_ispc_sse2.o objs/%_ispc_sse4.o objs/%_ispc_avx.o objs/%_ispc_avx11.o objs/%_ispc_avx2.o: %.ispc dirs + $(ISPC) $(ISPC_FLAGS) --target=$(ISPC_TARGETS) $< -o objs/$*_ispc.o -h objs/$*_ispc.h + +objs/$(ISPC_SRC:.ispc=)_sse4.cpp: $(ISPC_SRC) + $(ISPC) $(ISPC_FLAGS) $< -o $@ --target=generic-4 --emit-c++ --c++-include-file=sse4.h + +objs/$(ISPC_SRC:.ispc=)_sse4.o: objs/$(ISPC_SRC:.ispc=)_sse4.cpp + $(CXX) -I../../intrinsics -msse4.2 $< $(CXXFLAGS) -c -o $@ + +$(EXAMPLE)-sse4: $(CPP_OBJS) objs/$(ISPC_SRC:.ispc=)_sse4.o + $(CXX) $(CXXFLAGS) -o $@ $^ $(LIBS) + +objs/$(ISPC_SRC:.ispc=)_generic16.cpp: $(ISPC_SRC) + $(ISPC) $(ISPC_FLAGS) $< -o $@ --target=generic-16 --emit-c++ --c++-include-file=generic-16.h + +objs/$(ISPC_SRC:.ispc=)_generic16.o: objs/$(ISPC_SRC:.ispc=)_generic16.cpp + $(CXX) -I../../intrinsics $< $(CXXFLAGS) -c -o $@ + +$(EXAMPLE)-generic16: $(CPP_OBJS) 
objs/$(ISPC_SRC:.ispc=)_generic16.o + $(CXX) $(CXXFLAGS) -o $@ $^ $(LIBS) + +objs/$(ISPC_SRC:.ispc=)_scalar.o: $(ISPC_SRC) + $(ISPC) $(ISPC_FLAGS) $< -o $@ --target=generic-1 + +$(EXAMPLE)-scalar: $(CPP_OBJS) objs/$(ISPC_SRC:.ispc=)_scalar.o + $(CXX) $(CXXFLAGS) -o $@ $^ $(LIBS) diff --git a/examples/tasksys.cpp b/examples/tasksys.cpp index 77269f9f..f478a289 100644 --- a/examples/tasksys.cpp +++ b/examples/tasksys.cpp @@ -960,17 +960,22 @@ InitTaskSystem() { inline void TaskGroup::Launch(int baseIndex, int count) { -#pragma omp parallel for - for(int i = 0; i < count; i++) { +#pragma omp parallel + { + const int threadIndex = omp_get_thread_num(); + const int threadCount = omp_get_num_threads(); + +#pragma omp for schedule(runtime) + for(int i = 0; i < count; i++) + { TaskInfo *ti = GetTaskInfo(baseIndex + i); // Actually run the task. - int threadIndex = omp_get_thread_num(); - int threadCount = omp_get_num_threads(); ti->func(ti->data, threadIndex, threadCount, ti->taskIndex, ti->taskCount(), ti->taskIndex0(), ti->taskIndex1(), ti->taskIndex2(), ti->taskCount0(), ti->taskCount1(), ti->taskCount2()); } + } } inline void