added cpu compilation path

This commit is contained in:
Evghenii
2014-02-20 12:57:50 +01:00
parent fc7cefcf19
commit 93849370b1
5 changed files with 134 additions and 37 deletions

View File

@@ -5,4 +5,4 @@ ISPC_SRC=ao.ispc
ISPC_IA_TARGETS=avx1-i32x8 ISPC_IA_TARGETS=avx1-i32x8
ISPC_ARM_TARGETS=neon ISPC_ARM_TARGETS=neon
include ../common.mk include ../common_cpu.mk

View File

@@ -1,7 +1,7 @@
PROG=ao PROG=ao
ISPC_SRC=ao.ispc ISPC_SRC=ao.ispc
CU_SRC=ao.cu CU_SRC=ao.cu
CXX_SRC=ao.cpp ao_serial.cpp CXX_SRC=ao.cpp
PTXCC_REGMAX=64 PTXCC_REGMAX=64
#ISPC_FLAGS= --opt=disable-uniform-control-flow #ISPC_FLAGS= --opt=disable-uniform-control-flow

View File

@@ -58,8 +58,6 @@
#define NSUBSAMPLES 2 #define NSUBSAMPLES 2
extern void ao_serial(int w, int h, int nsubsamples, float image[]);
static unsigned int test_iterations[] = {3, 7, 1}; static unsigned int test_iterations[] = {3, 7, 1};
static unsigned int width, height; static unsigned int width, height;
static unsigned char *img; static unsigned char *img;
@@ -123,13 +121,8 @@ int main(int argc, char **argv)
} }
// Allocate space for output images // Allocate space for output images
#if 0
img = new unsigned char[width * height * 3]; img = new unsigned char[width * height * 3];
fimg = new float[width * height * 3]; fimg = new float[width * height * 3];
#else
ispc_malloc((void**) &img, sizeof(unsigned char)*width*height*3);
ispc_malloc((void**)&fimg, sizeof( float)*width*height*3);
#endif
// //
// Run the ispc + tasks path, test_iterations times, and report the // Run the ispc + tasks path, test_iterations times, and report the
@@ -137,7 +130,7 @@ int main(int argc, char **argv)
// //
double minTimeISPCTasks = 1e30; double minTimeISPCTasks = 1e30;
for (unsigned int i = 0; i < test_iterations[1]; i++) { for (unsigned int i = 0; i < test_iterations[1]; i++) {
ispc_memset((void *)fimg, 0, sizeof(float) * width * height * 3); ispc_memset(fimg, 0, sizeof(float) * width * height * 3);
assert(NSUBSAMPLES == 2); assert(NSUBSAMPLES == 2);
reset_and_start_timer(); reset_and_start_timer();
@@ -152,29 +145,8 @@ int main(int argc, char **argv)
minTimeISPCTasks, width, height); minTimeISPCTasks, width, height);
savePPM("ao-ispc-tasks.ppm", width, height); savePPM("ao-ispc-tasks.ppm", width, height);
// delete img;
// Run the serial path, again test_iteration times, and report the delete fimg;
// minimum time.
//
double minTimeSerial = 1e30;
for (unsigned int i = 0; i < test_iterations[2]; i++) {
ispc_memset((void *)fimg, 0, sizeof(float) * width * height * 3);
reset_and_start_timer();
ao_serial(width, height, NSUBSAMPLES, fimg);
double t = get_elapsed_msec();
printf("@time of serial run:\t\t\t\t[%.3f] msec\n", t);
minTimeSerial = std::min(minTimeSerial, t);
}
// Report more results, save another image...
printf("[aobench serial]:\t\t[%.3f] msec (%d x %d image)\n", minTimeSerial,
width, height);
printf("\t\t\t\t(%.2fx speedup from ISPC + tasks)\n",
minTimeSerial / minTimeISPCTasks);
savePPM("ao-serial.ppm", width, height);
ispc_free(img);
ispc_free(fimg);
return 0; return 0;
} }

View File

@@ -0,0 +1,120 @@
# Shared GNU Make rules for building an ispc example for the CPU.
#
# The following variables are read below but never set here, so the
# Makefile that includes this file must define them first:
#   EXAMPLE           name of the executable to produce
#   ISPC_SRC          the .ispc source file
#   CPP_SRC, CC_SRC   C++ / C host sources
#   ISPC_IA_TARGETS   ispc --target value used on x86
#   ISPC_ARM_TARGETS  ispc --target value used on ARM

# Task-system sources and the objects built from them.
TASK_CXX=../../tasksys.cpp ../../util/ispc_malloc.cpp
TASK_LIB=-lpthread
TASK_OBJ=objs/tasksys.o objs/ispc_malloc.o

# C++ compiler. The second assignment overrides the first, so icc with
# OpenMP is the compiler actually used; delete it to fall back to clang++.
CXX=clang++
CXX=icc -openmp
CXXFLAGS+=-Iobjs/ -O2 -I../../ -I../../util
CXXFLAGS+=-DISPC_USE_OMP

# C compiler. Same override pattern as for CXX above.
CC=clang
CC=icc -openmp
CCFLAGS+=-Iobjs/ -O2 -I../../ -I../../util
# Fixed: this define used to be appended to CXXFLAGS a second time, which
# left C sources without ISPC_USE_OMP.
CCFLAGS+=-DISPC_USE_OMP

LIBS=-lm $(TASK_LIB) -lstdc++

ISPC=ispc
ISPC_FLAGS+=-O2
ISPC_HEADER=objs/$(ISPC_SRC:.ispc=_ispc.h)

# Collapse the machine name reported by uname into "x86" or "arm".
ARCH:=$(shell uname -m | sed -e s/x86_64/x86/ -e s/i686/x86/ -e s/arm.*/arm/ -e s/sa110/arm/)

ifeq ($(ARCH),x86)
  ISPC_OBJS=$(addprefix objs/, $(ISPC_SRC:.ispc=)_ispc.o)
  COMMA=,
  # A comma in ISPC_IA_TARGETS means several targets were requested; one
  # extra per-ISA object is then added for each ISA named in the list.
  ifneq (,$(findstring $(COMMA),$(ISPC_IA_TARGETS)))
    #$(info multi-target detected: $(ISPC_IA_TARGETS))
    ifneq (,$(findstring sse2,$(ISPC_IA_TARGETS)))
      ISPC_OBJS+=$(addprefix objs/, $(ISPC_SRC:.ispc=)_ispc_sse2.o)
    endif
    ifneq (,$(findstring sse4,$(ISPC_IA_TARGETS)))
      ISPC_OBJS+=$(addprefix objs/, $(ISPC_SRC:.ispc=)_ispc_sse4.o)
    endif
    ifneq (,$(findstring avx1-,$(ISPC_IA_TARGETS)))
      ISPC_OBJS+=$(addprefix objs/, $(ISPC_SRC:.ispc=)_ispc_avx.o)
    endif
    ifneq (,$(findstring avx1.1,$(ISPC_IA_TARGETS)))
      ISPC_OBJS+=$(addprefix objs/, $(ISPC_SRC:.ispc=)_ispc_avx11.o)
    endif
    ifneq (,$(findstring avx2,$(ISPC_IA_TARGETS)))
      ISPC_OBJS+=$(addprefix objs/, $(ISPC_SRC:.ispc=)_ispc_avx2.o)
    endif
  endif
  ISPC_TARGETS=$(ISPC_IA_TARGETS)
  # Match the word size reported by getconf for ispc and both compilers.
  ARCH_BIT:=$(shell getconf LONG_BIT)
  ifeq ($(ARCH_BIT),32)
    ISPC_FLAGS += --arch=x86
    CXXFLAGS += -m32
    CCFLAGS += -m32
  else
    ISPC_FLAGS += --arch=x86-64
    CXXFLAGS += -m64
    CCFLAGS += -m64
  endif
else ifeq ($(ARCH),arm)
  ISPC_OBJS=$(addprefix objs/, $(ISPC_SRC:.ispc=_ispc.o))
  ISPC_TARGETS=$(ISPC_ARM_TARGETS)
else
  $(error Unknown architecture $(ARCH) from uname -m)
endif

CPP_OBJS=$(addprefix objs/, $(CPP_SRC:.cpp=.o))
CC_OBJS=$(addprefix objs/, $(CC_SRC:.c=.o))
OBJS=$(CPP_OBJS) $(CC_OBJS) $(TASK_OBJ) $(ISPC_OBJS)

default: $(EXAMPLE)

all: $(EXAMPLE) $(EXAMPLE)-sse4 $(EXAMPLE)-generic16 $(EXAMPLE)-scalar

# default and all name no files, so they are declared phony as well.
.PHONY: default all dirs clean

dirs:
	/bin/mkdir -p objs/

objs/%.cpp objs/%.o objs/%.h: dirs

# Fixed: also removes $(EXAMPLE)-scalar, which "all" builds.
clean:
	/bin/rm -rf objs *~ $(EXAMPLE) $(EXAMPLE)-sse4 $(EXAMPLE)-generic16 $(EXAMPLE)-scalar ref test

$(EXAMPLE): $(OBJS)
	$(CXX) $(CXXFLAGS) -o $@ $^ $(LIBS)

# Host sources in the example directory; they depend on the generated
# ispc header.
objs/%.o: %.cpp dirs $(ISPC_HEADER)
	$(CXX) $< $(CXXFLAGS) -c -o $@

objs/%.o: %.c dirs $(ISPC_HEADER)
	$(CC) $< $(CCFLAGS) -c -o $@

# Sources that live two directories up (tasksys.cpp) and in ../../util
# (ispc_malloc.cpp).
objs/%.o: ../../%.cpp dirs
	$(CXX) $< $(CXXFLAGS) -c -o $@

objs/%.o: ../../util/%.cpp dirs
	$(CXX) $< $(CXXFLAGS) -c -o $@

objs/$(EXAMPLE).o: objs/$(EXAMPLE)_ispc.h dirs

# A single ispc invocation is declared as producing the header, the main
# object and every per-ISA object.
objs/%_ispc.h objs/%_ispc.o objs/%_ispc_sse2.o objs/%_ispc_sse4.o objs/%_ispc_avx.o objs/%_ispc_avx11.o objs/%_ispc_avx2.o: %.ispc dirs
	$(ISPC) $(ISPC_FLAGS) --target=$(ISPC_TARGETS) $< -o objs/$*_ispc.o -h objs/$*_ispc.h

# Variant: ispc emits C++ for generic-4, compiled against sse4.h.
objs/$(ISPC_SRC:.ispc=)_sse4.cpp: $(ISPC_SRC)
	$(ISPC) $(ISPC_FLAGS) $< -o $@ --target=generic-4 --emit-c++ --c++-include-file=sse4.h

objs/$(ISPC_SRC:.ispc=)_sse4.o: objs/$(ISPC_SRC:.ispc=)_sse4.cpp
	$(CXX) -I../../intrinsics -msse4.2 $< $(CXXFLAGS) -c -o $@

$(EXAMPLE)-sse4: $(CPP_OBJS) objs/$(ISPC_SRC:.ispc=)_sse4.o
	$(CXX) $(CXXFLAGS) -o $@ $^ $(LIBS)

# Variant: ispc emits C++ for generic-16, compiled against generic-16.h.
objs/$(ISPC_SRC:.ispc=)_generic16.cpp: $(ISPC_SRC)
	$(ISPC) $(ISPC_FLAGS) $< -o $@ --target=generic-16 --emit-c++ --c++-include-file=generic-16.h

objs/$(ISPC_SRC:.ispc=)_generic16.o: objs/$(ISPC_SRC:.ispc=)_generic16.cpp
	$(CXX) -I../../intrinsics $< $(CXXFLAGS) -c -o $@

$(EXAMPLE)-generic16: $(CPP_OBJS) objs/$(ISPC_SRC:.ispc=)_generic16.o
	$(CXX) $(CXXFLAGS) -o $@ $^ $(LIBS)

# Variant: object built directly by ispc with --target=generic-1.
objs/$(ISPC_SRC:.ispc=)_scalar.o: $(ISPC_SRC)
	$(ISPC) $(ISPC_FLAGS) $< -o $@ --target=generic-1

$(EXAMPLE)-scalar: $(CPP_OBJS) objs/$(ISPC_SRC:.ispc=)_scalar.o
	$(CXX) $(CXXFLAGS) -o $@ $^ $(LIBS)

View File

@@ -960,17 +960,22 @@ InitTaskSystem() {
inline void inline void
TaskGroup::Launch(int baseIndex, int count) { TaskGroup::Launch(int baseIndex, int count) {
#pragma omp parallel for #pragma omp parallel
for(int i = 0; i < count; i++) { {
const int threadIndex = omp_get_thread_num();
const int threadCount = omp_get_num_threads();
#pragma omp for schedule(runtime)
for(int i = 0; i < count; i++)
{
TaskInfo *ti = GetTaskInfo(baseIndex + i); TaskInfo *ti = GetTaskInfo(baseIndex + i);
// Actually run the task. // Actually run the task.
int threadIndex = omp_get_thread_num();
int threadCount = omp_get_num_threads();
ti->func(ti->data, threadIndex, threadCount, ti->taskIndex, ti->taskCount(), ti->func(ti->data, threadIndex, threadCount, ti->taskIndex, ti->taskCount(),
ti->taskIndex0(), ti->taskIndex1(), ti->taskIndex2(), ti->taskIndex0(), ti->taskIndex1(), ti->taskIndex2(),
ti->taskCount0(), ti->taskCount1(), ti->taskCount2()); ti->taskCount0(), ti->taskCount1(), ti->taskCount2());
} }
}
} }
inline void inline void