added cpu compilation path
This commit is contained in:
@@ -5,4 +5,4 @@ ISPC_SRC=ao.ispc
|
||||
ISPC_IA_TARGETS=avx1-i32x8
|
||||
ISPC_ARM_TARGETS=neon
|
||||
|
||||
include ../common.mk
|
||||
include ../common_cpu.mk
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
PROG=ao
|
||||
ISPC_SRC=ao.ispc
|
||||
CU_SRC=ao.cu
|
||||
CXX_SRC=ao.cpp ao_serial.cpp
|
||||
CXX_SRC=ao.cpp
|
||||
PTXCC_REGMAX=64
|
||||
#ISPC_FLAGS= --opt=disable-uniform-control-flow
|
||||
|
||||
|
||||
@@ -58,8 +58,6 @@
|
||||
|
||||
#define NSUBSAMPLES 2
|
||||
|
||||
extern void ao_serial(int w, int h, int nsubsamples, float image[]);
|
||||
|
||||
static unsigned int test_iterations[] = {3, 7, 1};
|
||||
static unsigned int width, height;
|
||||
static unsigned char *img;
|
||||
@@ -123,13 +121,8 @@ int main(int argc, char **argv)
|
||||
}
|
||||
|
||||
// Allocate space for output images
|
||||
#if 0
|
||||
img = new unsigned char[width * height * 3];
|
||||
fimg = new float[width * height * 3];
|
||||
#else
|
||||
ispc_malloc((void**) &img, sizeof(unsigned char)*width*height*3);
|
||||
ispc_malloc((void**)&fimg, sizeof( float)*width*height*3);
|
||||
#endif
|
||||
|
||||
//
|
||||
// Run the ispc + tasks path, test_iterations times, and report the
|
||||
@@ -137,7 +130,7 @@ int main(int argc, char **argv)
|
||||
//
|
||||
double minTimeISPCTasks = 1e30;
|
||||
for (unsigned int i = 0; i < test_iterations[1]; i++) {
|
||||
ispc_memset((void *)fimg, 0, sizeof(float) * width * height * 3);
|
||||
ispc_memset(fimg, 0, sizeof(float) * width * height * 3);
|
||||
assert(NSUBSAMPLES == 2);
|
||||
|
||||
reset_and_start_timer();
|
||||
@@ -152,29 +145,8 @@ int main(int argc, char **argv)
|
||||
minTimeISPCTasks, width, height);
|
||||
savePPM("ao-ispc-tasks.ppm", width, height);
|
||||
|
||||
//
|
||||
// Run the serial path, again test_iteration times, and report the
|
||||
// minimum time.
|
||||
//
|
||||
double minTimeSerial = 1e30;
|
||||
for (unsigned int i = 0; i < test_iterations[2]; i++) {
|
||||
ispc_memset((void *)fimg, 0, sizeof(float) * width * height * 3);
|
||||
reset_and_start_timer();
|
||||
ao_serial(width, height, NSUBSAMPLES, fimg);
|
||||
double t = get_elapsed_msec();
|
||||
printf("@time of serial run:\t\t\t\t[%.3f] msec\n", t);
|
||||
minTimeSerial = std::min(minTimeSerial, t);
|
||||
}
|
||||
|
||||
// Report more results, save another image...
|
||||
printf("[aobench serial]:\t\t[%.3f] msec (%d x %d image)\n", minTimeSerial,
|
||||
width, height);
|
||||
printf("\t\t\t\t(%.2fx speedup from ISPC + tasks)\n",
|
||||
minTimeSerial / minTimeISPCTasks);
|
||||
savePPM("ao-serial.ppm", width, height);
|
||||
|
||||
ispc_free(img);
|
||||
ispc_free(fimg);
|
||||
delete img;
|
||||
delete fimg;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
120
examples/portable/common_cpu.mk
Normal file
120
examples/portable/common_cpu.mk
Normal file
@@ -0,0 +1,120 @@
|
||||
|
||||
TASK_CXX=../../tasksys.cpp ../../util/ispc_malloc.cpp
|
||||
TASK_LIB=-lpthread
|
||||
TASK_OBJ=objs/tasksys.o objs/ispc_malloc.o
|
||||
|
||||
CXX=clang++
|
||||
CXX=icc -openmp
|
||||
CXXFLAGS+=-Iobjs/ -O2 -I../../ -I../../util
|
||||
CXXFLAGS+=-DISPC_USE_OMP
|
||||
CC=clang
|
||||
CC=icc -openmp
|
||||
CCFLAGS+=-Iobjs/ -O2 -I../../ -I../../util
|
||||
CXXFLAGS+=-DISPC_USE_OMP
|
||||
|
||||
LIBS=-lm $(TASK_LIB) -lstdc++
|
||||
ISPC=ispc
|
||||
ISPC_FLAGS+=-O2
|
||||
ISPC_HEADER=objs/$(ISPC_SRC:.ispc=_ispc.h)
|
||||
|
||||
ARCH:=$(shell uname -m | sed -e s/x86_64/x86/ -e s/i686/x86/ -e s/arm.*/arm/ -e s/sa110/arm/)
|
||||
|
||||
ifeq ($(ARCH),x86)
|
||||
ISPC_OBJS=$(addprefix objs/, $(ISPC_SRC:.ispc=)_ispc.o)
|
||||
COMMA=,
|
||||
ifneq (,$(findstring $(COMMA),$(ISPC_IA_TARGETS)))
|
||||
#$(info multi-target detected: $(ISPC_IA_TARGETS))
|
||||
ifneq (,$(findstring sse2,$(ISPC_IA_TARGETS)))
|
||||
ISPC_OBJS+=$(addprefix objs/, $(ISPC_SRC:.ispc=)_ispc_sse2.o)
|
||||
endif
|
||||
ifneq (,$(findstring sse4,$(ISPC_IA_TARGETS)))
|
||||
ISPC_OBJS+=$(addprefix objs/, $(ISPC_SRC:.ispc=)_ispc_sse4.o)
|
||||
endif
|
||||
ifneq (,$(findstring avx1-,$(ISPC_IA_TARGETS)))
|
||||
ISPC_OBJS+=$(addprefix objs/, $(ISPC_SRC:.ispc=)_ispc_avx.o)
|
||||
endif
|
||||
ifneq (,$(findstring avx1.1,$(ISPC_IA_TARGETS)))
|
||||
ISPC_OBJS+=$(addprefix objs/, $(ISPC_SRC:.ispc=)_ispc_avx11.o)
|
||||
endif
|
||||
ifneq (,$(findstring avx2,$(ISPC_IA_TARGETS)))
|
||||
ISPC_OBJS+=$(addprefix objs/, $(ISPC_SRC:.ispc=)_ispc_avx2.o)
|
||||
endif
|
||||
endif
|
||||
ISPC_TARGETS=$(ISPC_IA_TARGETS)
|
||||
ARCH_BIT:=$(shell getconf LONG_BIT)
|
||||
ifeq ($(ARCH_BIT),32)
|
||||
ISPC_FLAGS += --arch=x86
|
||||
CXXFLAGS += -m32
|
||||
CCFLAGS += -m32
|
||||
else
|
||||
ISPC_FLAGS += --arch=x86-64
|
||||
CXXFLAGS += -m64
|
||||
CCFLAGS += -m64
|
||||
endif
|
||||
else ifeq ($(ARCH),arm)
|
||||
ISPC_OBJS=$(addprefix objs/, $(ISPC_SRC:.ispc=_ispc.o))
|
||||
ISPC_TARGETS=$(ISPC_ARM_TARGETS)
|
||||
else
|
||||
$(error Unknown architecture $(ARCH) from uname -m)
|
||||
endif
|
||||
|
||||
CPP_OBJS=$(addprefix objs/, $(CPP_SRC:.cpp=.o))
|
||||
CC_OBJS=$(addprefix objs/, $(CC_SRC:.c=.o))
|
||||
OBJS=$(CPP_OBJS) $(CC_OBJS) $(TASK_OBJ) $(ISPC_OBJS)
|
||||
|
||||
default: $(EXAMPLE)
|
||||
|
||||
all: $(EXAMPLE) $(EXAMPLE)-sse4 $(EXAMPLE)-generic16 $(EXAMPLE)-scalar
|
||||
|
||||
.PHONY: dirs clean
|
||||
|
||||
dirs:
|
||||
/bin/mkdir -p objs/
|
||||
|
||||
objs/%.cpp objs/%.o objs/%.h: dirs
|
||||
|
||||
clean:
|
||||
/bin/rm -rf objs *~ $(EXAMPLE) $(EXAMPLE)-sse4 $(EXAMPLE)-generic16 ref test
|
||||
|
||||
$(EXAMPLE): $(OBJS)
|
||||
$(CXX) $(CXXFLAGS) -o $@ $^ $(LIBS)
|
||||
|
||||
objs/%.o: %.cpp dirs $(ISPC_HEADER)
|
||||
$(CXX) $< $(CXXFLAGS) -c -o $@
|
||||
|
||||
objs/%.o: %.c dirs $(ISPC_HEADER)
|
||||
$(CC) $< $(CCFLAGS) -c -o $@
|
||||
|
||||
objs/%.o: ../../%.cpp dirs
|
||||
$(CXX) $< $(CXXFLAGS) -c -o $@
|
||||
objs/%.o: ../../util/%.cpp dirs
|
||||
$(CXX) $< $(CXXFLAGS) -c -o $@
|
||||
|
||||
objs/$(EXAMPLE).o: objs/$(EXAMPLE)_ispc.h dirs
|
||||
|
||||
objs/%_ispc.h objs/%_ispc.o objs/%_ispc_sse2.o objs/%_ispc_sse4.o objs/%_ispc_avx.o objs/%_ispc_avx11.o objs/%_ispc_avx2.o: %.ispc dirs
|
||||
$(ISPC) $(ISPC_FLAGS) --target=$(ISPC_TARGETS) $< -o objs/$*_ispc.o -h objs/$*_ispc.h
|
||||
|
||||
objs/$(ISPC_SRC:.ispc=)_sse4.cpp: $(ISPC_SRC)
|
||||
$(ISPC) $(ISPC_FLAGS) $< -o $@ --target=generic-4 --emit-c++ --c++-include-file=sse4.h
|
||||
|
||||
objs/$(ISPC_SRC:.ispc=)_sse4.o: objs/$(ISPC_SRC:.ispc=)_sse4.cpp
|
||||
$(CXX) -I../../intrinsics -msse4.2 $< $(CXXFLAGS) -c -o $@
|
||||
|
||||
$(EXAMPLE)-sse4: $(CPP_OBJS) objs/$(ISPC_SRC:.ispc=)_sse4.o
|
||||
$(CXX) $(CXXFLAGS) -o $@ $^ $(LIBS)
|
||||
|
||||
objs/$(ISPC_SRC:.ispc=)_generic16.cpp: $(ISPC_SRC)
|
||||
$(ISPC) $(ISPC_FLAGS) $< -o $@ --target=generic-16 --emit-c++ --c++-include-file=generic-16.h
|
||||
|
||||
objs/$(ISPC_SRC:.ispc=)_generic16.o: objs/$(ISPC_SRC:.ispc=)_generic16.cpp
|
||||
$(CXX) -I../../intrinsics $< $(CXXFLAGS) -c -o $@
|
||||
|
||||
$(EXAMPLE)-generic16: $(CPP_OBJS) objs/$(ISPC_SRC:.ispc=)_generic16.o
|
||||
$(CXX) $(CXXFLAGS) -o $@ $^ $(LIBS)
|
||||
|
||||
objs/$(ISPC_SRC:.ispc=)_scalar.o: $(ISPC_SRC)
|
||||
$(ISPC) $(ISPC_FLAGS) $< -o $@ --target=generic-1
|
||||
|
||||
$(EXAMPLE)-scalar: $(CPP_OBJS) objs/$(ISPC_SRC:.ispc=)_scalar.o
|
||||
$(CXX) $(CXXFLAGS) -o $@ $^ $(LIBS)
|
||||
@@ -960,18 +960,23 @@ InitTaskSystem() {
|
||||
|
||||
inline void
|
||||
TaskGroup::Launch(int baseIndex, int count) {
|
||||
#pragma omp parallel for
|
||||
for(int i = 0; i < count; i++) {
|
||||
#pragma omp parallel
|
||||
{
|
||||
const int threadIndex = omp_get_thread_num();
|
||||
const int threadCount = omp_get_num_threads();
|
||||
|
||||
#pragma omp for schedule(runtime)
|
||||
for(int i = 0; i < count; i++)
|
||||
{
|
||||
TaskInfo *ti = GetTaskInfo(baseIndex + i);
|
||||
|
||||
// Actually run the task.
|
||||
int threadIndex = omp_get_thread_num();
|
||||
int threadCount = omp_get_num_threads();
|
||||
ti->func(ti->data, threadIndex, threadCount, ti->taskIndex, ti->taskCount(),
|
||||
ti->taskIndex0(), ti->taskIndex1(), ti->taskIndex2(),
|
||||
ti->taskCount0(), ti->taskCount1(), ti->taskCount2());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
inline void
|
||||
TaskGroup::Sync() {
|
||||
|
||||
Reference in New Issue
Block a user