This commit is contained in:
evghenii
2014-01-04 22:39:40 +01:00
parent c7bf732fdf
commit 916ccca3a1
2 changed files with 73 additions and 18 deletions

View File

@@ -1,8 +1,63 @@
# ---------------------------------------------------------------------------
# Build configuration for the mandelbrot_tasks example.
# ---------------------------------------------------------------------------

# NOTE(review): EXAMPLE, CPP_SRC and the ISPC_*_TARGETS variables are not
# referenced by any rule visible in this file; presumably they are read by
# ../common.mk -- confirm before removing them.
EXAMPLE=mandelbrot_tasks
CPP_SRC=mandelbrot_tasks.cpp mandelbrot_tasks_serial.cpp
ISPC_IA_TARGETS=sse2-i32x4,sse4-i32x8,avx1-i32x16,avx2-i32x16
ISPC_ARM_TARGETS=neon

# Name of the executable produced by the link rule.
PROG=mandelbrot_tasks

# Source lists, one per compiler.
ISPC_SRC=mandelbrot_tasks.ispc
CXX_SRC=mandelbrot_tasks.cpp mandelbrot_tasks_serial.cpp
NVCC_SRC=../nvcc_helpers.cu

# Host C++ compiler.
# NOTE(review): CUDATK is not defined in this file; presumably it comes from
# the environment and points at the CUDA toolkit root -- confirm.
CXX=g++
CXXFLAGS=-O3 -I$(CUDATK)/include

# CUDA compiler; nvcc is also used as the link driver.
NVCC=nvcc
NVCC_FLAGS=-O3 -arch=sm_35
LD=nvcc
LDFLAGS=-lcudart -lcudadevrt

# Tool that turns the LLVM bitcode emitted by ispc into an object file.
PTXCC=ptxcc
PTXCC_FLAGS = -maxrregcount=32 -Xptxas=-v

# ispc compiler.
ISPC=ispc
ISPC_FLAGS=-O3 --math-lib=default --target=nvptx64 --opt=fast-math

# PTXGEN = $(HOME)/ptxgen
# PTXGEN += -opt=3
# PTXGEN += -ftz=1 -prec-div=0 -prec-sqrt=0 -fma=1
###.SUFFIXES: .bc .o .cu

# Derived file lists. $(addprefix ...) is used so that *every* word of a
# multi-file source list receives the objs/ prefix; writing objs/$(VAR:...)
# would prefix only the first word of the expansion.
ISPC_OBJS=$(addprefix objs/,$(ISPC_SRC:.ispc=_ispc.o))
ISPC_BCS=$(addprefix objs/,$(ISPC_SRC:.ispc=_ispc.bc))
ISPC_HEADERS=$(addprefix objs/,$(ISPC_SRC:.ispc=_ispc.h))
CXX_OBJS=$(addprefix objs/,$(CXX_SRC:.cpp=_gcc.o))
NVCC_OBJS=$(addprefix objs/,$(NVCC_SRC:.cu=_nvcc.o))

# Everything that goes into the final link.
OBJS=$(ISPC_OBJS) $(CXX_OBJS) $(NVCC_OBJS)
# ---------------------------------------------------------------------------
# Rules. Recipe lines must begin with a TAB character.
# ---------------------------------------------------------------------------

# These targets name actions, not files.
.PHONY: all dirs clean

# Default goal: build the example executable.
all: $(PROG)

# Create the output directory for objects and generated files.
dirs:
	/bin/mkdir -p objs/

# This pattern rule has no recipe, so on its own it does not attach `dirs`
# to any target; it is kept unchanged from the original file.
objs/%.cpp objs/%.o objs/%.h: dirs

# Remove the executable and everything under objs/.
clean:
	/bin/rm -rf $(PROG) objs

# Link all objects with $(LD); libraries come last on the command line.
$(PROG): $(OBJS)
	$(LD) -o $@ $^ $(LDFLAGS)

# Host C++ objects. The generated ispc headers are real prerequisites; `dirs`
# is order-only (after `|`) so that it is run first but never forces a rebuild.
objs/%_gcc.o: %.cpp $(ISPC_HEADERS) | dirs
	$(CXX) $(CXXFLAGS) -o $@ -c $<

# CUDA objects. The target suffix is _nvcc.o to match the names in NVCC_OBJS.
objs/%_nvcc.o: %.cu | dirs
	$(NVCC) $(NVCC_FLAGS) -o $@ -c $<

# One ispc invocation emits both the header and the LLVM bitcode file.
objs/%_ispc.h objs/%_ispc.bc: %.ispc | dirs
	$(ISPC) $(ISPC_FLAGS) --emit-llvm -h objs/$*_ispc.h -o objs/$*_ispc.bc $<

# Turn the ispc bitcode into an object file with $(PTXCC).
objs/%_ispc.o: objs/%_ispc.bc
	$(PTXCC) $(PTXCC_FLAGS) $< -o $@

# NOTE(review): the contents of ../common.mk are not visible here; if it also
# defines all/dirs/clean or object rules they will conflict with the rules
# above -- confirm whether this include is still wanted.
include ../common.mk

View File

@@ -33,21 +33,21 @@
/* Iterate z = z*z + c starting from z = c, for at most `count` steps.
 *
 *   c_re, c_im  real and imaginary parts of the constant c
 *   count       maximum number of iterations
 *
 * Returns the index of the first iteration at which |z|^2 exceeds 4, or
 * `count` if that never happens within the iteration budget.
 */
static inline int
mandel(float c_re, float c_im, int count) {
    float z_re = c_re, z_im = c_im;
    int i;
    for (i = 0; i < count; ++i) {
        /* Escape test; the float literal keeps the comparison in single
           precision. */
        if (z_re * z_re + z_im * z_im > 4.0f)
            break;

        /* Real and imaginary parts of z*z. */
        float new_re = z_re*z_re - z_im*z_im;
        float new_im = 2.0f * z_re * z_im;

        /* The update is placed in an `unmasked` block, i.e. it is executed
           without applying the current execution mask. */
        unmasked {
            z_re = c_re + new_re;
            z_im = c_im + new_im;
        }
    }
    return i;
}