This commit is contained in:
evghenii
2014-01-04 23:06:49 +01:00
parent be7f45b3bf
commit a784434e1f
3 changed files with 23 additions and 17 deletions

View File

@@ -2,15 +2,16 @@ PROG=mandelbrot_tasks
ISPC_SRC=mandelbrot_tasks.ispc ISPC_SRC=mandelbrot_tasks.ispc
CXX_SRC=mandelbrot_tasks.cpp mandelbrot_tasks_serial.cpp CXX_SRC=mandelbrot_tasks.cpp mandelbrot_tasks_serial.cpp
NVCC_SRC=../nvcc_helpers.cu NVCC_SRC=../nvcc_helpers.cu
NVCC_OBJS=objs/nvcc_helpers_nvcc.o
CXX=g++ CXX=g++
CXXFLAGS=-O3 -I$(CUDATK)/include CXXFLAGS=-O3 -I$(CUDATK)/include -Iobjs/ -D_CUDA_
NVCC=nvcc NVCC=nvcc
NVCC_FLAGS=-O3 -arch=sm_35 NVCC_FLAGS=-O3 -arch=sm_35 -D_CUDA_
LD=nvcc LD=nvcc
LDFLAGS=-lcudart -lcudadevrt LDFLAGS=-lcudart -lcudadevrt -arch=sm_35
PTXCC=ptxcc PTXCC=ptxcc
PTXCC_FLAGS = -maxrregcount=32 -Xptxas=-v PTXCC_FLAGS = -maxrregcount=32 -Xptxas=-v
@@ -22,18 +23,18 @@ ISPC_FLAGS=-O3 --math-lib=default --target=nvptx64 --opt=fast-math
# PTXGEN += -opt=3 # PTXGEN += -opt=3
# PTXGEN += -ftz=1 -prec-div=0 -prec-sqrt=0 -fma=1 # PTXGEN += -ftz=1 -prec-div=0 -prec-sqrt=0 -fma=1
###.SUFFIXES: .bc .o .cu .SUFFIXES: .bc .o .cu
ISPC_OBJS=objs/$(ISPC_SRC:.ispc=_ispc.o) ISPC_OBJS=$(ISPC_SRC:%.ispc=objs/%_ispc.o)
ISPC_BCS=objs/$(ISPC_SRC:.ispc=_ispc.bc) ISPC_BCS=$(ISPC_SRC:%.ispc=objs/%_ispc.bc)
ISPC_HEADERS=objs/$(ISPC_SRC:.ispc=_ispc.h) ISPC_HEADERS=$(ISPC_SRC:%.ispc=objs/%_ispc.h)
CXX_OBJS=objs/$(CXX_SRC:.cpp=_gcc.o) CXX_OBJS=$(CXX_SRC:%.cpp=objs/%_gcc.o)
NVCC_OBJS=objs/$(NVCC_SRC:.cu=_nvcc.o) #NVCC_OBJS=$(NVCC_SRC:%.cu=objs/%_nvcc.o)
OBJS=$(ISPC_OBJS) $(CXX_OBJS) $(NVCC_OBJS) OBJS=$(ISPC_OBJS) $(CXX_OBJS) $(NVCC_OBJS)
all: $(PROG) $(ISPC_BCS) all: dirs $(PROG) $(ISPC_BCS)
dirs: dirs:
/bin/mkdir -p objs/ /bin/mkdir -p objs/
@@ -41,22 +42,27 @@ dirs:
objs/%.cpp objs/%.o objs/%.h: dirs objs/%.cpp objs/%.o objs/%.h: dirs
clean: clean:
echo $(CXX_OBJS)
/bin/rm -rf $(PROG) objs /bin/rm -rf $(PROG) objs
$(PROG): $(OBJS) $(PROG): $(OBJS)
$(LD) -o $@ $^ $(LDFLAGS) $(LD) -o $@ $^ $(LDFLAGS)
objs/%_gcc.o: %.cpp dirs $(ISPC_HEADERS) objs/%_gcc.o: %.cpp $(ISPC_HEADERS)
$(CXX) $(CXXFLAGS) -o $@ -c $<
objs/%_gcc.o: ../%.cpp
$(CXX) $(CXXFLAGS) -o $@ -c $< $(CXX) $(CXXFLAGS) -o $@ -c $<
objs/%_cu.o: %.cu dirs objs/%_nvcc.o: ../%.cu
$(NVCC) $(NVCC_FLAGS) -o $@ -c $<
objs/%_nvcc.o: %.cu
$(NVCC) $(NVCC_FLAGS) -o $@ -c $< $(NVCC) $(NVCC_FLAGS) -o $@ -c $<
objs/%_ispc.h objs/%_ispc.bc: %.ispc dirs objs/%_ispc.h objs/%_ispc.bc: %.ispc
$(ISPC) $(ISPC_FLAGS) --emit-llvm -h objs/$*_ispc.h -o objs/$*_ispc.bc $< $(ISPC) $(ISPC_FLAGS) --emit-llvm -h objs/$*_ispc.h -o objs/$*_ispc.bc $<
objs/%_ispc.o: objs/%_ispc.bc objs/%_ispc.o: objs/%_ispc.bc
$(PTXCC) $(PTXCC_FLAGS) $< -o $@ $(PTXCC) $< $(PTXCC_FLAGS) -o $@

View File

@@ -104,7 +104,7 @@ int main(int argc, char *argv[]) {
int maxIterations = 512; int maxIterations = 512;
int *buf; int *buf;
ispc_malloc(&buf, n*widht*height); ispc_malloc((void**)&buf, sizeof(int)*width*height);
for (unsigned int i = 0; i < width * height; ++i) for (unsigned int i = 0; i < width * height; ++i)
buf[i] = 0; buf[i] = 0;
@@ -141,7 +141,7 @@ int main(int argc, char *argv[]) {
{ {
reset_and_start_timer(); reset_and_start_timer();
mandelbrot_serial(x0, y0, x1, y1, width, height, maxIterations, buf); mandelbrot_serial(x0, y0, x1, y1, width, height, maxIterations, buf);
double dt = get_elapsed_mcycles(); double dt = get_elapsed_msec();
printf("@time of serial run:\t\t\t[%.3f] msec\n", dt); printf("@time of serial run:\t\t\t[%.3f] msec\n", dt);
minSerial = std::min(minSerial, dt); minSerial = std::min(minSerial, dt);
} }

View File

@@ -98,7 +98,7 @@ static inline void reset_and_start_timer()
/* Returns the number of millions of elapsed processor cycles since the /* Returns the number of millions of elapsed processor cycles since the
last reset_and_start_timer() call. */ last reset_and_start_timer() call. */
static inline double get_elapsed_mcycles() static inline double get_elapsed_msec()
{ {
end = rdtsc(); end = rdtsc();
tend = rtc(); tend = rtc();