diff --git a/examples_ptx/mandelbrot_tasks/Makefile b/examples_ptx/mandelbrot_tasks/Makefile index bbbe44ff..7cb1ea53 100644 --- a/examples_ptx/mandelbrot_tasks/Makefile +++ b/examples_ptx/mandelbrot_tasks/Makefile @@ -2,15 +2,16 @@ PROG=mandelbrot_tasks ISPC_SRC=mandelbrot_tasks.ispc CXX_SRC=mandelbrot_tasks.cpp mandelbrot_tasks_serial.cpp NVCC_SRC=../nvcc_helpers.cu +NVCC_OBJS=objs/nvcc_helpers_nvcc.o CXX=g++ -CXXFLAGS=-O3 -I$(CUDATK)/include +CXXFLAGS=-O3 -I$(CUDATK)/include -Iobjs/ -D_CUDA_ NVCC=nvcc -NVCC_FLAGS=-O3 -arch=sm_35 +NVCC_FLAGS=-O3 -arch=sm_35 -D_CUDA_ LD=nvcc -LDFLAGS=-lcudart -lcudadevrt +LDFLAGS=-lcudart -lcudadevrt -arch=sm_35 PTXCC=ptxcc PTXCC_FLAGS = -maxrregcount=32 -Xptxas=-v @@ -22,18 +23,18 @@ ISPC_FLAGS=-O3 --math-lib=default --target=nvptx64 --opt=fast-math # PTXGEN += -opt=3 # PTXGEN += -ftz=1 -prec-div=0 -prec-sqrt=0 -fma=1 -###.SUFFIXES: .bc .o .cu + .SUFFIXES: .bc .o .cu -ISPC_OBJS=objs/$(ISPC_SRC:.ispc=_ispc.o) -ISPC_BCS=objs/$(ISPC_SRC:.ispc=_ispc.bc) -ISPC_HEADERS=objs/$(ISPC_SRC:.ispc=_ispc.h) -CXX_OBJS=objs/$(CXX_SRC:.cpp=_gcc.o) -NVCC_OBJS=objs/$(NVCC_SRC:.cu=_nvcc.o) +ISPC_OBJS=$(ISPC_SRC:%.ispc=objs/%_ispc.o) +ISPC_BCS=$(ISPC_SRC:%.ispc=objs/%_ispc.bc) +ISPC_HEADERS=$(ISPC_SRC:%.ispc=objs/%_ispc.h) +CXX_OBJS=$(CXX_SRC:%.cpp=objs/%_gcc.o) +#NVCC_OBJS=$(NVCC_SRC:%.cu=objs/%_nvcc.o) OBJS=$(ISPC_OBJS) $(CXX_OBJS) $(NVCC_OBJS) -all: $(PROG) $(ISPC_BCS) +all: dirs $(PROG) $(ISPC_BCS) dirs: /bin/mkdir -p objs/ @@ -41,22 +42,27 @@ dirs: objs/%.cpp objs/%.o objs/%.h: dirs clean: + echo $(CXX_OBJS) /bin/rm -rf $(PROG) objs $(PROG): $(OBJS) $(LD) -o $@ $^ $(LDFLAGS) -objs/%_gcc.o: %.cpp dirs $(ISPC_HEADERS) +objs/%_gcc.o: %.cpp $(ISPC_HEADERS) + $(CXX) $(CXXFLAGS) -o $@ -c $< +objs/%_gcc.o: ../%.cpp $(CXX) $(CXXFLAGS) -o $@ -c $< -objs/%_cu.o: %.cu dirs +objs/%_nvcc.o: ../%.cu + $(NVCC) $(NVCC_FLAGS) -o $@ -c $< +objs/%_nvcc.o: %.cu $(NVCC) $(NVCC_FLAGS) -o $@ -c $< -objs/%_ispc.h objs/%_ispc.bc: %.ispc dirs +objs/%_ispc.h objs/%_ispc.bc: %.ispc $(ISPC) $(ISPC_FLAGS) --emit-llvm -h objs/$*_ispc.h -o objs/$*_ispc.bc $< objs/%_ispc.o: objs/%_ispc.bc - $(PTXCC) $(PTXCC_FLAGS) $< -o $@ + $(PTXCC) $< $(PTXCC_FLAGS) -o $@ diff --git a/examples_ptx/mandelbrot_tasks/mandelbrot_tasks.cpp b/examples_ptx/mandelbrot_tasks/mandelbrot_tasks.cpp index 61678cdf..f40950c1 100644 --- a/examples_ptx/mandelbrot_tasks/mandelbrot_tasks.cpp +++ b/examples_ptx/mandelbrot_tasks/mandelbrot_tasks.cpp @@ -104,7 +104,7 @@ int main(int argc, char *argv[]) { int maxIterations = 512; int *buf; - ispc_malloc(&buf, n*widht*height); + ispc_malloc((void**)&buf, sizeof(int)*width*height); for (unsigned int i = 0; i < width * height; ++i) buf[i] = 0; @@ -141,7 +141,7 @@ int main(int argc, char *argv[]) { { reset_and_start_timer(); mandelbrot_serial(x0, y0, x1, y1, width, height, maxIterations, buf); - double dt = get_elapsed_mcycles(); + double dt = get_elapsed_msec(); printf("@time of serial run:\t\t\t[%.3f] msec\n", dt); minSerial = std::min(minSerial, dt); } diff --git a/examples_ptx/timing.h b/examples_ptx/timing.h index 7a44c531..1e2238b1 100644 --- a/examples_ptx/timing.h +++ b/examples_ptx/timing.h @@ -98,7 +98,7 @@ static inline void reset_and_start_timer() /* Returns the number of millions of elapsed processor cycles since the last reset_and_start_timer() call. */ -static inline double get_elapsed_mcycles() +static inline double get_elapsed_msec() { end = rdtsc(); tend = rtc();