diff --git a/examples/portable/aobench/Makefile_knc b/examples/portable/aobench/Makefile_knc
new file mode 100644
index 00000000..fe889a26
--- /dev/null
+++ b/examples/portable/aobench/Makefile_knc
@@ -0,0 +1,7 @@
+EXAMPLE=ao
+CXX_SRC=ao.cpp
+ISPC_SRC=ao.ispc
+ISPC_INTRINSICS=../../intrinsics/knc-i1x16.h
+ISPC_TARGET=generic-16
+
+include ../common_knc.mk
diff --git a/examples/portable/aobench/Makefile_gpu b/examples/portable/aobench/Makefile_ptx
similarity index 85%
rename from examples/portable/aobench/Makefile_gpu
rename to examples/portable/aobench/Makefile_ptx
index db3afc57..0b961fba 100644
--- a/examples/portable/aobench/Makefile_gpu
+++ b/examples/portable/aobench/Makefile_ptx
@@ -8,7 +8,7 @@ PTXCC_REGMAX=64
 LLVM_GPU=1
 NVVM_GPU=1
 
-include ../common_gpu.mk
+include ../common_ptx.mk
 
 
 
diff --git a/examples/portable/common_knc.mk b/examples/portable/common_knc.mk
new file mode 100644
index 00000000..3f3a7930
--- /dev/null
+++ b/examples/portable/common_knc.mk
@@ -0,0 +1,61 @@
+# Shared rules for building a portable example as $(EXAMPLE)_knc with icc -mmic.
+# The including Makefile sets EXAMPLE, CXX_SRC, ISPC_SRC, ISPC_INTRINSICS
+# and ISPC_TARGET before including this file.
+
+TASK_CXX=../../tasksys.cpp ../../util/ispc_malloc.cpp
+TASK_OBJ=objs_knc/tasksys.o objs_knc/ispc_malloc.o
+TASK_LIB=-openmp
+
+CXX=icc -openmp -mmic
+CXXFLAGS+=-Iobjs_knc/ -O2 -I../../ -I../../util -I./
+CXXFLAGS+= -DISPC_USE_OMP
+CC=icc -openmp -mmic
+CCFLAGS+= -Iobjs_knc/ -O2 -I../../ -I../../util -I./
+CCFLAGS+=-DISPC_USE_OMP
+
+LD=icc -mmic -openmp
+
+LIBS=-lm $(TASK_LIB) -lstdc++
+ISPC=ispc
+ISPC_FLAGS+=-O2
+ISPC_FLAGS+= --target=$(ISPC_TARGET) --c++-include-file=$(ISPC_INTRINSICS)
+
+ISPC_HEADERS=$(ISPC_SRC:%.ispc=objs_knc/%_ispc.h)
+ISPC_OBJ=$(ISPC_SRC:%.ispc=objs_knc/%_ispc.o)
+CXX_OBJ=$(CXX_SRC:%.cpp=objs_knc/%.o)
+CXX_OBJ+=$(TASK_OBJ)
+
+PROG=$(EXAMPLE)_knc
+
+# all, dirs and clean name actions, not files.
+.PHONY: all dirs clean
+
+all: dirs $(PROG)
+
+dirs:
+	/bin/mkdir -p objs_knc/
+
+# NOTE(review): this pattern rule has no recipe, so it probably does not make
+# objects wait for `dirs`; objs_knc/ exists because `all` lists `dirs` first.
+objs_knc/%.cpp objs_knc/%.o objs_knc/%.h: dirs
+
+clean:
+	/bin/rm -rf $(PROG) objs_knc
+
+# $(LIBS) goes last so -lm, $(TASK_LIB) and -lstdc++ reach the link line.
+$(PROG): $(ISPC_OBJ) $(CXX_OBJ)
+	$(LD) -o $@ $^ $(LDFLAGS) $(LIBS)
+
+objs_knc/%.o: %.cpp
+	$(CXX) $(CXXFLAGS) -o $@ -c $<
+
+objs_knc/%.o: ../../%.cpp
+	$(CXX) $(CXXFLAGS) -o $@ -c $<
+ +objs_knc/%.o: ../../util/%.cpp + $(CXX) $(CXXFLAGS) -o $@ -c $< + +objs_knc/%_ispc.o: %.ispc + $(ISPC) $(ISPC_FLAGS) --emit-c++ -o objs_knc/$*_ispc_zmm.cpp -h objs_knc/$*_ispc.h $< + $(CXX) $(CXXFLAGS) -o $@ objs_knc/$*_ispc_zmm.cpp -c + diff --git a/examples/portable/common_gpu.mk b/examples/portable/common_ptx.mk similarity index 50% rename from examples/portable/common_gpu.mk rename to examples/portable/common_ptx.mk index b04367e1..ed4b11cf 100644 --- a/examples/portable/common_gpu.mk +++ b/examples/portable/common_ptx.mk @@ -1,11 +1,11 @@ NVCC_SRC=../../util/nvcc_helpers.cu -NVCC_OBJS=objs_gpu/nvcc_helpers_nvcc.o +NVCC_OBJS=objs_ptx/nvcc_helpers_nvcc.o # CXX=g++ -ffast-math -CXXFLAGS=-O3 -I$(CUDATK)/include -Iobjs_gpu/ -D_CUDA_ -I../../util -I../../ +CXXFLAGS=-O3 -I$(CUDATK)/include -Iobjs_ptx/ -D_CUDA_ -I../../util -I../../ # NVCC=nvcc -NVCC_FLAGS+=-O3 -arch=sm_35 -D_CUDA_ -I../../util -Xptxas=-v -Iobjs_gpu/ +NVCC_FLAGS+=-O3 -arch=sm_35 -D_CUDA_ -I../../util -Xptxas=-v -Iobjs_ptx/ ifdef PTXCC_REGMAX NVCC_FLAGS += --maxrregcount=$(PTXCC_REGMAX) endif @@ -26,18 +26,18 @@ ISPC_FLAGS+=-O3 --math-lib=fast --target=nvptx --opt=fast-math # # # -ISPC_LLVM_OBJS=$(ISPC_SRC:%.ispc=objs_gpu/%_llvm_ispc.o) -ISPC_NVVM_OBJS=$(ISPC_SRC:%.ispc=objs_gpu/%_nvvm_ispc.o) -ISPC_BCS=$(ISPC_SRC:%.ispc=objs_gpu/%_ispc.bc) -ISPC_LLVM_PTX=$(ISPC_SRC:%.ispc=objs_gpu/%_llvm_ispc.ptx) -ISPC_NVVM_PTX=$(ISPC_SRC:%.ispc=objs_gpu/%_nvvm_ispc.ptx) -ISPC_HEADERS=$(ISPC_SRC:%.ispc=objs_gpu/%_ispc.h) -CXX_OBJS=$(CXX_SRC:%.cpp=objs_gpu/%_gcc.o) -CU_OBJS=$(CU_SRC:%.cu=objs_gpu/%_cu.o) -#NVCC_OBJS=$(NVCC_SRC:%.cu=objs_gpu/%_nvcc.o) +ISPC_LLVM_OBJS=$(ISPC_SRC:%.ispc=objs_ptx/%_llvm_ispc.o) +ISPC_NVVM_OBJS=$(ISPC_SRC:%.ispc=objs_ptx/%_nvvm_ispc.o) +ISPC_BCS=$(ISPC_SRC:%.ispc=objs_ptx/%_ispc.bc) +ISPC_LLVM_PTX=$(ISPC_SRC:%.ispc=objs_ptx/%_llvm_ispc.ptx) +ISPC_NVVM_PTX=$(ISPC_SRC:%.ispc=objs_ptx/%_nvvm_ispc.ptx) +ISPC_HEADERS=$(ISPC_SRC:%.ispc=objs_ptx/%_ispc.h) 
+CXX_OBJS=$(CXX_SRC:%.cpp=objs_ptx/%_gcc.o) +CU_OBJS=$(CU_SRC:%.cu=objs_ptx/%_cu.o) +#NVCC_OBJS=$(NVCC_SRC:%.cu=objs_ptx/%_nvcc.o) CXX_SRC+=ispc_malloc.cpp -CXX_OBJS+=objs_gpu/ispc_malloc_gcc.o +CXX_OBJS+=objs_ptx/ispc_malloc_gcc.o PTXGEN = $(ISPC_HOME)/ptxtools/ptxgen PTXGEN += --use_fast_math @@ -51,16 +51,16 @@ LLC_FLAGS=-march=nvptx64 -mcpu=sm_35 # .SUFFIXES: .bc .o .cu ifdef LLVM_GPU - OBJSgpu_llvm=$(ISPC_LLVM_OBJS) $(CXX_OBJS) $(NVCC_OBJS) - PROGgpu_llvm=$(PROG)_llvm_gpu + OBJSptx_llvm=$(ISPC_LLVM_OBJS) $(CXX_OBJS) $(NVCC_OBJS) + PROGptx_llvm=$(PROG)_llvm_ptx else ISPC_LLVM_PTX= endif ifdef NVVM_GPU - OBJSgpu_nvvm=$(ISPC_NVVM_OBJS) $(CXX_OBJS) $(NVCC_OBJS) $(ISPC_LVVM_PTX) - PROGgpu_nvvm=$(PROG)_nvvm_gpu + OBJSptx_nvvm=$(ISPC_NVVM_OBJS) $(CXX_OBJS) $(NVCC_OBJS) $(ISPC_LVVM_PTX) + PROGptx_nvvm=$(PROG)_nvvm_ptx else ISPC_NVVM_PTX= endif @@ -72,55 +72,55 @@ endif all: dirs \ - $(PROGgpu_nvvm) \ - $(PROGgpu_llvm) \ + $(PROGptx_nvvm) \ + $(PROGptx_llvm) \ $(PROGcu) $(ISPC_BC) $(ISPC_HEADERS) $(ISPC_NVVM_PTX) $(ISPC_LLVM_PTX) dirs: - /bin/mkdir -p objs_gpu/ + /bin/mkdir -p objs_ptx/ -objs_gpu/%.cpp objs_gpu/%.o objs_gpu/%.h: dirs +objs_ptx/%.cpp objs_ptx/%.o objs_ptx/%.h: dirs clean: - /bin/rm -rf $(PROGgpu_nvvm) $(PROGgpu_llvm) $(PROGcu) objs_gpu + /bin/rm -rf $(PROGptx_nvvm) $(PROGptx_llvm) $(PROGcu) objs_ptx # generate binaries -$(PROGgpu_llvm): $(OBJSgpu_llvm) +$(PROGptx_llvm): $(OBJSptx_llvm) $(LD) -o $@ $^ $(LDFLAGS) -$(PROGgpu_nvvm): $(OBJSgpu_nvvm) +$(PROGptx_nvvm): $(OBJSptx_nvvm) $(LD) -o $@ $^ $(LDFLAGS) $(PROGcu): $(OBJScu) $(LD) -o $@ $^ $(LDFLAGS) # compile C++ code -objs_gpu/%_gcc.o: %.cpp $(ISPC_HEADERS) +objs_ptx/%_gcc.o: %.cpp $(ISPC_HEADERS) $(CXX) $(CXXFLAGS) -o $@ -c $< -objs_gpu/%_gcc.o: ../../util/%.cpp +objs_ptx/%_gcc.o: ../../util/%.cpp $(CXX) $(CXXFLAGS) -o $@ -c $< # CUDA helpers -objs_gpu/%_cu.o: %.cu $(ISPC_HEADERS) +objs_ptx/%_cu.o: %.cu $(ISPC_HEADERS) $(NVCC) $(NVCC_FLAGS) -o $@ -dc $< # compile CUDA code -objs_gpu/%_nvcc.o: 
../../util/%.cu +objs_ptx/%_nvcc.o: ../../util/%.cu $(NVCC) $(NVCC_FLAGS) -o $@ -c $< -objs_gpu/%_nvcc.o: %.cu +objs_ptx/%_nvcc.o: %.cu $(NVCC) $(NVCC_FLAGS) -o $@ -c $< # compile ISPC to LLVM BC -objs_gpu/%_ispc.h objs_gpu/%_ispc.bc: %.ispc - $(ISPC) $(ISPC_FLAGS) --emit-llvm -h objs_gpu/$*_ispc.h -o objs_gpu/$*_ispc.bc $< +objs_ptx/%_ispc.h objs_ptx/%_ispc.bc: %.ispc + $(ISPC) $(ISPC_FLAGS) --emit-llvm -h objs_ptx/$*_ispc.h -o objs_ptx/$*_ispc.bc $< # generate PTX from LLVM BC -objs_gpu/%_llvm_ispc.ptx: objs_gpu/%_ispc.bc +objs_ptx/%_llvm_ispc.ptx: objs_ptx/%_ispc.bc $(LLC) $(LLC_FLAGS) -o $@ $< -objs_gpu/%_nvvm_ispc.ptx: objs_gpu/%_ispc.bc - $(LLVM32DIS) $< -o objs_gpu/$*_ispc-ll32.ll - $(PTXGEN) objs_gpu/$*_ispc-ll32.ll -o $@ +objs_ptx/%_nvvm_ispc.ptx: objs_ptx/%_ispc.bc + $(LLVM32DIS) $< -o objs_ptx/$*_ispc-ll32.ll + $(PTXGEN) objs_ptx/$*_ispc-ll32.ll -o $@ # generate an object file from PTX -objs_gpu/%_ispc.o: objs_gpu/%_ispc.ptx +objs_ptx/%_ispc.o: objs_ptx/%_ispc.ptx $(PTXCC) $< -Xnvcc="$(PTXCC_FLAGS)" -o $@
# NOTE(review): the common_gpu.mk -> common_ptx.mk hunks only replace "gpu" with "ptx" in
# paths and variable names (objs_gpu -> objs_ptx, OBJSgpu_* -> OBJSptx_*, PROGgpu_* -> PROGptx_*).
# NOTE(review): OBJSptx_nvvm references $(ISPC_LVVM_PTX) and the `all` prerequisites reference
# $(ISPC_BC); the variables defined in the visible hunks are ISPC_LLVM_PTX / ISPC_NVVM_PTX and
# ISPC_BCS. Both spellings are carried over unchanged from common_gpu.mk -- confirm intent.