added Makefile for gpu and cpu
This commit is contained in:
114
examples_ptx/common.mk
Normal file
114
examples_ptx/common.mk
Normal file
@@ -0,0 +1,114 @@
|
|||||||
|
|
||||||
|
TASK_CXX=../tasksys.cpp
|
||||||
|
TASK_LIB=-lpthread
|
||||||
|
TASK_OBJ=objs/tasksys.o
|
||||||
|
|
||||||
|
CXX=clang++
|
||||||
|
CXXFLAGS+=-Iobjs/ -O2
|
||||||
|
CC=clang
|
||||||
|
CCFLAGS+=-Iobjs/ -O2
|
||||||
|
|
||||||
|
LIBS=-lm $(TASK_LIB) -lstdc++
|
||||||
|
ISPC=ispc
|
||||||
|
ISPC_FLAGS+=-O2
|
||||||
|
ISPC_HEADER=objs/$(ISPC_SRC:.ispc=_ispc.h)
|
||||||
|
|
||||||
|
# Normalise the machine name reported by `uname -m` to a coarse architecture
# tag: x86_64 and i686 become "x86"; names such as armv7l (anything from "arm"
# onwards) and sa110 become "arm". Any other value passes through unchanged and
# is rejected by the $(error ...) branch of the ARCH conditional below.
ARCH:=$(shell uname -m | sed -e s/x86_64/x86/ -e s/i686/x86/ -e s/arm.*/arm/ -e s/sa110/arm/)
|
||||||
|
|
||||||
|
ifeq ($(ARCH),x86)
|
||||||
|
ISPC_OBJS=$(addprefix objs/, $(ISPC_SRC:.ispc=)_ispc.o)
|
||||||
|
COMMA=,
|
||||||
|
ifneq (,$(findstring $(COMMA),$(ISPC_IA_TARGETS)))
|
||||||
|
#$(info multi-target detected: $(ISPC_IA_TARGETS))
|
||||||
|
ifneq (,$(findstring sse2,$(ISPC_IA_TARGETS)))
|
||||||
|
ISPC_OBJS+=$(addprefix objs/, $(ISPC_SRC:.ispc=)_ispc_sse2.o)
|
||||||
|
endif
|
||||||
|
ifneq (,$(findstring sse4,$(ISPC_IA_TARGETS)))
|
||||||
|
ISPC_OBJS+=$(addprefix objs/, $(ISPC_SRC:.ispc=)_ispc_sse4.o)
|
||||||
|
endif
|
||||||
|
ifneq (,$(findstring avx1-,$(ISPC_IA_TARGETS)))
|
||||||
|
ISPC_OBJS+=$(addprefix objs/, $(ISPC_SRC:.ispc=)_ispc_avx.o)
|
||||||
|
endif
|
||||||
|
ifneq (,$(findstring avx1.1,$(ISPC_IA_TARGETS)))
|
||||||
|
ISPC_OBJS+=$(addprefix objs/, $(ISPC_SRC:.ispc=)_ispc_avx11.o)
|
||||||
|
endif
|
||||||
|
ifneq (,$(findstring avx2,$(ISPC_IA_TARGETS)))
|
||||||
|
ISPC_OBJS+=$(addprefix objs/, $(ISPC_SRC:.ispc=)_ispc_avx2.o)
|
||||||
|
endif
|
||||||
|
endif
|
||||||
|
ISPC_TARGETS=$(ISPC_IA_TARGETS)
|
||||||
|
ARCH_BIT:=$(shell getconf LONG_BIT)
|
||||||
|
ifeq ($(ARCH_BIT),32)
|
||||||
|
ISPC_FLAGS += --arch=x86
|
||||||
|
CXXFLAGS += -m32
|
||||||
|
CCFLAGS += -m32
|
||||||
|
else
|
||||||
|
ISPC_FLAGS += --arch=x86-64
|
||||||
|
CXXFLAGS += -m64
|
||||||
|
CCFLAGS += -m64
|
||||||
|
endif
|
||||||
|
else ifeq ($(ARCH),arm)
|
||||||
|
ISPC_OBJS=$(addprefix objs/, $(ISPC_SRC:.ispc=_ispc.o))
|
||||||
|
ISPC_TARGETS=$(ISPC_ARM_TARGETS)
|
||||||
|
else
|
||||||
|
$(error Unknown architecture $(ARCH) from uname -m)
|
||||||
|
endif
|
||||||
|
|
||||||
|
CPP_OBJS=$(addprefix objs/, $(CPP_SRC:.cpp=.o))
|
||||||
|
CC_OBJS=$(addprefix objs/, $(CC_SRC:.c=.o))
|
||||||
|
OBJS=$(CPP_OBJS) $(CC_OBJS) $(TASK_OBJ) $(ISPC_OBJS)
|
||||||
|
|
||||||
|
default: $(EXAMPLE)
|
||||||
|
|
||||||
|
all: $(EXAMPLE) $(EXAMPLE)-sse4 $(EXAMPLE)-generic16 $(EXAMPLE)-scalar
|
||||||
|
|
||||||
|
# default and all name commands, not files, so they are declared phony as well;
# otherwise a stray file called "default" or "all" would silently disable them.
.PHONY: default all dirs clean
|
||||||
|
|
||||||
|
dirs:
|
||||||
|
/bin/mkdir -p objs/
|
||||||
|
|
||||||
|
# NOTE(review): this pattern rule has no recipe. In GNU make a recipe-less
# pattern rule does not attach "dirs" as a prerequisite to other rules; it only
# cancels an implicit rule with the same targets and prerequisites. It therefore
# probably does not cause objs/ to be created on its own - confirm before
# relying on it.
objs/%.cpp objs/%.o objs/%.h: dirs
|
||||||
|
|
||||||
|
# Remove the object directory, editor backups and every executable this file
# can link. $(EXAMPLE)-scalar is included because the "all" target builds it;
# it was previously left behind after "make clean".
clean:
|
||||||
|
/bin/rm -rf objs *~ $(EXAMPLE) $(EXAMPLE)-sse4 $(EXAMPLE)-generic16 $(EXAMPLE)-scalar ref test
|
||||||
|
|
||||||
|
$(EXAMPLE): $(OBJS)
|
||||||
|
$(CXX) $(CXXFLAGS) -o $@ $^ $(LIBS)
|
||||||
|
|
||||||
|
# Compile a C++ source from the example directory into objs/, after the ispc
# header it may include has been generated.
# "dirs" is a phony target, so it is listed order-only (after "|"): objs/ is
# still created first, but the object is no longer treated as out of date on
# every single run of make.
objs/%.o: %.cpp $(ISPC_HEADER) | dirs
|
||||||
|
$(CXX) $< $(CXXFLAGS) -c -o $@
|
||||||
|
|
||||||
|
# Compile a C source from the example directory into objs/, after the ispc
# header it may include has been generated.
# "dirs" is a phony target, so it is listed order-only (after "|"): objs/ is
# still created first, but the object is no longer treated as out of date on
# every single run of make.
objs/%.o: %.c $(ISPC_HEADER) | dirs
|
||||||
|
$(CC) $< $(CCFLAGS) -c -o $@
|
||||||
|
|
||||||
|
# Compile a C++ source that lives one directory up (e.g. $(TASK_CXX)) into objs/.
# "dirs" is a phony target, so it is listed order-only (after "|"): objs/ is
# still created first, but the object is no longer treated as out of date on
# every single run of make.
objs/%.o: ../%.cpp | dirs
|
||||||
|
$(CXX) $< $(CXXFLAGS) -c -o $@
|
||||||
|
|
||||||
|
# The example's main object needs the generated ispc header. The phony "dirs"
# target is order-only so that it does not force this object to be rebuilt on
# every run.
objs/$(EXAMPLE).o: objs/$(EXAMPLE)_ispc.h | dirs
|
||||||
|
|
||||||
|
# Run ispc once per .ispc source, writing objs/<stem>_ispc.o and the matching
# header. The per-ISA object names are listed as additional targets of the same
# pattern rule so that one invocation accounts for all of them (presumably ispc
# emits them itself when several --target values are given - confirm).
# "dirs" is phony and therefore order-only: objs/ is created first without
# making ispc re-run on every invocation of make.
objs/%_ispc.h objs/%_ispc.o objs/%_ispc_sse2.o objs/%_ispc_sse4.o objs/%_ispc_avx.o objs/%_ispc_avx11.o objs/%_ispc_avx2.o: %.ispc | dirs
|
||||||
|
$(ISPC) $(ISPC_FLAGS) --target=$(ISPC_TARGETS) $< -o objs/$*_ispc.o -h objs/$*_ispc.h
|
||||||
|
|
||||||
|
objs/$(ISPC_SRC:.ispc=)_sse4.cpp: $(ISPC_SRC)
|
||||||
|
$(ISPC) $(ISPC_FLAGS) $< -o $@ --target=generic-4 --emit-c++ --c++-include-file=sse4.h
|
||||||
|
|
||||||
|
objs/$(ISPC_SRC:.ispc=)_sse4.o: objs/$(ISPC_SRC:.ispc=)_sse4.cpp
|
||||||
|
$(CXX) -I../intrinsics -msse4.2 $< $(CXXFLAGS) -c -o $@
|
||||||
|
|
||||||
|
$(EXAMPLE)-sse4: $(CPP_OBJS) objs/$(ISPC_SRC:.ispc=)_sse4.o
|
||||||
|
$(CXX) $(CXXFLAGS) -o $@ $^ $(LIBS)
|
||||||
|
|
||||||
|
objs/$(ISPC_SRC:.ispc=)_generic16.cpp: $(ISPC_SRC)
|
||||||
|
$(ISPC) $(ISPC_FLAGS) $< -o $@ --target=generic-16 --emit-c++ --c++-include-file=generic-16.h
|
||||||
|
|
||||||
|
objs/$(ISPC_SRC:.ispc=)_generic16.o: objs/$(ISPC_SRC:.ispc=)_generic16.cpp
|
||||||
|
$(CXX) -I../intrinsics $< $(CXXFLAGS) -c -o $@
|
||||||
|
|
||||||
|
$(EXAMPLE)-generic16: $(CPP_OBJS) objs/$(ISPC_SRC:.ispc=)_generic16.o
|
||||||
|
$(CXX) $(CXXFLAGS) -o $@ $^ $(LIBS)
|
||||||
|
|
||||||
|
objs/$(ISPC_SRC:.ispc=)_scalar.o: $(ISPC_SRC)
|
||||||
|
$(ISPC) $(ISPC_FLAGS) $< -o $@ --target=generic-1
|
||||||
|
|
||||||
|
$(EXAMPLE)-scalar: $(CPP_OBJS) objs/$(ISPC_SRC:.ispc=)_scalar.o
|
||||||
|
$(CXX) $(CXXFLAGS) -o $@ $^ $(LIBS)
|
||||||
@@ -1,69 +1,8 @@
|
|||||||
PROG=mandelbrot_tasks
|
|
||||||
|
EXAMPLE=mandelbrot_tasks
|
||||||
|
CPP_SRC=mandelbrot_tasks.cpp mandelbrot_tasks_serial.cpp
|
||||||
ISPC_SRC=mandelbrot_tasks.ispc
|
ISPC_SRC=mandelbrot_tasks.ispc
|
||||||
CXX_SRC=mandelbrot_tasks.cpp mandelbrot_tasks_serial.cpp
|
ISPC_IA_TARGETS=avx1-i32x16
|
||||||
NVCC_SRC=../nvcc_helpers.cu
|
ISPC_ARM_TARGETS=neon
|
||||||
NVCC_OBJS=objs/nvcc_helpers_nvcc.o
|
|
||||||
|
|
||||||
CXX=g++
|
|
||||||
CXXFLAGS=-O3 -I$(CUDATK)/include -Iobjs/ -D_CUDA_
|
|
||||||
|
|
||||||
NVCC=nvcc
|
|
||||||
NVCC_FLAGS=-O3 -arch=sm_35 -D_CUDA_
|
|
||||||
|
|
||||||
LD=nvcc
|
|
||||||
LDFLAGS=-lcudart -lcudadevrt -arch=sm_35
|
|
||||||
|
|
||||||
PTXCC=ptxcc
|
|
||||||
PTXCC_FLAGS = -maxrregcount=32 -Xptxas=-v
|
|
||||||
|
|
||||||
ISPC=ispc
|
|
||||||
ISPC_FLAGS=-O3 --math-lib=default --target=nvptx64 --opt=fast-math
|
|
||||||
|
|
||||||
# PTXGEN = $(HOME)/ptxgen
|
|
||||||
# PTXGEN += -opt=3
|
|
||||||
# PTXGEN += -ftz=1 -prec-div=0 -prec-sqrt=0 -fma=1
|
|
||||||
|
|
||||||
.SUFFIXES: .bc .o .cu
|
|
||||||
|
|
||||||
|
|
||||||
ISPC_OBJS=$(ISPC_SRC:%.ispc=objs/%_ispc.o)
|
|
||||||
ISPC_BCS=$(ISPC_SRC:%.ispc=objs/%_ispc.bc)
|
|
||||||
ISPC_HEADERS=$(ISPC_SRC:%.ispc=objs/%_ispc.h)
|
|
||||||
CXX_OBJS=$(CXX_SRC:%.cpp=objs/%_gcc.o)
|
|
||||||
#NVCC_OBJS=$(NVCC_SRC:%.cu=objs/%_nvcc.o)
|
|
||||||
|
|
||||||
OBJS=$(ISPC_OBJS) $(CXX_OBJS) $(NVCC_OBJS)
|
|
||||||
|
|
||||||
all: dirs $(PROG) $(ISPC_BCS)
|
|
||||||
|
|
||||||
dirs:
|
|
||||||
/bin/mkdir -p objs/
|
|
||||||
|
|
||||||
objs/%.cpp objs/%.o objs/%.h: dirs
|
|
||||||
|
|
||||||
clean:
|
|
||||||
echo $(CXX_OBJS)
|
|
||||||
/bin/rm -rf $(PROG) objs
|
|
||||||
|
|
||||||
$(PROG): $(OBJS)
|
|
||||||
$(LD) -o $@ $^ $(LDFLAGS)
|
|
||||||
|
|
||||||
objs/%_gcc.o: %.cpp $(ISPC_HEADERS)
|
|
||||||
$(CXX) $(CXXFLAGS) -o $@ -c $<
|
|
||||||
objs/%_gcc.o: ../%.cpp
|
|
||||||
$(CXX) $(CXXFLAGS) -o $@ -c $<
|
|
||||||
|
|
||||||
objs/%_nvcc.o: ../%.cu
|
|
||||||
$(NVCC) $(NVCC_FLAGS) -o $@ -c $<
|
|
||||||
objs/%_nvcc.o: %.cu
|
|
||||||
$(NVCC) $(NVCC_FLAGS) -o $@ -c $<
|
|
||||||
|
|
||||||
objs/%_ispc.h objs/%_ispc.bc: %.ispc
|
|
||||||
$(ISPC) $(ISPC_FLAGS) --emit-llvm -h objs/$*_ispc.h -o objs/$*_ispc.bc $<
|
|
||||||
|
|
||||||
objs/%_ispc.o: objs/%_ispc.bc
|
|
||||||
$(PTXCC) $< $(PTXCC_FLAGS) -o $@
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
include ../common.mk
|
||||||
|
|||||||
69
examples_ptx/mandelbrot_tasks/Makefile_gpu
Normal file
69
examples_ptx/mandelbrot_tasks/Makefile_gpu
Normal file
@@ -0,0 +1,69 @@
|
|||||||
|
PROG=mandelbrot_tasks_gpu
|
||||||
|
ISPC_SRC=mandelbrot_tasks.ispc
|
||||||
|
CXX_SRC=mandelbrot_tasks.cpp mandelbrot_tasks_serial.cpp
|
||||||
|
NVCC_SRC=../nvcc_helpers.cu
|
||||||
|
NVCC_OBJS=objs_gpu/nvcc_helpers_nvcc.o
|
||||||
|
|
||||||
|
CXX=g++
|
||||||
|
CXXFLAGS=-O3 -I$(CUDATK)/include -Iobjs_gpu/ -D_CUDA_
|
||||||
|
|
||||||
|
NVCC=nvcc
|
||||||
|
NVCC_FLAGS=-O3 -arch=sm_35 -D_CUDA_
|
||||||
|
|
||||||
|
LD=nvcc
|
||||||
|
LDFLAGS=-lcudart -lcudadevrt -arch=sm_35
|
||||||
|
|
||||||
|
PTXCC=ptxcc
|
||||||
|
PTXCC_FLAGS = -maxrregcount=32 -Xptxas=-v
|
||||||
|
|
||||||
|
ISPC=ispc
|
||||||
|
ISPC_FLAGS=-O3 --math-lib=default --target=nvptx64 --opt=fast-math
|
||||||
|
|
||||||
|
# PTXGEN = $(HOME)/ptxgen
|
||||||
|
# PTXGEN += -opt=3
|
||||||
|
# PTXGEN += -ftz=1 -prec-div=0 -prec-sqrt=0 -fma=1
|
||||||
|
|
||||||
|
.SUFFIXES: .bc .o .cu
|
||||||
|
|
||||||
|
|
||||||
|
ISPC_OBJS=$(ISPC_SRC:%.ispc=objs_gpu/%_ispc.o)
|
||||||
|
ISPC_BCS=$(ISPC_SRC:%.ispc=objs_gpu/%_ispc.bc)
|
||||||
|
ISPC_HEADERS=$(ISPC_SRC:%.ispc=objs_gpu/%_ispc.h)
|
||||||
|
CXX_OBJS=$(CXX_SRC:%.cpp=objs_gpu/%_gcc.o)
|
||||||
|
#NVCC_OBJS=$(NVCC_SRC:%.cu=objs_gpu/%_nvcc.o)
|
||||||
|
|
||||||
|
OBJS=$(ISPC_OBJS) $(CXX_OBJS) $(NVCC_OBJS)
|
||||||
|
|
||||||
|
# all, dirs and clean are commands rather than files; declare them phony so a
# same-named file in the directory cannot mask them.
.PHONY: all dirs clean

# Default goal: create the object directory, link $(PROG) and build the ispc
# bitcode files listed in $(ISPC_BCS).
all: dirs $(PROG) $(ISPC_BCS)
|
||||||
|
|
||||||
|
dirs:
|
||||||
|
/bin/mkdir -p objs_gpu/
|
||||||
|
|
||||||
|
objs_gpu/%.cpp objs_gpu/%.o objs_gpu/%.h: dirs
|
||||||
|
|
||||||
|
# Remove the linked program and the whole objs_gpu/ directory.
# The leftover debugging line "echo $(CXX_OBJS)" was dropped; it only printed
# the object list and had no part in cleaning.
clean:
|
||||||
|
/bin/rm -rf $(PROG) objs_gpu
|
||||||
|
|
||||||
|
$(PROG): $(OBJS)
|
||||||
|
$(LD) -o $@ $^ $(LDFLAGS)
|
||||||
|
|
||||||
|
# Compile a host C++ source from this directory, after the ispc headers exist.
# "dirs" is an order-only prerequisite so objs_gpu/ is created before the
# compiler writes into it, even when this object is requested directly or the
# build runs with -j, without adding a rebuild trigger.
objs_gpu/%_gcc.o: %.cpp $(ISPC_HEADERS) | dirs
|
||||||
|
$(CXX) $(CXXFLAGS) -o $@ -c $<
|
||||||
|
# Compile a host C++ source that lives one directory up.
# "dirs" is an order-only prerequisite so objs_gpu/ is created before the
# compiler writes into it, without adding a rebuild trigger.
objs_gpu/%_gcc.o: ../%.cpp | dirs
|
||||||
|
$(CXX) $(CXXFLAGS) -o $@ -c $<
|
||||||
|
|
||||||
|
# Compile a CUDA source that lives one directory up (e.g. ../nvcc_helpers.cu).
# "dirs" is an order-only prerequisite so objs_gpu/ is created before nvcc
# writes into it, without adding a rebuild trigger.
objs_gpu/%_nvcc.o: ../%.cu | dirs
|
||||||
|
$(NVCC) $(NVCC_FLAGS) -o $@ -c $<
|
||||||
|
# Compile a CUDA source from this directory.
# "dirs" is an order-only prerequisite so objs_gpu/ is created before nvcc
# writes into it, without adding a rebuild trigger.
objs_gpu/%_nvcc.o: %.cu | dirs
|
||||||
|
$(NVCC) $(NVCC_FLAGS) -o $@ -c $<
|
||||||
|
|
||||||
|
# Run ispc once per .ispc source, emitting LLVM bitcode and the matching header
# into objs_gpu/. Both outputs are targets of the same pattern rule, so a single
# invocation accounts for the pair.
# "dirs" is an order-only prerequisite so objs_gpu/ is created before ispc
# writes into it, without adding a rebuild trigger.
objs_gpu/%_ispc.h objs_gpu/%_ispc.bc: %.ispc | dirs
|
||||||
|
$(ISPC) $(ISPC_FLAGS) --emit-llvm -h objs_gpu/$*_ispc.h -o objs_gpu/$*_ispc.bc $<
|
||||||
|
|
||||||
|
objs_gpu/%_ispc.o: objs_gpu/%_ispc.bc
|
||||||
|
$(PTXCC) $< $(PTXCC_FLAGS) -o $@
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
1381
examples_ptx/tasksys.cpp
Normal file
1381
examples_ptx/tasksys.cpp
Normal file
File diff suppressed because it is too large
Load Diff
Reference in New Issue
Block a user