Files
ispc/examples/common.mk
Matt Pharr d7b0c5794e Add support for ARM NEON targets.
Initial support for ARM NEON on Cortex-A9 and A15 CPUs.  All but ~10 tests
pass, and all examples compile and run correctly.  Most of the examples
show a ~2x speedup on a single A15 core versus scalar code.

Current open issues/TODOs
- Code quality looks decent, but hasn't been carefully examined.  Known
  issues/opportunities for improvement include:
  - fp32 vector divide is done as a series of scalar divides rather than
    a vector divide (which I believe exists, but I may be mistaken.)
    This is particularly harmful to examples/rt, which only runs ~1.5x
    faster with ispc, likely due to long chains of scalar divides.
  - The compiler isn't generating a vmin.f32 for e.g. the final scalar
    min in reduce_min(); instead it's generating a compare and then a
    select instruction (and similarly elsewhere).
  - There are some additional FIXMEs in builtins/target-neon.ll that
    include both a few pieces of missing functionality (e.g. rounding
    doubles) as well as places that deserve attention for possible
    code quality improvements.

- Currently only the "cortex-a9" and "cortex-15" CPU targets are
  supported; LLVM supports many other ARM CPUs and ispc should provide
  access to all of the ones that have NEON support (and aren't too
  obscure.)

- ~5 of the reduce-* tests hit an assertion inside LLVM (unfortunately
   only when the compiler runs on an ARM host, though).

- The Windows build hasn't been tested (though I've tried to update
  ispc.vcxproj appropriately).  It may just work, but will more likely
  have various small issues.)

- Anything related to 64-bit ARM has seen no attention.
2013-07-19 23:07:24 -07:00

89 lines
2.5 KiB
Makefile

TASK_CXX=../tasksys.cpp
TASK_LIB=-lpthread
TASK_OBJ=objs/tasksys.o
CXX=g++
CXXFLAGS=-Iobjs/ -O2
CC=gcc
CCFLAGS=-Iobjs/ -O2
LIBS=-lm $(TASK_LIB) -lstdc++
ISPC=ispc -O2 $(ISPC_FLAGS)
ISPC_HEADER=objs/$(ISPC_SRC:.ispc=_ispc.h)
ARCH:=$(shell uname -m | sed -e s/x86_64/x86/ -e s/arm.*/arm/ -e s/sa110/arm/)
ifeq ($(ARCH),x86)
ISPC_OBJS=$(addprefix objs/, $(ISPC_SRC:.ispc=)_ispc.o $(ISPC_SRC:.ispc=)_ispc_sse2.o \
$(ISPC_SRC:.ispc=)_ispc_sse4.o $(ISPC_SRC:.ispc=)_ispc_avx.o)
ISPC_TARGETS=$(ISPC_IA_TARGETS)
ISPC_FLAGS += --arch=x86-64
CXXFLAGS += -m64
CCFLAGS += -m64
else ifeq ($(ARCH),arm)
ISPC_OBJS=$(addprefix objs/, $(ISPC_SRC:.ispc=_ispc.o))
ISPC_TARGETS=$(ISPC_ARM_TARGETS)
else
$(error Unknown architecture $(ARCH) from uname -m)
endif
CPP_OBJS=$(addprefix objs/, $(CPP_SRC:.cpp=.o))
CC_OBJS=$(addprefix objs/, $(CC_SRC:.c=.o))
OBJS=$(CPP_OBJS) $(CC_OBJS) $(TASK_OBJ) $(ISPC_OBJS)
default: $(EXAMPLE)
all: $(EXAMPLE) $(EXAMPLE)-sse4 $(EXAMPLE)-generic16 $(EXAMPLE)-scalar
.PHONY: dirs clean
dirs:
/bin/mkdir -p objs/
objs/%.cpp objs/%.o objs/%.h: dirs
clean:
/bin/rm -rf objs *~ $(EXAMPLE) $(EXAMPLE)-sse4 $(EXAMPLE)-generic16
$(EXAMPLE): $(OBJS)
$(CXX) $(CXXFLAGS) -o $@ $^ $(LIBS)
objs/%.o: %.cpp dirs $(ISPC_HEADER)
$(CXX) $< $(CXXFLAGS) -c -o $@
objs/%.o: %.c dirs $(ISPC_HEADER)
$(CC) $< $(CCFLAGS) -c -o $@
objs/%.o: ../%.cpp dirs
$(CXX) $< $(CXXFLAGS) -c -o $@
objs/$(EXAMPLE).o: objs/$(EXAMPLE)_ispc.h
objs/%_ispc.h objs/%_ispc.o objs/%_ispc_sse2.o objs/%_ispc_sse4.o objs/%_ispc_avx.o: %.ispc
$(ISPC) --target=$(ISPC_TARGETS) $< -o objs/$*_ispc.o -h objs/$*_ispc.h
objs/$(ISPC_SRC:.ispc=)_sse4.cpp: $(ISPC_SRC)
$(ISPC) $< -o $@ --target=generic-4 --emit-c++ --c++-include-file=sse4.h
objs/$(ISPC_SRC:.ispc=)_sse4.o: objs/$(ISPC_SRC:.ispc=)_sse4.cpp
$(CXX) -I../intrinsics -msse4.2 $< $(CXXFLAGS) -c -o $@
$(EXAMPLE)-sse4: $(CPP_OBJS) objs/$(ISPC_SRC:.ispc=)_sse4.o
$(CXX) $(CXXFLAGS) -o $@ $^ $(LIBS)
objs/$(ISPC_SRC:.ispc=)_generic16.cpp: $(ISPC_SRC)
$(ISPC) $< -o $@ --target=generic-16 --emit-c++ --c++-include-file=generic-16.h
objs/$(ISPC_SRC:.ispc=)_generic16.o: objs/$(ISPC_SRC:.ispc=)_generic16.cpp
$(CXX) -I../intrinsics $< $(CXXFLAGS) -c -o $@
$(EXAMPLE)-generic16: $(CPP_OBJS) objs/$(ISPC_SRC:.ispc=)_generic16.o
$(CXX) $(CXXFLAGS) -o $@ $^ $(LIBS)
objs/$(ISPC_SRC:.ispc=)_scalar.o: $(ISPC_SRC)
$(ISPC) $< -o $@ --target=generic-1
$(EXAMPLE)-scalar: $(CPP_OBJS) objs/$(ISPC_SRC:.ispc=)_scalar.o
$(CXX) $(CXXFLAGS) -o $@ $^ $(LIBS)