Compare commits
397 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
016b502d46 | ||
|
|
c5f6653564 | ||
|
|
cf9a4e209e | ||
|
|
4dfc596d38 | ||
|
|
fe83ef7635 | ||
|
|
db8b08131f | ||
|
|
32815e628d | ||
|
|
71bdc67a45 | ||
|
|
cb9f50ef63 | ||
|
|
12c754c92b | ||
|
|
e4b3d03da5 | ||
|
|
cc26b66e99 | ||
|
|
34d81fa522 | ||
|
|
49f1a5c2b3 | ||
|
|
326c45fa17 | ||
|
|
a2bb899a6b | ||
|
|
9fedb1674e | ||
|
|
7c91b01125 | ||
|
|
c202e9e106 | ||
|
|
645a8c9349 | ||
|
|
abf7c423bb | ||
|
|
55d5c07d00 | ||
|
|
b9d6ba2aa0 | ||
|
|
a0c9f7823b | ||
|
|
99a27fe241 | ||
|
|
fefa86e0cf | ||
|
|
098c4910de | ||
|
|
17b7148300 | ||
|
|
f4a2ef28e3 | ||
|
|
f0d013ee76 | ||
|
|
5ece6fec04 | ||
|
|
d88dbf3612 | ||
|
|
2a18efef82 | ||
|
|
fd846fbe77 | ||
|
|
ca7cc4744e | ||
|
|
491fa239bd | ||
|
|
66765dc123 | ||
|
|
70a5348f43 | ||
|
|
2aa61007c6 | ||
|
|
acfbe77ffc | ||
|
|
08696653ca | ||
|
|
8a1a214ca9 | ||
|
|
7aaeb27e0f | ||
|
|
972043c146 | ||
|
|
8475dc082a | ||
|
|
d0e583b29c | ||
|
|
c8feee238b | ||
|
|
6712ecd928 | ||
|
|
d0c7b5d35c | ||
|
|
802add1f97 | ||
|
|
95556811fa | ||
|
|
581472564d | ||
|
|
c7dc8862a5 | ||
|
|
4f8cf019ca | ||
|
|
4c9ac7fcf1 | ||
|
|
1dac05960a | ||
|
|
c27418da77 | ||
|
|
637d076e99 | ||
|
|
391678a5b3 | ||
|
|
4cd0cf1650 | ||
|
|
b813452d33 | ||
|
|
eb85da81e1 | ||
|
|
920cf63201 | ||
|
|
dc09d46bf4 | ||
|
|
05d1b06eeb | ||
|
|
c1661eb06b | ||
|
|
e9626a1d10 | ||
|
|
560bf5ca09 | ||
|
|
87c8a89349 | ||
|
|
255791f18e | ||
|
|
d5e3416e8e | ||
|
|
5b2d43f665 | ||
|
|
540fc6c2f3 | ||
|
|
b3c5043dcc | ||
|
|
d0d9aae968 | ||
|
|
3270e2bf5a | ||
|
|
013a3e7567 | ||
|
|
8368ba8539 | ||
|
|
ca0310e335 | ||
|
|
4690a678c1 | ||
|
|
f8a39402a2 | ||
|
|
247775d1ec | ||
|
|
6e9fea377d | ||
|
|
ca5c65d032 | ||
|
|
f9dc621ebe | ||
|
|
ffe484c31e | ||
|
|
62cd3418ca | ||
|
|
d8a8f3a996 | ||
|
|
0ad8dbbfc9 | ||
|
|
e15a1946c6 | ||
|
|
8878826661 | ||
|
|
95a8b6e5e8 | ||
|
|
388d0d2cfd | ||
|
|
d3a374e71c | ||
|
|
1da2834b1e | ||
|
|
ca3100874f | ||
|
|
117f48a331 | ||
|
|
89bbceefee | ||
|
|
7e18f0e247 | ||
|
|
3bb2dee275 | ||
|
|
88cd5584e8 | ||
|
|
20044f5749 | ||
|
|
10c5ba140c | ||
|
|
316de0b880 | ||
|
|
989966f81b | ||
|
|
ccd550dc52 | ||
|
|
ddf350839a | ||
|
|
6a7dd2787a | ||
|
|
349ab0b9c5 | ||
|
|
b5e6c6a2f3 | ||
|
|
2832ea641f | ||
|
|
cb7edf2725 | ||
|
|
f1f1be2822 | ||
|
|
7dffd65609 | ||
|
|
2c8a44e28b | ||
|
|
39bb95a6ee | ||
|
|
da9dba80a0 | ||
|
|
12f3285f9b | ||
|
|
7e954e4248 | ||
|
|
d74cc6397b | ||
|
|
777343331e | ||
|
|
a062653743 | ||
|
|
57af0eb64f | ||
|
|
60aae16752 | ||
|
|
e264d95019 | ||
|
|
0664f5a724 | ||
|
|
17c6a19527 | ||
|
|
cbc8b8259b | ||
|
|
1067a2e4be | ||
|
|
74a031a759 | ||
|
|
ee437193fb | ||
|
|
436c53037e | ||
|
|
f55ba9d3cb | ||
|
|
8adb99b768 | ||
|
|
13c42412d2 | ||
|
|
75507d8b35 | ||
|
|
ddfe4932ac | ||
|
|
28ac016928 | ||
|
|
9ec8e5a275 | ||
|
|
a473046058 | ||
|
|
a69b7a5a01 | ||
|
|
640918bcc0 | ||
|
|
f39fbdb3fc | ||
|
|
50d4d81062 | ||
|
|
3b95452481 | ||
|
|
c152ae3c32 | ||
|
|
f6cbaa78e8 | ||
|
|
7adb250b59 | ||
|
|
db5db5aefd | ||
|
|
8fdf84de04 | ||
|
|
ff5cbe80d1 | ||
|
|
e013e0a374 | ||
|
|
b7df312ca7 | ||
|
|
ce82c3c0ae | ||
|
|
2f958cfbda | ||
|
|
8ef41dfd97 | ||
|
|
3082ea4765 | ||
|
|
e482d29951 | ||
|
|
ff48dd7bfb | ||
|
|
7bf9c11822 | ||
|
|
f7937f1e4b | ||
|
|
0115eeabfe | ||
|
|
4b9c3ec0da | ||
|
|
55b81e35a7 | ||
|
|
2a1c7f2d47 | ||
|
|
8603f9838f | ||
|
|
95224f3f11 | ||
|
|
f81acbfe80 | ||
|
|
6d7ff7eba2 | ||
|
|
ad429db7e8 | ||
|
|
4c07abbaf4 | ||
|
|
e3c0551129 | ||
|
|
8971baa42b | ||
|
|
317a1f51f7 | ||
|
|
c63d139482 | ||
|
|
9e682362e9 | ||
|
|
56ec939692 | ||
|
|
a86b942730 | ||
|
|
52eb4c6014 | ||
|
|
f4adbbf90c | ||
|
|
cc86e4a7d2 | ||
|
|
e864447e4a | ||
|
|
73bf552cd6 | ||
|
|
f20a2d2ee9 | ||
|
|
0c25bc063c | ||
|
|
db72781d2a | ||
|
|
0c8ad09040 | ||
|
|
49880ab761 | ||
|
|
fe2d9aa600 | ||
|
|
1dead425e4 | ||
|
|
adb1e47a59 | ||
|
|
ffba8580c1 | ||
|
|
ea18427d29 | ||
|
|
f3089df086 | ||
|
|
157e7c97ae | ||
|
|
bb8e13e3c9 | ||
|
|
5b4673e8eb | ||
|
|
5b9de8cc07 | ||
|
|
33ea934c8f | ||
|
|
6b3e14b0a4 | ||
|
|
098ceb5567 | ||
|
|
8e2b0632e8 | ||
|
|
420d373d89 | ||
|
|
a59fd7eeb3 | ||
|
|
ee91fa1228 | ||
|
|
a2b5ce0172 | ||
|
|
3efbc71a01 | ||
|
|
b7c5af7e64 | ||
|
|
f939015b97 | ||
|
|
a9ed71f553 | ||
|
|
96a429694f | ||
|
|
fddc5e022e | ||
|
|
2236d53def | ||
|
|
4e018d0a20 | ||
|
|
977b983771 | ||
|
|
fa7a7fe23e | ||
|
|
724a843bbd | ||
|
|
a9ec745275 | ||
|
|
c2ecc15b93 | ||
|
|
83c8650b36 | ||
|
|
89cb809922 | ||
|
|
fdb4eaf437 | ||
|
|
0432f97555 | ||
|
|
8d1631b714 | ||
|
|
dac091552d | ||
|
|
ea027a95a8 | ||
|
|
f73abb05a7 | ||
|
|
d71c49494f | ||
|
|
25665f0841 | ||
|
|
1eec27f890 | ||
|
|
950f86200b | ||
|
|
e19f4931d1 | ||
|
|
0575b1f38d | ||
|
|
f6cd01f7cf | ||
|
|
f2fbc168af | ||
|
|
b50f6f1730 | ||
|
|
f8a7120d9c | ||
|
|
20dbf59420 | ||
|
|
c67a286aa6 | ||
|
|
c96fef6bc8 | ||
|
|
bba02f87ea | ||
|
|
12dc3f5c28 | ||
|
|
0f01a5dcbe | ||
|
|
664dc3bdda | ||
|
|
bdba3cd97d | ||
|
|
d9c0f9315a | ||
|
|
b7f17d435f | ||
|
|
37cdc18639 | ||
|
|
5893a9c49d | ||
|
|
24f58fa16a | ||
|
|
56ffc78fa4 | ||
|
|
061e68bc77 | ||
|
|
177e6312b4 | ||
|
|
1acf4032c2 | ||
|
|
9c5444698e | ||
|
|
65f3252760 | ||
|
|
e612abe4ba | ||
|
|
34352e4e0e | ||
|
|
1867b5b317 | ||
|
|
a5b7fca7e0 | ||
|
|
7be2c399b1 | ||
|
|
d6337b3b22 | ||
|
|
d2f8b0ace5 | ||
|
|
d805e8b183 | ||
|
|
1f0f2ec05f | ||
|
|
91ac3b9d7c | ||
|
|
d65bf2eb2f | ||
|
|
1bba9d4307 | ||
|
|
4388338dad | ||
|
|
2fb59c90cf | ||
|
|
68f6ea8def | ||
|
|
3f89295d10 | ||
|
|
748b292e77 | ||
|
|
6451c3d99d | ||
|
|
d14a2de168 | ||
|
|
642150095d | ||
|
|
3bf3ac7922 | ||
|
|
c6d1cebad4 | ||
|
|
08189ce08c | ||
|
|
7013d7d52f | ||
|
|
7045b76f84 | ||
|
|
58a0b4a20d | ||
|
|
0f8eee9809 | ||
|
|
0740299860 | ||
|
|
652215861e | ||
|
|
602209e5a8 | ||
|
|
b60f8b4f70 | ||
|
|
b67446d998 | ||
|
|
9670ab0887 | ||
|
|
0223bb85ee | ||
|
|
fd81255db1 | ||
|
|
8a8e1a7f73 | ||
|
|
ef05fbf424 | ||
|
|
fa01b63fa5 | ||
|
|
63d3d25030 | ||
|
|
a8db866228 | ||
|
|
0519eea951 | ||
|
|
f4653ecd11 | ||
|
|
5d67252ed0 | ||
|
|
5134de71c0 | ||
|
|
2be1251c70 | ||
|
|
c0161aa17f | ||
|
|
b683aa11b1 | ||
|
|
2654bb0112 | ||
|
|
d8728104b4 | ||
|
|
0be1b70fba | ||
|
|
a0e9793de3 | ||
|
|
da9200fcee | ||
|
|
54e8e8022b | ||
|
|
d84cf781da | ||
|
|
002f27a30f | ||
|
|
86d88e9773 | ||
|
|
fda00afe6e | ||
|
|
be0c77d556 | ||
|
|
0ed11a7832 | ||
|
|
ff6971fb15 | ||
|
|
5b4dbc8167 | ||
|
|
59f4c9985e | ||
|
|
8da9be1a09 | ||
|
|
11033e108e | ||
|
|
4f97262cf2 | ||
|
|
9b68b9087a | ||
|
|
15cc812e37 | ||
|
|
71317e6aa6 | ||
|
|
1abaaee73e | ||
|
|
78c6d3c02f | ||
|
|
48e9d4af39 | ||
|
|
cb7ad371c6 | ||
|
|
2951589825 | ||
|
|
f23dc5366a | ||
|
|
e3341176c5 | ||
|
|
8938e14442 | ||
|
|
4151778f5e | ||
|
|
23b85cd88d | ||
|
|
234e5cd3e1 | ||
|
|
f75c94a8f1 | ||
|
|
848a432640 | ||
|
|
dea13979e0 | ||
|
|
052d34bf5b | ||
|
|
d4c5e82896 | ||
|
|
562d61caff | ||
|
|
75f18c7c66 | ||
|
|
5d35349dc9 | ||
|
|
1a81173c93 | ||
|
|
1d9201fe3d | ||
|
|
6dbb15027a | ||
|
|
f23d030e43 | ||
|
|
701334ccf2 | ||
|
|
f48a662ed3 | ||
|
|
ced3f1f5fc | ||
|
|
018aa96c8b | ||
|
|
34eda04d9b | ||
|
|
45767ad197 | ||
|
|
f9463af75b | ||
|
|
6f6e28077f | ||
|
|
0a9a7c939a | ||
|
|
f30a5dea79 | ||
|
|
018b547c40 | ||
|
|
e82a720223 | ||
|
|
8d1b77b235 | ||
|
|
b8987faeee | ||
|
|
17fdab2793 | ||
|
|
1fa6520cb6 | ||
|
|
b6af5c16c6 | ||
|
|
10ebe88abf | ||
|
|
c0b41ad6f5 | ||
|
|
9920b30318 | ||
|
|
07f218137a | ||
|
|
89a5248f4f | ||
|
|
891919074e | ||
|
|
4adf527a4d | ||
|
|
533b539780 | ||
|
|
6f26ae9801 | ||
|
|
ddcdfff3ae | ||
|
|
5b48354d9a | ||
|
|
46bfef3fce | ||
|
|
20536bb339 | ||
|
|
f6605ee465 | ||
|
|
034507a35b | ||
|
|
0b2febcec0 | ||
|
|
d2fa735ef1 | ||
|
|
20f34b67da | ||
|
|
03f3db1e89 | ||
|
|
9805b0742d | ||
|
|
6000c696b2 | ||
|
|
5a2edf723b | ||
|
|
aec7da740a | ||
|
|
a79bc75b72 | ||
|
|
eaaebf7928 | ||
|
|
198aa9620e | ||
|
|
27c53a3c25 | ||
|
|
bd70182369 | ||
|
|
04df63d955 | ||
|
|
d59131d670 | ||
|
|
9475e13d81 | ||
|
|
765d86076f | ||
|
|
e2b6ed3db8 |
9
.gitignore
vendored
9
.gitignore
vendored
@@ -5,4 +5,11 @@ ispc
|
||||
ispc_test
|
||||
objs
|
||||
docs/doxygen
|
||||
docs/ispc.html
|
||||
docs/*.html
|
||||
tests*/*cpp
|
||||
tests*/*run
|
||||
examples/*/*.png
|
||||
examples/*/*.ppm
|
||||
examples/*/objs/*
|
||||
|
||||
|
||||
|
||||
145
Makefile
145
Makefile
@@ -2,23 +2,60 @@
|
||||
# ispc Makefile
|
||||
#
|
||||
|
||||
# If you have your own special version of llvm and/or clang, change
|
||||
# these variables to match.
|
||||
LLVM_CONFIG=$(shell which llvm-config)
|
||||
CLANG_INCLUDE=$(shell $(LLVM_CONFIG) --includedir)
|
||||
|
||||
# Add llvm bin to the path so any scripts run will go to the right llvm-config
|
||||
LLVM_BIN= $(shell $(LLVM_CONFIG) --bindir)
|
||||
export PATH:=$(LLVM_BIN):$(PATH)
|
||||
|
||||
ARCH_OS = $(shell uname)
|
||||
ifeq ($(ARCH_OS), Darwin)
|
||||
ARCH_OS2 = "OSX"
|
||||
else
|
||||
ARCH_OS2 = $(shell uname -o)
|
||||
endif
|
||||
ARCH_TYPE = $(shell arch)
|
||||
|
||||
ifeq ($(shell $(LLVM_CONFIG) --version), 3.1svn)
|
||||
LLVM_LIBS=-lLLVMAsmParser -lLLVMInstrumentation -lLLVMLinker \
|
||||
-lLLVMArchive -lLLVMBitReader -lLLVMDebugInfo -lLLVMJIT -lLLVMipo \
|
||||
-lLLVMBitWriter -lLLVMTableGen \
|
||||
-lLLVMX86Disassembler -lLLVMX86CodeGen -lLLVMSelectionDAG \
|
||||
-lLLVMAsmPrinter -lLLVMX86AsmParser -lLLVMX86Desc -lLLVMX86Info \
|
||||
-lLLVMX86AsmPrinter -lLLVMX86Utils -lLLVMMCDisassembler -lLLVMMCParser \
|
||||
-lLLVMCodeGen -lLLVMScalarOpts -lLLVMInstCombine -lLLVMTransformUtils \
|
||||
-lLLVMipa -lLLVMAnalysis -lLLVMMCJIT -lLLVMRuntimeDyld \
|
||||
-lLLVMExecutionEngine -lLLVMTarget -lLLVMMC -lLLVMObject -lLLVMCore \
|
||||
-lLLVMSupport
|
||||
else
|
||||
LLVM_LIBS=$(shell $(LLVM_CONFIG) --libs)
|
||||
endif
|
||||
|
||||
CLANG=clang
|
||||
CLANG_LIBS = -lclangFrontend -lclangDriver \
|
||||
-lclangSerialization -lclangParse -lclangSema \
|
||||
-lclangAnalysis -lclangAST -lclangLex -lclangBasic
|
||||
ifeq ($(shell $(LLVM_CONFIG) --version), 3.1svn)
|
||||
CLANG_LIBS += -lclangEdit
|
||||
endif
|
||||
|
||||
ISPC_LIBS=$(CLANG_LIBS) \
|
||||
$(shell llvm-config --ldflags --libs) \
|
||||
-lpthread -ldl
|
||||
ISPC_TEST_LIBS=$(shell llvm-config --ldflags --libs) \
|
||||
-lpthread -ldl
|
||||
ISPC_LIBS=$(shell $(LLVM_CONFIG) --ldflags) $(CLANG_LIBS) $(LLVM_LIBS) \
|
||||
-lpthread
|
||||
|
||||
LLVM_CXXFLAGS=$(shell llvm-config --cppflags)
|
||||
LLVM_VERSION=$(shell llvm-config --version | sed s/\\./_/)
|
||||
LLVM_VERSION_DEF=-DLLVM_$(LLVM_VERSION)
|
||||
ifeq ($(ARCH_OS),Linux)
|
||||
ISPC_LIBS += -ldl
|
||||
endif
|
||||
|
||||
ifeq ($(ARCH_OS2),Msys)
|
||||
ISPC_LIBS += -lshlwapi -limagehlp -lpsapi
|
||||
endif
|
||||
|
||||
LLVM_CXXFLAGS=$(shell $(LLVM_CONFIG) --cppflags)
|
||||
LLVM_VERSION=LLVM_$(shell $(LLVM_CONFIG) --version | sed s/\\./_/)
|
||||
LLVM_VERSION_DEF=-D$(LLVM_VERSION)
|
||||
|
||||
BUILD_DATE=$(shell date +%Y%m%d)
|
||||
BUILD_VERSION=$(shell git log --abbrev-commit --abbrev=16 | head -1)
|
||||
@@ -26,18 +63,15 @@ BUILD_VERSION=$(shell git log --abbrev-commit --abbrev=16 | head -1)
|
||||
CXX=g++
|
||||
CPP=cpp
|
||||
OPT=-g3
|
||||
CXXFLAGS=$(OPT) $(LLVM_CXXFLAGS) -I. -Iobjs/ -Wall $(LLVM_VERSION_DEF) \
|
||||
CXXFLAGS=$(OPT) $(LLVM_CXXFLAGS) -I. -Iobjs/ -I$(CLANG_INCLUDE) \
|
||||
-Wall $(LLVM_VERSION_DEF) \
|
||||
-DBUILD_DATE="\"$(BUILD_DATE)\"" -DBUILD_VERSION="\"$(BUILD_VERSION)\""
|
||||
|
||||
LDFLAGS=
|
||||
ifeq ($(ARCH_OS),Linux)
|
||||
# try to link everything statically under Linux (including libstdc++) so
|
||||
# that the binaries we generate will be portable across distributions...
|
||||
ifeq ($(ARCH_TYPE),x86_64)
|
||||
LDFLAGS=-static -L/usr/lib/gcc/x86_64-linux-gnu/4.4
|
||||
else
|
||||
LDFLAGS=-L/usr/lib/gcc/i686-redhat-linux/4.6.0
|
||||
endif
|
||||
LDFLAGS=-static
|
||||
endif
|
||||
|
||||
LEX=flex
|
||||
@@ -45,21 +79,25 @@ YACC=bison -d -v -t
|
||||
|
||||
###########################################################################
|
||||
|
||||
CXX_SRC=ast.cpp builtins.cpp ctx.cpp decl.cpp expr.cpp func.cpp ispc.cpp \
|
||||
llvmutil.cpp main.cpp module.cpp opt.cpp stmt.cpp sym.cpp type.cpp \
|
||||
util.cpp
|
||||
CXX_SRC=ast.cpp builtins.cpp cbackend.cpp ctx.cpp decl.cpp expr.cpp func.cpp \
|
||||
ispc.cpp llvmutil.cpp main.cpp module.cpp opt.cpp stmt.cpp sym.cpp \
|
||||
type.cpp util.cpp
|
||||
HEADERS=ast.h builtins.h ctx.h decl.h expr.h func.h ispc.h llvmutil.h module.h \
|
||||
opt.h stmt.h sym.h type.h util.h
|
||||
BUILTINS_SRC=builtins-avx.ll builtins-avx-x2.ll builtins-sse2.ll builtins-sse2-x2.ll \
|
||||
builtins-sse4.ll builtins-sse4-x2.ll builtins-dispatch.ll
|
||||
TARGETS=avx1 avx1-x2 avx2 avx2-x2 sse2 sse2-x2 sse4 sse4-x2 generic-4 generic-8 \
|
||||
generic-16 generic-1
|
||||
BUILTINS_SRC=$(addprefix builtins/target-, $(addsuffix .ll, $(TARGETS))) \
|
||||
builtins/dispatch.ll
|
||||
BUILTINS_OBJS=$(addprefix builtins-, $(notdir $(BUILTINS_SRC:.ll=.o))) \
|
||||
builtins-c-32.cpp builtins-c-64.cpp
|
||||
BISON_SRC=parse.yy
|
||||
FLEX_SRC=lex.ll
|
||||
|
||||
OBJS=$(addprefix objs/, $(CXX_SRC:.cpp=.o) $(BUILTINS_SRC:.ll=.o) \
|
||||
builtins-c-32.o builtins-c-64.o stdlib_ispc.o $(BISON_SRC:.yy=.o) \
|
||||
$(FLEX_SRC:.ll=.o))
|
||||
OBJS=$(addprefix objs/, $(CXX_SRC:.cpp=.o) $(BUILTINS_OBJS) \
|
||||
stdlib_generic_ispc.o stdlib_x86_ispc.o \
|
||||
$(BISON_SRC:.yy=.o) $(FLEX_SRC:.ll=.o))
|
||||
|
||||
default: ispc ispc_test
|
||||
default: ispc
|
||||
|
||||
.PHONY: dirs clean depend doxygen print_llvm_src
|
||||
.PRECIOUS: objs/builtins-%.cpp
|
||||
@@ -78,7 +116,7 @@ print_llvm_src:
|
||||
@echo Using LLVM `llvm-config --version` from `llvm-config --libdir`
|
||||
|
||||
clean:
|
||||
/bin/rm -rf objs ispc ispc_test
|
||||
/bin/rm -rf objs ispc
|
||||
|
||||
doxygen:
|
||||
/bin/rm -rf docs/doxygen
|
||||
@@ -88,14 +126,18 @@ ispc: print_llvm_src dirs $(OBJS)
|
||||
@echo Creating ispc executable
|
||||
@$(CXX) $(LDFLAGS) -o $@ $(OBJS) $(ISPC_LIBS)
|
||||
|
||||
ispc_test: dirs ispc_test.cpp
|
||||
@echo Creating ispc_test executable
|
||||
@$(CXX) $(LDFLAGS) $(CXXFLAGS) -o $@ ispc_test.cpp $(ISPC_TEST_LIBS)
|
||||
|
||||
objs/%.o: %.cpp
|
||||
@echo Compiling $<
|
||||
@$(CXX) $(CXXFLAGS) -o $@ -c $<
|
||||
|
||||
objs/cbackend.o: cbackend.cpp
|
||||
@echo Compiling $<
|
||||
@$(CXX) -fno-rtti -fno-exceptions $(CXXFLAGS) -o $@ -c $<
|
||||
|
||||
objs/%.o: objs/%.cpp
|
||||
@echo Compiling $<
|
||||
@$(CXX) $(CXXFLAGS) -o $@ -c $<
|
||||
|
||||
objs/parse.cc: parse.yy
|
||||
@echo Running bison on $<
|
||||
@$(YACC) -o $@ $<
|
||||
@@ -112,41 +154,24 @@ objs/lex.o: objs/lex.cpp $(HEADERS) objs/parse.cc
|
||||
@echo Compiling $<
|
||||
@$(CXX) $(CXXFLAGS) -o $@ -c $<
|
||||
|
||||
objs/builtins-%.cpp: builtins-%.ll
|
||||
@echo Creating C++ source from builtin definitions file $<
|
||||
@m4 -DLLVM_VERSION=$(LLVM_VERSION) builtins.m4 $< | ./bitcode2cpp.py $< > $@
|
||||
|
||||
objs/builtins-%.o: objs/builtins-%.cpp
|
||||
@echo Compiling $<
|
||||
@$(CXX) $(CXXFLAGS) -o $@ -c $<
|
||||
|
||||
objs/builtins-c-32.cpp: builtins-c.c
|
||||
objs/builtins-%.cpp: builtins/%.ll builtins/util.m4 $(wildcard builtins/*common.ll)
|
||||
@echo Creating C++ source from builtins definition file $<
|
||||
@$(CLANG) -m32 -emit-llvm -c $< -o - | llvm-dis - | ./bitcode2cpp.py builtins-c-32.c > $@
|
||||
@m4 -Ibuiltins/ -DLLVM_VERSION=$(LLVM_VERSION) $< | python bitcode2cpp.py $< > $@
|
||||
|
||||
objs/builtins-c-32.o: objs/builtins-c-32.cpp
|
||||
@echo Compiling $<
|
||||
@$(CXX) $(CXXFLAGS) -o $@ -c $<
|
||||
|
||||
objs/builtins-c-64.cpp: builtins-c.c
|
||||
objs/builtins-c-32.cpp: builtins/builtins.c
|
||||
@echo Creating C++ source from builtins definition file $<
|
||||
@$(CLANG) -m64 -emit-llvm -c $< -o - | llvm-dis - | ./bitcode2cpp.py builtins-c-64.c > $@
|
||||
@$(CLANG) -m32 -emit-llvm -c $< -o - | llvm-dis - | python bitcode2cpp.py c-32 > $@
|
||||
|
||||
objs/builtins-c-64.o: objs/builtins-c-64.cpp
|
||||
@echo Compiling $<
|
||||
@$(CXX) $(CXXFLAGS) -o $@ -c $<
|
||||
objs/builtins-c-64.cpp: builtins/builtins.c
|
||||
@echo Creating C++ source from builtins definition file $<
|
||||
@$(CLANG) -m64 -emit-llvm -c $< -o - | llvm-dis - | python bitcode2cpp.py c-64 > $@
|
||||
|
||||
objs/stdlib_ispc.cpp: stdlib.ispc
|
||||
@echo Creating C++ source from $<
|
||||
@$(CLANG) -E -x c -DISPC=1 -DPI=3.1415926536 $< -o - | ./stdlib2cpp.py > $@
|
||||
objs/stdlib_generic_ispc.cpp: stdlib.ispc
|
||||
@echo Creating C++ source from $< for generic
|
||||
@$(CLANG) -E -x c -DISPC_TARGET_GENERIC=1 -DISPC=1 -DPI=3.1415926536 $< -o - | \
|
||||
python stdlib2cpp.py generic > $@
|
||||
|
||||
objs/stdlib_ispc.o: objs/stdlib_ispc.cpp
|
||||
@echo Compiling $<
|
||||
@$(CXX) $(CXXFLAGS) -o $@ -c $<
|
||||
|
||||
objs/builtins-sse2.cpp: builtins.m4 builtins-sse2-common.ll builtins-sse2.ll
|
||||
objs/builtins-sse2-x2.cpp: builtins.m4 builtins-sse2-common.ll builtins-sse2-x2.ll
|
||||
objs/builtins-sse4.cpp: builtins.m4 builtins-sse4-common.ll builtins-sse4.ll
|
||||
objs/builtins-sse4-x2.cpp: builtins.m4 builtins-sse4-common.ll builtins-sse4-x2.ll
|
||||
objs/builtins-avx.cpp: builtins.m4 builtins-avx-common.ll builtins-avx.ll
|
||||
objs/builtins-avx-x2.cpp: builtins.m4 builtins-avx-common.ll builtins-avx-x2.ll
|
||||
objs/stdlib_x86_ispc.cpp: stdlib.ispc
|
||||
@echo Creating C++ source from $< for x86
|
||||
@$(CLANG) -E -x c -DISPC=1 -DPI=3.1415926536 $< -o - | \
|
||||
python stdlib2cpp.py x86 > $@
|
||||
|
||||
90
README.rst
Normal file
90
README.rst
Normal file
@@ -0,0 +1,90 @@
|
||||
==============================
|
||||
Intel(r) SPMD Program Compiler
|
||||
==============================
|
||||
|
||||
``ispc`` is a compiler for a variant of the C programming language, with
|
||||
extensions for `single program, multiple data
|
||||
<http://en.wikipedia.org/wiki/SPMD>`_ programming. Under the SPMD model,
|
||||
the programmer writes a program that generally appears to be a regular
|
||||
serial program, though the execution model is actually that a number of
|
||||
*program instances* execute in parallel on the hardware.
|
||||
|
||||
Overview
|
||||
--------
|
||||
|
||||
``ispc`` compiles a C-based SPMD programming language to run on the SIMD
|
||||
units of CPUs; it frequently provides a 3x or more speedup on CPUs with
|
||||
4-wide vector SSE units and 5x-6x on CPUs with 8-wide AVX vector units,
|
||||
without any of the difficulty of writing intrinsics code. Parallelization
|
||||
across multiple cores is also supported by ``ispc``, making it
|
||||
possible to write programs that achieve performance improvement that scales
|
||||
by both number of cores and vector unit size.
|
||||
|
||||
There are a few key principles in the design of ``ispc``:
|
||||
|
||||
* To build a small set of extensions to the C language that
|
||||
would deliver excellent performance to performance-oriented
|
||||
programmers who want to run SPMD programs on the CPU.
|
||||
|
||||
* To provide a thin abstraction layer between the programmer
|
||||
and the hardware--in particular, to have an execution and
|
||||
data model where the programmer can cleanly reason about the
|
||||
mapping of their source program to compiled assembly language
|
||||
and the underlying hardware.
|
||||
|
||||
* To make it possible to harness the computational power of SIMD
|
||||
vector units without the extremely low-programmer-productivity
|
||||
activity of directly writing intrinsics.
|
||||
|
||||
* To explore opportunities from close coupling between C/C++
|
||||
application code and SPMD ``ispc`` code running on the
|
||||
same processor--to have lightweight function calls between
|
||||
the two languages and to share data directly via pointers without
|
||||
copying or reformatting.
|
||||
|
||||
``ispc`` is an open source compiler with the BSD license. It uses the
|
||||
remarkable `LLVM Compiler Infrastructure <http://llvm.org>`_ for back-end
|
||||
code generation and optimization and is `hosted on
|
||||
github <http://github.com/ispc/ispc/>`_. It supports Windows, Mac, and
|
||||
Linux, with both x86 and x86-64 targets. It currently supports the SSE2,
|
||||
SSE4, AVX1, and AVX2 instruction sets.
|
||||
|
||||
Features
|
||||
--------
|
||||
|
||||
``ispc`` provides a number of key features to developers:
|
||||
|
||||
* Familiarity as an extension of the C programming
|
||||
language: ``ispc`` supports familiar C syntax and
|
||||
programming idioms, while adding the ability to write SPMD
|
||||
programs.
|
||||
|
||||
* High-quality SIMD code generation: the performance
|
||||
of code generated by ``ispc`` is often close to that of
|
||||
hand-written intrinsics code.
|
||||
|
||||
* Ease of adoption with existing software
|
||||
systems: functions written in ``ispc`` directly
|
||||
interoperate with application functions written in C/C++ and
|
||||
with application data structures.
|
||||
|
||||
* Portability across more than a decade of CPU
|
||||
generations: ``ispc`` has targets for SSE2, SSE4, AVX
|
||||
(and soon, AVX2).
|
||||
|
||||
* Portability across operating systems: Microsoft
|
||||
Windows, Mac OS X, and Linux are all supported
|
||||
by ``ispc``.
|
||||
|
||||
* Debugging with standard tools: ``ispc``
|
||||
programs can be debugged with standard debuggers (OS X and
|
||||
Linux only).
|
||||
|
||||
Additional Resources
|
||||
--------------------
|
||||
|
||||
Prebuilt ``ispc`` binaries for Windows, OS X and Linux can be downloaded
|
||||
from the `ispc downloads page <http://ispc.github.com/downloads.html>`_.
|
||||
See also additional
|
||||
`documentation <http://ispc.github.com/documentation.html>`_ and additional
|
||||
`performance information <http://ispc.github.com/perf.html>`_.
|
||||
22
README.txt
22
README.txt
@@ -1,22 +0,0 @@
|
||||
==============================
|
||||
Intel(r) SPMD Program Compiler
|
||||
==============================
|
||||
|
||||
Welcome to the Intel(r) SPMD Program Compiler (ispc)!
|
||||
|
||||
ispc is a new compiler for "single program, multiple data" (SPMD)
|
||||
programs. Under the SPMD model, the programmer writes a program that mostly
|
||||
appears to be a regular serial program, though the execution model is
|
||||
actually that a number of program instances execute in parallel on the
|
||||
hardware. ispc compiles a C-based SPMD programming language to run on the
|
||||
SIMD units of CPUs; it frequently provides a 3x or more speedup on CPUs
|
||||
with 4-wide SSE units, without any of the difficulty of writing intrinsics
|
||||
code.
|
||||
|
||||
ispc is an open source compiler under the BSD license; see the file
|
||||
LICENSE.txt. ispc supports Windows, Mac, and Linux, with both x86 and
|
||||
x86-64 targets. It currently supports the SSE2, SSE4, and AVX instruction
|
||||
sets.
|
||||
|
||||
For more information and examples, as well as a wiki and the bug database,
|
||||
see the ispc distribution site, http://ispc.github.com.
|
||||
416
ast.cpp
416
ast.cpp
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
Copyright (c) 2011, Intel Corporation
|
||||
Copyright (c) 2011-2012, Intel Corporation
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
@@ -32,12 +32,17 @@
|
||||
*/
|
||||
|
||||
/** @file ast.cpp
|
||||
@brief
|
||||
*/
|
||||
|
||||
@brief General functionality related to abstract syntax trees and
|
||||
traversal of them.
|
||||
*/
|
||||
|
||||
#include "ast.h"
|
||||
#include "expr.h"
|
||||
#include "func.h"
|
||||
#include "stmt.h"
|
||||
#include "sym.h"
|
||||
#include "util.h"
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
// ASTNode
|
||||
@@ -50,10 +55,10 @@ ASTNode::~ASTNode() {
|
||||
// AST
|
||||
|
||||
void
|
||||
AST::AddFunction(Symbol *sym, const std::vector<Symbol *> &args, Stmt *code) {
|
||||
AST::AddFunction(Symbol *sym, Stmt *code) {
|
||||
if (sym == NULL)
|
||||
return;
|
||||
functions.push_back(new Function(sym, args, code));
|
||||
functions.push_back(new Function(sym, code));
|
||||
}
|
||||
|
||||
|
||||
@@ -63,3 +68,404 @@ AST::GenerateIR() {
|
||||
functions[i]->GenerateIR();
|
||||
}
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
|
||||
ASTNode *
|
||||
WalkAST(ASTNode *node, ASTPreCallBackFunc preFunc, ASTPostCallBackFunc postFunc,
|
||||
void *data) {
|
||||
if (node == NULL)
|
||||
return node;
|
||||
|
||||
// Call the callback function
|
||||
if (preFunc != NULL) {
|
||||
if (preFunc(node, data) == false)
|
||||
// The function asked us to not continue recursively, so stop.
|
||||
return node;
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////
|
||||
// Handle Statements
|
||||
if (dynamic_cast<Stmt *>(node) != NULL) {
|
||||
ExprStmt *es;
|
||||
DeclStmt *ds;
|
||||
IfStmt *is;
|
||||
DoStmt *dos;
|
||||
ForStmt *fs;
|
||||
ForeachStmt *fes;
|
||||
CaseStmt *cs;
|
||||
DefaultStmt *defs;
|
||||
SwitchStmt *ss;
|
||||
ReturnStmt *rs;
|
||||
LabeledStmt *ls;
|
||||
StmtList *sl;
|
||||
PrintStmt *ps;
|
||||
AssertStmt *as;
|
||||
DeleteStmt *dels;
|
||||
|
||||
if ((es = dynamic_cast<ExprStmt *>(node)) != NULL)
|
||||
es->expr = (Expr *)WalkAST(es->expr, preFunc, postFunc, data);
|
||||
else if ((ds = dynamic_cast<DeclStmt *>(node)) != NULL) {
|
||||
for (unsigned int i = 0; i < ds->vars.size(); ++i)
|
||||
ds->vars[i].init = (Expr *)WalkAST(ds->vars[i].init, preFunc,
|
||||
postFunc, data);
|
||||
}
|
||||
else if ((is = dynamic_cast<IfStmt *>(node)) != NULL) {
|
||||
is->test = (Expr *)WalkAST(is->test, preFunc, postFunc, data);
|
||||
is->trueStmts = (Stmt *)WalkAST(is->trueStmts, preFunc,
|
||||
postFunc, data);
|
||||
is->falseStmts = (Stmt *)WalkAST(is->falseStmts, preFunc,
|
||||
postFunc, data);
|
||||
}
|
||||
else if ((dos = dynamic_cast<DoStmt *>(node)) != NULL) {
|
||||
dos->testExpr = (Expr *)WalkAST(dos->testExpr, preFunc,
|
||||
postFunc, data);
|
||||
dos->bodyStmts = (Stmt *)WalkAST(dos->bodyStmts, preFunc,
|
||||
postFunc, data);
|
||||
}
|
||||
else if ((fs = dynamic_cast<ForStmt *>(node)) != NULL) {
|
||||
fs->init = (Stmt *)WalkAST(fs->init, preFunc, postFunc, data);
|
||||
fs->test = (Expr *)WalkAST(fs->test, preFunc, postFunc, data);
|
||||
fs->step = (Stmt *)WalkAST(fs->step, preFunc, postFunc, data);
|
||||
fs->stmts = (Stmt *)WalkAST(fs->stmts, preFunc, postFunc, data);
|
||||
}
|
||||
else if ((fes = dynamic_cast<ForeachStmt *>(node)) != NULL) {
|
||||
for (unsigned int i = 0; i < fes->startExprs.size(); ++i)
|
||||
fes->startExprs[i] = (Expr *)WalkAST(fes->startExprs[i], preFunc,
|
||||
postFunc, data);
|
||||
for (unsigned int i = 0; i < fes->endExprs.size(); ++i)
|
||||
fes->endExprs[i] = (Expr *)WalkAST(fes->endExprs[i], preFunc,
|
||||
postFunc, data);
|
||||
fes->stmts = (Stmt *)WalkAST(fes->stmts, preFunc, postFunc, data);
|
||||
}
|
||||
else if ((cs = dynamic_cast<CaseStmt *>(node)) != NULL)
|
||||
cs->stmts = (Stmt *)WalkAST(cs->stmts, preFunc, postFunc, data);
|
||||
else if ((defs = dynamic_cast<DefaultStmt *>(node)) != NULL)
|
||||
defs->stmts = (Stmt *)WalkAST(defs->stmts, preFunc, postFunc, data);
|
||||
else if ((ss = dynamic_cast<SwitchStmt *>(node)) != NULL) {
|
||||
ss->expr = (Expr *)WalkAST(ss->expr, preFunc, postFunc, data);
|
||||
ss->stmts = (Stmt *)WalkAST(ss->stmts, preFunc, postFunc, data);
|
||||
}
|
||||
else if (dynamic_cast<BreakStmt *>(node) != NULL ||
|
||||
dynamic_cast<ContinueStmt *>(node) != NULL ||
|
||||
dynamic_cast<GotoStmt *>(node) != NULL) {
|
||||
// nothing
|
||||
}
|
||||
else if ((ls = dynamic_cast<LabeledStmt *>(node)) != NULL)
|
||||
ls->stmt = (Stmt *)WalkAST(ls->stmt, preFunc, postFunc, data);
|
||||
else if ((rs = dynamic_cast<ReturnStmt *>(node)) != NULL)
|
||||
rs->expr = (Expr *)WalkAST(rs->expr, preFunc, postFunc, data);
|
||||
else if ((sl = dynamic_cast<StmtList *>(node)) != NULL) {
|
||||
std::vector<Stmt *> &sls = sl->stmts;
|
||||
for (unsigned int i = 0; i < sls.size(); ++i)
|
||||
sls[i] = (Stmt *)WalkAST(sls[i], preFunc, postFunc, data);
|
||||
}
|
||||
else if ((ps = dynamic_cast<PrintStmt *>(node)) != NULL)
|
||||
ps->values = (Expr *)WalkAST(ps->values, preFunc, postFunc, data);
|
||||
else if ((as = dynamic_cast<AssertStmt *>(node)) != NULL)
|
||||
as->expr = (Expr *)WalkAST(as->expr, preFunc, postFunc, data);
|
||||
else if ((dels = dynamic_cast<DeleteStmt *>(node)) != NULL)
|
||||
dels->expr = (Expr *)WalkAST(dels->expr, preFunc, postFunc, data);
|
||||
else
|
||||
FATAL("Unhandled statement type in WalkAST()");
|
||||
}
|
||||
else {
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
// Handle expressions
|
||||
Assert(dynamic_cast<Expr *>(node) != NULL);
|
||||
UnaryExpr *ue;
|
||||
BinaryExpr *be;
|
||||
AssignExpr *ae;
|
||||
SelectExpr *se;
|
||||
ExprList *el;
|
||||
FunctionCallExpr *fce;
|
||||
IndexExpr *ie;
|
||||
MemberExpr *me;
|
||||
TypeCastExpr *tce;
|
||||
ReferenceExpr *re;
|
||||
PtrDerefExpr *ptrderef;
|
||||
RefDerefExpr *refderef;
|
||||
SizeOfExpr *soe;
|
||||
AddressOfExpr *aoe;
|
||||
NewExpr *newe;
|
||||
|
||||
if ((ue = dynamic_cast<UnaryExpr *>(node)) != NULL)
|
||||
ue->expr = (Expr *)WalkAST(ue->expr, preFunc, postFunc, data);
|
||||
else if ((be = dynamic_cast<BinaryExpr *>(node)) != NULL) {
|
||||
be->arg0 = (Expr *)WalkAST(be->arg0, preFunc, postFunc, data);
|
||||
be->arg1 = (Expr *)WalkAST(be->arg1, preFunc, postFunc, data);
|
||||
}
|
||||
else if ((ae = dynamic_cast<AssignExpr *>(node)) != NULL) {
|
||||
ae->lvalue = (Expr *)WalkAST(ae->lvalue, preFunc, postFunc, data);
|
||||
ae->rvalue = (Expr *)WalkAST(ae->rvalue, preFunc, postFunc, data);
|
||||
}
|
||||
else if ((se = dynamic_cast<SelectExpr *>(node)) != NULL) {
|
||||
se->test = (Expr *)WalkAST(se->test, preFunc, postFunc, data);
|
||||
se->expr1 = (Expr *)WalkAST(se->expr1, preFunc, postFunc, data);
|
||||
se->expr2 = (Expr *)WalkAST(se->expr2, preFunc, postFunc, data);
|
||||
}
|
||||
else if ((el = dynamic_cast<ExprList *>(node)) != NULL) {
|
||||
for (unsigned int i = 0; i < el->exprs.size(); ++i)
|
||||
el->exprs[i] = (Expr *)WalkAST(el->exprs[i], preFunc,
|
||||
postFunc, data);
|
||||
}
|
||||
else if ((fce = dynamic_cast<FunctionCallExpr *>(node)) != NULL) {
|
||||
fce->func = (Expr *)WalkAST(fce->func, preFunc, postFunc, data);
|
||||
fce->args = (ExprList *)WalkAST(fce->args, preFunc, postFunc, data);
|
||||
fce->launchCountExpr = (Expr *)WalkAST(fce->launchCountExpr, preFunc,
|
||||
postFunc, data);
|
||||
}
|
||||
else if ((ie = dynamic_cast<IndexExpr *>(node)) != NULL) {
|
||||
ie->baseExpr = (Expr *)WalkAST(ie->baseExpr, preFunc, postFunc, data);
|
||||
ie->index = (Expr *)WalkAST(ie->index, preFunc, postFunc, data);
|
||||
}
|
||||
else if ((me = dynamic_cast<MemberExpr *>(node)) != NULL)
|
||||
me->expr = (Expr *)WalkAST(me->expr, preFunc, postFunc, data);
|
||||
else if ((tce = dynamic_cast<TypeCastExpr *>(node)) != NULL)
|
||||
tce->expr = (Expr *)WalkAST(tce->expr, preFunc, postFunc, data);
|
||||
else if ((re = dynamic_cast<ReferenceExpr *>(node)) != NULL)
|
||||
re->expr = (Expr *)WalkAST(re->expr, preFunc, postFunc, data);
|
||||
else if ((ptrderef = dynamic_cast<PtrDerefExpr *>(node)) != NULL)
|
||||
ptrderef->expr = (Expr *)WalkAST(ptrderef->expr, preFunc, postFunc,
|
||||
data);
|
||||
else if ((refderef = dynamic_cast<RefDerefExpr *>(node)) != NULL)
|
||||
refderef->expr = (Expr *)WalkAST(refderef->expr, preFunc, postFunc,
|
||||
data);
|
||||
else if ((soe = dynamic_cast<SizeOfExpr *>(node)) != NULL)
|
||||
soe->expr = (Expr *)WalkAST(soe->expr, preFunc, postFunc, data);
|
||||
else if ((aoe = dynamic_cast<AddressOfExpr *>(node)) != NULL)
|
||||
aoe->expr = (Expr *)WalkAST(aoe->expr, preFunc, postFunc, data);
|
||||
else if ((newe = dynamic_cast<NewExpr *>(node)) != NULL) {
|
||||
newe->countExpr = (Expr *)WalkAST(newe->countExpr, preFunc,
|
||||
postFunc, data);
|
||||
newe->initExpr = (Expr *)WalkAST(newe->initExpr, preFunc,
|
||||
postFunc, data);
|
||||
}
|
||||
else if (dynamic_cast<SymbolExpr *>(node) != NULL ||
|
||||
dynamic_cast<ConstExpr *>(node) != NULL ||
|
||||
dynamic_cast<FunctionSymbolExpr *>(node) != NULL ||
|
||||
dynamic_cast<SyncExpr *>(node) != NULL ||
|
||||
dynamic_cast<NullPointerExpr *>(node) != NULL) {
|
||||
// nothing to do
|
||||
}
|
||||
else
|
||||
FATAL("Unhandled expression type in WalkAST().");
|
||||
}
|
||||
|
||||
// Call the callback function
|
||||
if (postFunc != NULL)
|
||||
return postFunc(node, data);
|
||||
else
|
||||
return node;
|
||||
}
|
||||
|
||||
|
||||
static ASTNode *
|
||||
lOptimizeNode(ASTNode *node, void *) {
|
||||
return node->Optimize();
|
||||
}
|
||||
|
||||
|
||||
ASTNode *
|
||||
Optimize(ASTNode *root) {
|
||||
return WalkAST(root, NULL, lOptimizeNode, NULL);
|
||||
}
|
||||
|
||||
|
||||
Expr *
|
||||
Optimize(Expr *expr) {
|
||||
return (Expr *)Optimize((ASTNode *)expr);
|
||||
}
|
||||
|
||||
|
||||
Stmt *
|
||||
Optimize(Stmt *stmt) {
|
||||
return (Stmt *)Optimize((ASTNode *)stmt);
|
||||
}
|
||||
|
||||
|
||||
static ASTNode *
|
||||
lTypeCheckNode(ASTNode *node, void *) {
|
||||
return node->TypeCheck();
|
||||
}
|
||||
|
||||
|
||||
ASTNode *
|
||||
TypeCheck(ASTNode *root) {
|
||||
return WalkAST(root, NULL, lTypeCheckNode, NULL);
|
||||
}
|
||||
|
||||
|
||||
Expr *
|
||||
TypeCheck(Expr *expr) {
|
||||
return (Expr *)TypeCheck((ASTNode *)expr);
|
||||
}
|
||||
|
||||
|
||||
Stmt *
|
||||
TypeCheck(Stmt *stmt) {
|
||||
return (Stmt *)TypeCheck((ASTNode *)stmt);
|
||||
}
|
||||
|
||||
|
||||
struct CostData {
|
||||
CostData() { cost = foreachDepth = 0; }
|
||||
|
||||
int cost;
|
||||
int foreachDepth;
|
||||
};
|
||||
|
||||
|
||||
static bool
|
||||
lCostCallbackPre(ASTNode *node, void *d) {
|
||||
CostData *data = (CostData *)d;
|
||||
if (dynamic_cast<ForeachStmt *>(node) != NULL)
|
||||
++data->foreachDepth;
|
||||
if (data->foreachDepth == 0)
|
||||
data->cost += node->EstimateCost();
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
static ASTNode *
|
||||
lCostCallbackPost(ASTNode *node, void *d) {
|
||||
CostData *data = (CostData *)d;
|
||||
if (dynamic_cast<ForeachStmt *>(node) != NULL)
|
||||
--data->foreachDepth;
|
||||
return node;
|
||||
}
|
||||
|
||||
|
||||
int
|
||||
EstimateCost(ASTNode *root) {
|
||||
CostData data;
|
||||
WalkAST(root, lCostCallbackPre, lCostCallbackPost, &data);
|
||||
return data.cost;
|
||||
}
|
||||
|
||||
|
||||
/** Given an AST node, check to see if it's safe if we happen to run the
|
||||
code for that node with the execution mask all off.
|
||||
*/
|
||||
static bool
|
||||
lCheckAllOffSafety(ASTNode *node, void *data) {
|
||||
bool *okPtr = (bool *)data;
|
||||
|
||||
FunctionCallExpr *fce;
|
||||
if ((fce = dynamic_cast<FunctionCallExpr *>(node)) != NULL) {
|
||||
if (fce->func == NULL)
|
||||
return false;
|
||||
|
||||
const Type *type = fce->func->GetType();
|
||||
const PointerType *pt = dynamic_cast<const PointerType *>(type);
|
||||
if (pt != NULL)
|
||||
type = pt->GetBaseType();
|
||||
const FunctionType *ftype = dynamic_cast<const FunctionType *>(type);
|
||||
Assert(ftype != NULL);
|
||||
|
||||
if (ftype->isSafe == false) {
|
||||
*okPtr = false;
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
if (dynamic_cast<AssertStmt *>(node) != NULL) {
|
||||
// While it's fine to run the assert for varying tests, it's not
|
||||
// desirable to check an assert on a uniform variable if all of the
|
||||
// lanes are off.
|
||||
*okPtr = false;
|
||||
return false;
|
||||
}
|
||||
|
||||
if (dynamic_cast<NewExpr *>(node) != NULL ||
|
||||
dynamic_cast<DeleteStmt *>(node) != NULL) {
|
||||
// We definitely don't want to run the uniform variants of these if
|
||||
// the mask is all off. It's also worth skipping the overhead of
|
||||
// executing the varying versions of them in the all-off mask case.
|
||||
*okPtr = false;
|
||||
return false;
|
||||
}
|
||||
|
||||
if (dynamic_cast<ForeachStmt *>(node) != NULL) {
|
||||
// foreach() statements also shouldn't be run with an all-off mask.
|
||||
// Since they re-establish an 'all on' mask, this would be pretty
|
||||
// unintuitive. (More generally, it's possibly a little strange to
|
||||
// allow foreach() in the presence of any non-uniform control
|
||||
// flow...)
|
||||
*okPtr = false;
|
||||
return false;
|
||||
}
|
||||
|
||||
if (g->target.allOffMaskIsSafe == true)
|
||||
// Don't worry about memory accesses if we have a target that can
|
||||
// safely run them with the mask all off
|
||||
return true;
|
||||
|
||||
IndexExpr *ie;
|
||||
if ((ie = dynamic_cast<IndexExpr *>(node)) != NULL && ie->baseExpr != NULL) {
|
||||
const Type *type = ie->baseExpr->GetType();
|
||||
if (type == NULL)
|
||||
return true;
|
||||
if (dynamic_cast<const ReferenceType *>(type) != NULL)
|
||||
type = type->GetReferenceTarget();
|
||||
|
||||
ConstExpr *ce = dynamic_cast<ConstExpr *>(ie->index);
|
||||
if (ce == NULL) {
|
||||
// indexing with a variable... -> not safe
|
||||
*okPtr = false;
|
||||
return false;
|
||||
}
|
||||
|
||||
const PointerType *pointerType =
|
||||
dynamic_cast<const PointerType *>(type);
|
||||
if (pointerType != NULL) {
|
||||
// pointer[index] -> can't be sure -> not safe
|
||||
*okPtr = false;
|
||||
return false;
|
||||
}
|
||||
|
||||
const SequentialType *seqType =
|
||||
dynamic_cast<const SequentialType *>(type);
|
||||
Assert(seqType != NULL);
|
||||
int nElements = seqType->GetElementCount();
|
||||
if (nElements == 0) {
|
||||
// Unsized array, so we can't be sure -> not safe
|
||||
*okPtr = false;
|
||||
return false;
|
||||
}
|
||||
|
||||
int32_t indices[ISPC_MAX_NVEC];
|
||||
int count = ce->AsInt32(indices);
|
||||
for (int i = 0; i < count; ++i) {
|
||||
if (indices[i] < 0 || indices[i] >= nElements) {
|
||||
// Index is out of bounds -> not safe
|
||||
*okPtr = false;
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
// All indices are in-bounds
|
||||
return true;
|
||||
}
|
||||
|
||||
MemberExpr *me;
|
||||
if ((me = dynamic_cast<MemberExpr *>(node)) != NULL &&
|
||||
me->dereferenceExpr) {
|
||||
*okPtr = false;
|
||||
return false;
|
||||
}
|
||||
|
||||
if (dynamic_cast<PtrDerefExpr *>(node) != NULL) {
|
||||
*okPtr = false;
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
bool
|
||||
SafeToRunWithMaskAllOff(ASTNode *root) {
|
||||
bool safe = true;
|
||||
WalkAST(root, lCheckAllOffSafety, NULL, &safe);
|
||||
return safe;
|
||||
}
|
||||
|
||||
70
ast.h
70
ast.h
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
Copyright (c) 2011, Intel Corporation
|
||||
Copyright (c) 2011-2012, Intel Corporation
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
@@ -53,10 +53,11 @@ public:
|
||||
virtual ~ASTNode();
|
||||
|
||||
/** The Optimize() method should perform any appropriate early-stage
|
||||
optimizations on the node (e.g. constant folding). The caller
|
||||
should use the returned ASTNode * in place of the original node.
|
||||
This method may return NULL if an error is encountered during
|
||||
optimization. */
|
||||
optimizations on the node (e.g. constant folding). This method
|
||||
will be called after the node's children have already been
|
||||
optimized, and the caller will store the returned ASTNode * in
|
||||
place of the original node. This method should return NULL if an
|
||||
error is encountered during optimization. */
|
||||
virtual ASTNode *Optimize() = 0;
|
||||
|
||||
/** Type checking should be performed by the node when this method is
|
||||
@@ -65,6 +66,9 @@ public:
|
||||
pointer in place of the original ASTNode *. */
|
||||
virtual ASTNode *TypeCheck() = 0;
|
||||
|
||||
/** Estimate the execution cost of the node (not including the cost of
|
||||
the children). The value returned should be based on the COST_*
|
||||
enumerant values defined in ispc.h. */
|
||||
virtual int EstimateCost() const = 0;
|
||||
|
||||
/** All AST nodes must track the file position where they are
|
||||
@@ -80,8 +84,7 @@ class AST {
|
||||
public:
|
||||
/** Add the AST for a function described by the given declaration
|
||||
information and source code. */
|
||||
void AddFunction(Symbol *sym, const std::vector<Symbol *> &args,
|
||||
Stmt *code);
|
||||
void AddFunction(Symbol *sym, Stmt *code);
|
||||
|
||||
/** Generate LLVM IR for all of the functions into the current
|
||||
module. */
|
||||
@@ -91,4 +94,57 @@ private:
|
||||
std::vector<Function *> functions;
|
||||
};
|
||||
|
||||
|
||||
/** Callback function type for preorder traversal visiting function for
|
||||
the AST walk.
|
||||
*/
|
||||
typedef bool (* ASTPreCallBackFunc)(ASTNode *node, void *data);
|
||||
|
||||
/** Callback function type for postorder traversal visiting function for
|
||||
the AST walk.
|
||||
*/
|
||||
typedef ASTNode * (* ASTPostCallBackFunc)(ASTNode *node, void *data);
|
||||
|
||||
/** Walk (some portion of) an AST, starting from the given root node. At
|
||||
each node, if preFunc is non-NULL, call it, passing the given void
|
||||
*data pointer; if the call to preFunc function returns false, then the
|
||||
children of the node aren't visited. This function then makes
|
||||
recursive calls to WalkAST() to process the node's children; after
|
||||
doing so, calls postFunc, at the node. The return value from the
|
||||
postFunc call is returned by WalkAST() and stored in place of the node. */
|
||||
extern ASTNode *WalkAST(ASTNode *root, ASTPreCallBackFunc preFunc,
|
||||
ASTPostCallBackFunc postFunc, void *data);
|
||||
|
||||
/** Perform simple optimizations on the AST or portion thereof passed to
|
||||
this function, returning the resulting AST. */
|
||||
extern ASTNode *Optimize(ASTNode *root);
|
||||
|
||||
/** Convenience version of Optimize() for Expr *s that returns an Expr *
|
||||
(rather than an ASTNode *, which would require the caller to cast back
|
||||
to an Expr *). */
|
||||
extern Expr *Optimize(Expr *);
|
||||
|
||||
/** Convenience version of Optimize() for Stmt *s that returns a Stmt *
|
||||
(rather than an ASTNode *, which would require the caller to cast back
|
||||
to a Stmt *). */
|
||||
extern Stmt *Optimize(Stmt *);
|
||||
|
||||
/** Perform type-checking on the given AST (or portion of one), returning a
|
||||
pointer to the root of the resulting AST. */
|
||||
extern ASTNode *TypeCheck(ASTNode *root);
|
||||
|
||||
/** Convenience version of TypeCheck() for Expr *s that returns an Expr *. */
|
||||
extern Expr *TypeCheck(Expr *);
|
||||
|
||||
/** Convenience version of TypeCheck() for Stmt *s that returns a Stmt *. */
|
||||
extern Stmt *TypeCheck(Stmt *);
|
||||
|
||||
/** Returns an estimate of the execution cost of the tree starting at
|
||||
the given root. */
|
||||
extern int EstimateCost(ASTNode *root);
|
||||
|
||||
/** Returns true if it would be safe to run the given code with an "all
|
||||
off" mask. */
|
||||
extern bool SafeToRunWithMaskAllOff(ASTNode *root);
|
||||
|
||||
#endif // ISPC_AST_H
|
||||
|
||||
@@ -11,7 +11,10 @@ length=0
|
||||
|
||||
src=str(sys.argv[1])
|
||||
|
||||
target = re.sub(".*builtins-", "", src)
|
||||
target = re.sub("builtins/target-", "", src)
|
||||
target = re.sub(r"builtins\\target-", "", target)
|
||||
target = re.sub("builtins/", "", target)
|
||||
target = re.sub(r"builtins\\", "", target)
|
||||
target = re.sub("\.ll$", "", target)
|
||||
target = re.sub("\.c$", "", target)
|
||||
target = re.sub("-", "_", target)
|
||||
@@ -23,17 +26,21 @@ if platform.system() == 'Windows' or string.find(platform.system(), "CYGWIN_NT")
|
||||
try:
|
||||
as_out=subprocess.Popen([llvm_as, "-", "-o", "-"], stdout=subprocess.PIPE)
|
||||
except IOError:
|
||||
print >> sys.stderr, "Couldn't open " + src
|
||||
sys.stderr.write("Couldn't open " + src)
|
||||
sys.exit(1)
|
||||
|
||||
print "unsigned char builtins_bitcode_" + target + "[] = {"
|
||||
for line in as_out.stdout.readlines():
|
||||
length = length + len(line)
|
||||
for c in line:
|
||||
print ord(c)
|
||||
print ", "
|
||||
print " 0 };\n\n"
|
||||
print "int builtins_bitcode_" + target + "_length = " + str(length) + ";\n"
|
||||
width = 16;
|
||||
sys.stdout.write("unsigned char builtins_bitcode_" + target + "[] = {\n")
|
||||
|
||||
data = as_out.stdout.read()
|
||||
for i in range(0, len(data), 1):
|
||||
sys.stdout.write("0x%0.2X, " % ord(data[i:i+1]))
|
||||
|
||||
if i%width == (width-1):
|
||||
sys.stdout.write("\n")
|
||||
|
||||
sys.stdout.write("0x00 };\n\n")
|
||||
sys.stdout.write("int builtins_bitcode_" + target + "_length = " + str(i+1) + ";\n")
|
||||
|
||||
as_out.wait()
|
||||
|
||||
|
||||
@@ -8,7 +8,6 @@ REM Both the LLVM binaries and python need to be in the path
|
||||
set path=%LLVM_INSTALL_DIR%\bin;%PATH%;c:\cygwin\bin
|
||||
|
||||
msbuild ispc.vcxproj /V:m /p:Platform=Win32 /p:Configuration=Release
|
||||
msbuild ispc_test.vcxproj /V:m /p:Platform=Win32 /p:Configuration=Release
|
||||
|
||||
msbuild examples\examples.sln /V:m /p:Platform=x64 /p:Configuration=Release /t:rebuild
|
||||
msbuild examples\examples.sln /V:m /p:Platform=x64 /p:Configuration=Debug /t:rebuild
|
||||
|
||||
11
buildispc.bat
Normal file
11
buildispc.bat
Normal file
@@ -0,0 +1,11 @@
|
||||
@echo off
|
||||
|
||||
REM If LLVM_INSTALL_DIR isn't set globally in your environment,
|
||||
REM it can be set here.
|
||||
set LLVM_INSTALL_DIR=c:\users\mmp\llvm-dev
|
||||
set LLVM_VERSION=3.1svn
|
||||
|
||||
REM Both the LLVM binaries and python need to be in the path
|
||||
set path=%LLVM_INSTALL_DIR%\bin;%PATH%;c:\cygwin\bin
|
||||
|
||||
msbuild ispc.vcxproj /V:m /p:Platform=Win32 /p:Configuration=Release
|
||||
189
builtins.cpp
189
builtins.cpp
@@ -99,6 +99,9 @@ lLLVMTypeToISPCType(const llvm::Type *t, bool intAsUnsigned) {
|
||||
return intAsUnsigned ? AtomicType::UniformUInt64 : AtomicType::UniformInt64;
|
||||
|
||||
// varying
|
||||
if (LLVMTypes::MaskType != LLVMTypes::Int32VectorType &&
|
||||
t == LLVMTypes::MaskType)
|
||||
return AtomicType::VaryingBool;
|
||||
else if (t == LLVMTypes::Int8VectorType)
|
||||
return intAsUnsigned ? AtomicType::VaryingUInt8 : AtomicType::VaryingInt8;
|
||||
else if (t == LLVMTypes::Int16VectorType)
|
||||
@@ -194,7 +197,7 @@ lCreateISPCSymbol(llvm::Function *func, SymbolTable *symbolTable) {
|
||||
// symbol creation code below assumes that any LLVM vector of i32s is a
|
||||
// varying int32. Here, we need that to be interpreted as a varying
|
||||
// bool, so just have a one-off override for that one...
|
||||
if (name == "__sext_varying_bool") {
|
||||
if (g->target.maskBitCount != 1 && name == "__sext_varying_bool") {
|
||||
const Type *returnType = AtomicType::VaryingInt32;
|
||||
std::vector<const Type *> argTypes;
|
||||
argTypes.push_back(AtomicType::VaryingBool);
|
||||
@@ -257,7 +260,7 @@ static void
|
||||
lAddModuleSymbols(llvm::Module *module, SymbolTable *symbolTable) {
|
||||
#if 0
|
||||
// FIXME: handle globals?
|
||||
assert(module->global_empty());
|
||||
Assert(module->global_empty());
|
||||
#endif
|
||||
|
||||
llvm::Module::iterator iter;
|
||||
@@ -287,11 +290,11 @@ lCheckModuleIntrinsics(llvm::Module *module) {
|
||||
// check the llvm.x86.* intrinsics for now...
|
||||
if (!strncmp(funcName.c_str(), "llvm.x86.", 9)) {
|
||||
llvm::Intrinsic::ID id = (llvm::Intrinsic::ID)func->getIntrinsicID();
|
||||
assert(id != 0);
|
||||
LLVM_TYPE_CONST llvm::Type *intrinsicType =
|
||||
Assert(id != 0);
|
||||
llvm::Type *intrinsicType =
|
||||
llvm::Intrinsic::getType(*g->ctx, id);
|
||||
intrinsicType = llvm::PointerType::get(intrinsicType, 0);
|
||||
assert(func->getType() == intrinsicType);
|
||||
Assert(func->getType() == intrinsicType);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -311,8 +314,12 @@ lCheckModuleIntrinsics(llvm::Module *module) {
|
||||
static void
|
||||
lSetInternalFunctions(llvm::Module *module) {
|
||||
const char *names[] = {
|
||||
"__add_float",
|
||||
"__add_int32",
|
||||
"__add_uniform_double",
|
||||
"__add_uniform_int32",
|
||||
"__add_uniform_int64",
|
||||
"__add_varying_double",
|
||||
"__add_varying_int32",
|
||||
"__add_varying_int64",
|
||||
"__aos_to_soa3_float",
|
||||
@@ -371,18 +378,21 @@ lSetInternalFunctions(llvm::Module *module) {
|
||||
"__atomic_xor_uniform_int64_global",
|
||||
"__broadcast_double",
|
||||
"__broadcast_float",
|
||||
"__broadcast_int16",
|
||||
"__broadcast_int32",
|
||||
"__broadcast_int64",
|
||||
"__broadcast_int8",
|
||||
"__broadcast_i16",
|
||||
"__broadcast_i32",
|
||||
"__broadcast_i64",
|
||||
"__broadcast_i8",
|
||||
"__ceil_uniform_double",
|
||||
"__ceil_uniform_float",
|
||||
"__ceil_varying_double",
|
||||
"__ceil_varying_float",
|
||||
"__clock",
|
||||
"__count_trailing_zeros_i32",
|
||||
"__count_trailing_zeros_i64",
|
||||
"__count_leading_zeros_i32",
|
||||
"__count_leading_zeros_i64",
|
||||
"__delete_uniform",
|
||||
"__delete_varying",
|
||||
"__do_assert_uniform",
|
||||
"__do_assert_varying",
|
||||
"__do_print",
|
||||
@@ -401,12 +411,16 @@ lSetInternalFunctions(llvm::Module *module) {
|
||||
"__extract_int64",
|
||||
"__extract_int8",
|
||||
"__fastmath",
|
||||
"__float_to_half_uniform",
|
||||
"__float_to_half_varying",
|
||||
"__floatbits_uniform_int32",
|
||||
"__floatbits_varying_int32",
|
||||
"__floor_uniform_double",
|
||||
"__floor_uniform_float",
|
||||
"__floor_varying_double",
|
||||
"__floor_varying_float",
|
||||
"__half_to_float_uniform",
|
||||
"__half_to_float_varying",
|
||||
"__insert_int16",
|
||||
"__insert_int32",
|
||||
"__insert_int64",
|
||||
@@ -428,6 +442,12 @@ lSetInternalFunctions(llvm::Module *module) {
|
||||
"__max_varying_uint32",
|
||||
"__max_varying_uint64",
|
||||
"__memory_barrier",
|
||||
"__memcpy32",
|
||||
"__memcpy64",
|
||||
"__memmove32",
|
||||
"__memmove64",
|
||||
"__memset32",
|
||||
"__memset64",
|
||||
"__min_uniform_double",
|
||||
"__min_uniform_float",
|
||||
"__min_uniform_int32",
|
||||
@@ -441,6 +461,9 @@ lSetInternalFunctions(llvm::Module *module) {
|
||||
"__min_varying_uint32",
|
||||
"__min_varying_uint64",
|
||||
"__movmsk",
|
||||
"__new_uniform",
|
||||
"__new_varying32",
|
||||
"__new_varying64",
|
||||
"__num_cores",
|
||||
"__packed_load_active",
|
||||
"__packed_store_active",
|
||||
@@ -476,10 +499,10 @@ lSetInternalFunctions(llvm::Module *module) {
|
||||
"__reduce_min_uint64",
|
||||
"__rotate_double",
|
||||
"__rotate_float",
|
||||
"__rotate_int16",
|
||||
"__rotate_int32",
|
||||
"__rotate_int64",
|
||||
"__rotate_int8",
|
||||
"__rotate_i16",
|
||||
"__rotate_i32",
|
||||
"__rotate_i64",
|
||||
"__rotate_i8",
|
||||
"__round_uniform_double",
|
||||
"__round_uniform_float",
|
||||
"__round_varying_double",
|
||||
@@ -490,16 +513,16 @@ lSetInternalFunctions(llvm::Module *module) {
|
||||
"__sext_varying_bool",
|
||||
"__shuffle2_double",
|
||||
"__shuffle2_float",
|
||||
"__shuffle2_int16",
|
||||
"__shuffle2_int32",
|
||||
"__shuffle2_int64",
|
||||
"__shuffle2_int8",
|
||||
"__shuffle2_i16",
|
||||
"__shuffle2_i32",
|
||||
"__shuffle2_i64",
|
||||
"__shuffle2_i8",
|
||||
"__shuffle_double",
|
||||
"__shuffle_float",
|
||||
"__shuffle_int16",
|
||||
"__shuffle_int32",
|
||||
"__shuffle_int64",
|
||||
"__shuffle_int8",
|
||||
"__shuffle_i16",
|
||||
"__shuffle_i32",
|
||||
"__shuffle_i64",
|
||||
"__shuffle_i8",
|
||||
"__soa_to_aos3_float",
|
||||
"__soa_to_aos3_float16",
|
||||
"__soa_to_aos3_float4",
|
||||
@@ -514,6 +537,8 @@ lSetInternalFunctions(llvm::Module *module) {
|
||||
"__sqrt_uniform_float",
|
||||
"__sqrt_varying_double",
|
||||
"__sqrt_varying_float",
|
||||
"__stdlib_acosf",
|
||||
"__stdlib_asinf",
|
||||
"__stdlib_atan",
|
||||
"__stdlib_atan2",
|
||||
"__stdlib_atan2f",
|
||||
@@ -543,12 +568,16 @@ lSetInternalFunctions(llvm::Module *module) {
|
||||
"__svml_pow",
|
||||
"__undef_uniform",
|
||||
"__undef_varying",
|
||||
"__vec4_add_float",
|
||||
"__vec4_add_int32",
|
||||
"__vselect_float",
|
||||
"__vselect_i32",
|
||||
};
|
||||
|
||||
int count = sizeof(names) / sizeof(names[0]);
|
||||
for (int i = 0; i < count; ++i) {
|
||||
llvm::Function *f = module->getFunction(names[i]);
|
||||
if (f != NULL)
|
||||
if (f != NULL && f->empty() == false)
|
||||
f->setLinkage(llvm::GlobalValue::InternalLinkage);
|
||||
}
|
||||
}
|
||||
@@ -583,17 +612,15 @@ AddBitcodeToModule(const unsigned char *bitcode, int length,
|
||||
// linking together modules with incompatible target triples..
|
||||
llvm::Triple mTriple(m->module->getTargetTriple());
|
||||
llvm::Triple bcTriple(bcModule->getTargetTriple());
|
||||
assert(bcTriple.getArch() == llvm::Triple::UnknownArch ||
|
||||
Assert(bcTriple.getArch() == llvm::Triple::UnknownArch ||
|
||||
mTriple.getArch() == bcTriple.getArch());
|
||||
assert(bcTriple.getVendor() == llvm::Triple::UnknownVendor ||
|
||||
Assert(bcTriple.getVendor() == llvm::Triple::UnknownVendor ||
|
||||
mTriple.getVendor() == bcTriple.getVendor());
|
||||
bcModule->setTargetTriple(mTriple.str());
|
||||
|
||||
std::string(linkError);
|
||||
if (llvm::Linker::LinkModules(module, bcModule,
|
||||
#if defined(LLVM_3_0) || defined(LLVM_3_0svn) || defined(LLVM_3_1svn)
|
||||
llvm::Linker::DestroySource,
|
||||
#endif // LLVM_3_0
|
||||
&linkError))
|
||||
Error(SourcePos(), "Error linking stdlib bitcode: %s", linkError.c_str());
|
||||
lSetInternalFunctions(module);
|
||||
@@ -610,10 +637,11 @@ AddBitcodeToModule(const unsigned char *bitcode, int length,
|
||||
static void
|
||||
lDefineConstantInt(const char *name, int val, llvm::Module *module,
|
||||
SymbolTable *symbolTable) {
|
||||
Symbol *pw = new Symbol(name, SourcePos(), AtomicType::UniformConstInt32,
|
||||
SC_STATIC);
|
||||
Symbol *pw =
|
||||
new Symbol(name, SourcePos(), AtomicType::UniformInt32->GetAsConstType(),
|
||||
SC_STATIC);
|
||||
pw->constValue = new ConstExpr(pw->type, val, SourcePos());
|
||||
LLVM_TYPE_CONST llvm::Type *ltype = LLVMTypes::Int32Type;
|
||||
llvm::Type *ltype = LLVMTypes::Int32Type;
|
||||
llvm::Constant *linit = LLVMInt32(val);
|
||||
pw->storagePtr = new llvm::GlobalVariable(*module, ltype, true,
|
||||
llvm::GlobalValue::InternalLinkage,
|
||||
@@ -631,7 +659,7 @@ lDefineConstantIntFunc(const char *name, int val, llvm::Module *module,
|
||||
Symbol *sym = new Symbol(name, SourcePos(), ft, SC_STATIC);
|
||||
|
||||
llvm::Function *func = module->getFunction(name);
|
||||
assert(func != NULL); // it should be declared already...
|
||||
Assert(func != NULL); // it should be declared already...
|
||||
func->addFnAttr(llvm::Attribute::AlwaysInline);
|
||||
llvm::BasicBlock *bblock = llvm::BasicBlock::Create(*g->ctx, "entry", func, 0);
|
||||
llvm::ReturnInst::Create(*g->ctx, LLVMInt32(val), bblock);
|
||||
@@ -644,15 +672,16 @@ lDefineConstantIntFunc(const char *name, int val, llvm::Module *module,
|
||||
|
||||
static void
|
||||
lDefineProgramIndex(llvm::Module *module, SymbolTable *symbolTable) {
|
||||
Symbol *pidx = new Symbol("programIndex", SourcePos(),
|
||||
AtomicType::VaryingConstInt32, SC_STATIC);
|
||||
Symbol *pidx =
|
||||
new Symbol("programIndex", SourcePos(),
|
||||
AtomicType::VaryingInt32->GetAsConstType(), SC_STATIC);
|
||||
|
||||
int pi[ISPC_MAX_NVEC];
|
||||
for (int i = 0; i < g->target.vectorWidth; ++i)
|
||||
pi[i] = i;
|
||||
pidx->constValue = new ConstExpr(pidx->type, pi, SourcePos());
|
||||
|
||||
LLVM_TYPE_CONST llvm::Type *ltype = LLVMTypes::Int32VectorType;
|
||||
llvm::Type *ltype = LLVMTypes::Int32VectorType;
|
||||
llvm::Constant *linit = LLVMInt32Vector(pi);
|
||||
pidx->storagePtr = new llvm::GlobalVariable(*module, ltype, true,
|
||||
llvm::GlobalValue::InternalLinkage, linit,
|
||||
@@ -706,11 +735,13 @@ DefineStdlib(SymbolTable *symbolTable, llvm::LLVMContext *ctx, llvm::Module *mod
|
||||
extern int builtins_bitcode_sse4_x2_length;
|
||||
switch (g->target.vectorWidth) {
|
||||
case 4:
|
||||
AddBitcodeToModule(builtins_bitcode_sse4, builtins_bitcode_sse4_length,
|
||||
AddBitcodeToModule(builtins_bitcode_sse4,
|
||||
builtins_bitcode_sse4_length,
|
||||
module, symbolTable);
|
||||
break;
|
||||
case 8:
|
||||
AddBitcodeToModule(builtins_bitcode_sse4_x2, builtins_bitcode_sse4_x2_length,
|
||||
AddBitcodeToModule(builtins_bitcode_sse4_x2,
|
||||
builtins_bitcode_sse4_x2_length,
|
||||
module, symbolTable);
|
||||
break;
|
||||
default:
|
||||
@@ -720,21 +751,77 @@ DefineStdlib(SymbolTable *symbolTable, llvm::LLVMContext *ctx, llvm::Module *mod
|
||||
case Target::AVX:
|
||||
switch (g->target.vectorWidth) {
|
||||
case 8:
|
||||
extern unsigned char builtins_bitcode_avx[];
|
||||
extern int builtins_bitcode_avx_length;
|
||||
AddBitcodeToModule(builtins_bitcode_avx, builtins_bitcode_avx_length,
|
||||
extern unsigned char builtins_bitcode_avx1[];
|
||||
extern int builtins_bitcode_avx1_length;
|
||||
AddBitcodeToModule(builtins_bitcode_avx1,
|
||||
builtins_bitcode_avx1_length,
|
||||
module, symbolTable);
|
||||
break;
|
||||
case 16:
|
||||
extern unsigned char builtins_bitcode_avx_x2[];
|
||||
extern int builtins_bitcode_avx_x2_length;
|
||||
AddBitcodeToModule(builtins_bitcode_avx_x2, builtins_bitcode_avx_x2_length,
|
||||
extern unsigned char builtins_bitcode_avx1_x2[];
|
||||
extern int builtins_bitcode_avx1_x2_length;
|
||||
AddBitcodeToModule(builtins_bitcode_avx1_x2,
|
||||
builtins_bitcode_avx1_x2_length,
|
||||
module, symbolTable);
|
||||
break;
|
||||
default:
|
||||
FATAL("logic error in DefineStdlib");
|
||||
}
|
||||
break;
|
||||
case Target::AVX2:
|
||||
switch (g->target.vectorWidth) {
|
||||
case 8:
|
||||
extern unsigned char builtins_bitcode_avx2[];
|
||||
extern int builtins_bitcode_avx2_length;
|
||||
AddBitcodeToModule(builtins_bitcode_avx2,
|
||||
builtins_bitcode_avx2_length,
|
||||
module, symbolTable);
|
||||
break;
|
||||
case 16:
|
||||
extern unsigned char builtins_bitcode_avx2_x2[];
|
||||
extern int builtins_bitcode_avx2_x2_length;
|
||||
AddBitcodeToModule(builtins_bitcode_avx2_x2,
|
||||
builtins_bitcode_avx2_x2_length,
|
||||
module, symbolTable);
|
||||
break;
|
||||
default:
|
||||
FATAL("logic error in DefineStdlib");
|
||||
}
|
||||
break;
|
||||
case Target::GENERIC:
|
||||
switch (g->target.vectorWidth) {
|
||||
case 4:
|
||||
extern unsigned char builtins_bitcode_generic_4[];
|
||||
extern int builtins_bitcode_generic_4_length;
|
||||
AddBitcodeToModule(builtins_bitcode_generic_4,
|
||||
builtins_bitcode_generic_4_length,
|
||||
module, symbolTable);
|
||||
break;
|
||||
case 8:
|
||||
extern unsigned char builtins_bitcode_generic_8[];
|
||||
extern int builtins_bitcode_generic_8_length;
|
||||
AddBitcodeToModule(builtins_bitcode_generic_8,
|
||||
builtins_bitcode_generic_8_length,
|
||||
module, symbolTable);
|
||||
break;
|
||||
case 16:
|
||||
extern unsigned char builtins_bitcode_generic_16[];
|
||||
extern int builtins_bitcode_generic_16_length;
|
||||
AddBitcodeToModule(builtins_bitcode_generic_16,
|
||||
builtins_bitcode_generic_16_length,
|
||||
module, symbolTable);
|
||||
break;
|
||||
case 1:
|
||||
extern unsigned char builtins_bitcode_generic_1[];
|
||||
extern int builtins_bitcode_generic_1_length;
|
||||
AddBitcodeToModule(builtins_bitcode_generic_1,
|
||||
builtins_bitcode_generic_1_length,
|
||||
module, symbolTable);
|
||||
break;
|
||||
default:
|
||||
FATAL("logic error in DefineStdlib");
|
||||
}
|
||||
break;
|
||||
default:
|
||||
FATAL("logic error");
|
||||
}
|
||||
@@ -759,14 +846,22 @@ DefineStdlib(SymbolTable *symbolTable, llvm::LLVMContext *ctx, llvm::Module *mod
|
||||
lDefineConstantIntFunc("__fast_masked_vload", (int)g->opt.fastMaskedVload, module,
|
||||
symbolTable);
|
||||
|
||||
lDefineConstantInt("__have_native_half", (g->target.isa == Target::AVX2),
|
||||
module, symbolTable);
|
||||
|
||||
if (includeStdlibISPC) {
|
||||
// If the user wants the standard library to be included, parse the
|
||||
// serialized version of the stdlib.ispc file to get its
|
||||
// definitions added. Disable emission of performance warnings for
|
||||
// now, since the user doesn't care about any of that in the stdlib
|
||||
// implementation...
|
||||
extern char stdlib_code[];
|
||||
yy_scan_string(stdlib_code);
|
||||
yyparse();
|
||||
// definitions added.
|
||||
if (g->target.isa == Target::GENERIC&&g->target.vectorWidth!=1) { // 1 wide uses x86 stdlib
|
||||
extern char stdlib_generic_code[];
|
||||
yy_scan_string(stdlib_generic_code);
|
||||
yyparse();
|
||||
}
|
||||
else {
|
||||
extern char stdlib_x86_code[];
|
||||
yy_scan_string(stdlib_x86_code);
|
||||
yyparse();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -149,7 +149,7 @@ void __do_print(const char *format, const char *types, int width, int mask,
|
||||
|
||||
|
||||
int __num_cores() {
|
||||
#ifdef _MSC_VER
|
||||
#if defined(_MSC_VER) || defined(__MINGW32__)
|
||||
// This is quite a hack. Including all of windows.h to get this definition
|
||||
// pulls in a bunch of stuff that leads to undefined symbols at link time.
|
||||
// So we don't #include <windows.h> but instead have the equivalent declarations
|
||||
@@ -48,23 +48,42 @@ declare void @abort() noreturn
|
||||
;; corresponding to one of the Target::ISA enumerant values that gives the
|
||||
;; most capable ISA that the current system can run.
|
||||
;;
|
||||
;; #ifdef _MSC_VER
|
||||
;; extern void __stdcall __cpuid(int info[4], int infoType);
|
||||
;; #else
|
||||
;; Note: clang from LLVM 2.9 should be used if this is updated, for maximum
|
||||
;; backwards compatibility for anyone building ispc with LLVM 2.9.
|
||||
;;
|
||||
;; #include <stdint.h>
|
||||
;; #include <stdlib.h>
|
||||
;;
|
||||
;; static void __cpuid(int info[4], int infoType) {
|
||||
;; __asm__ __volatile__ ("cpuid"
|
||||
;; : "=a" (info[0]), "=b" (info[1]), "=c" (info[2]), "=d" (info[3])
|
||||
;; : "0" (infoType));
|
||||
;; }
|
||||
;; #endif
|
||||
;;
|
||||
;; /* Save %ebx in case it's the PIC register */
|
||||
;; static void __cpuid_count(int info[4], int level, int count) {
|
||||
;; __asm__ __volatile__ ("xchg{l}\t{%%}ebx, %1\n\t"
|
||||
;; "cpuid\n\t"
|
||||
;; "xchg{l}\t{%%}ebx, %1\n\t"
|
||||
;; : "=a" (info[0]), "=r" (info[1]), "=c" (info[2]), "=d" (info[3])
|
||||
;; : "0" (level), "2" (count));
|
||||
;; }
|
||||
;;
|
||||
;; int32_t __get_system_isa() {
|
||||
;; int info[4];
|
||||
;; __cpuid(info, 1);
|
||||
;;
|
||||
;; /* NOTE: the values returned below must be the same as the
|
||||
;; corresponding enumerant values in Target::ISA. */
|
||||
;; if ((info[2] & (1 << 28)) != 0)
|
||||
;; return 2; // AVX
|
||||
;; if ((info[2] & (1 << 28)) != 0) {
|
||||
;; // AVX1 for sure. Do we have AVX2?
|
||||
;; // Call cpuid with eax=7, ecx=0
|
||||
;; __cpuid_count(info, 7, 0);
|
||||
;; if ((info[1] & (1 << 5)) != 0)
|
||||
;; return 3; // AVX2
|
||||
;; else
|
||||
;; return 2; // AVX1
|
||||
;; }
|
||||
;; else if ((info[2] & (1 << 19)) != 0)
|
||||
;; return 1; // SSE4
|
||||
;; else if ((info[3] & (1 << 26)) != 0)
|
||||
@@ -76,33 +95,42 @@ declare void @abort() noreturn
|
||||
%0 = type { i32, i32, i32, i32 }
|
||||
|
||||
define i32 @__get_system_isa() nounwind ssp {
|
||||
%1 = tail call %0 asm sideeffect "cpuid", "={ax},={bx},={cx},={dx},0,~{dirflag},~{fpsr},~{flags}"(i32 1) nounwind
|
||||
%2 = extractvalue %0 %1, 2
|
||||
%3 = extractvalue %0 %1, 3
|
||||
%4 = and i32 %2, 268435456
|
||||
%5 = icmp eq i32 %4, 0
|
||||
br i1 %5, label %6, label %13
|
||||
entry:
|
||||
%0 = tail call %0 asm sideeffect "cpuid", "={ax},={bx},={cx},={dx},0,~{dirflag},~{fpsr},~{flags}"(i32 1) nounwind
|
||||
%asmresult9.i = extractvalue %0 %0, 2
|
||||
%asmresult10.i = extractvalue %0 %0, 3
|
||||
%and = and i32 %asmresult9.i, 268435456
|
||||
%cmp = icmp eq i32 %and, 0
|
||||
br i1 %cmp, label %if.else7, label %if.then
|
||||
|
||||
; <label>:6 ; preds = %0
|
||||
%7 = and i32 %2, 524288
|
||||
%8 = icmp eq i32 %7, 0
|
||||
br i1 %8, label %9, label %13
|
||||
if.then: ; preds = %entry
|
||||
%1 = tail call %0 asm sideeffect "xchg$(l$)\09$(%$)ebx, $1\0A\09cpuid\0A\09xchg$(l$)\09$(%$)ebx, $1\0A\09", "={ax},=r,={cx},={dx},0,2,~{dirflag},~{fpsr},~{flags}"(i32 7, i32 0) nounwind
|
||||
%asmresult9.i24 = extractvalue %0 %1, 1
|
||||
%and4 = lshr i32 %asmresult9.i24, 5
|
||||
%2 = and i32 %and4, 1
|
||||
%3 = or i32 %2, 2
|
||||
br label %return
|
||||
|
||||
; <label>:9 ; preds = %6
|
||||
%10 = and i32 %3, 67108864
|
||||
%11 = icmp eq i32 %10, 0
|
||||
br i1 %11, label %12, label %13
|
||||
if.else7: ; preds = %entry
|
||||
%and10 = and i32 %asmresult9.i, 524288
|
||||
%cmp11 = icmp eq i32 %and10, 0
|
||||
br i1 %cmp11, label %if.else13, label %return
|
||||
|
||||
; <label>:12 ; preds = %9
|
||||
if.else13: ; preds = %if.else7
|
||||
%and16 = and i32 %asmresult10.i, 67108864
|
||||
%cmp17 = icmp eq i32 %and16, 0
|
||||
br i1 %cmp17, label %if.else19, label %return
|
||||
|
||||
if.else19: ; preds = %if.else13
|
||||
tail call void @abort() noreturn nounwind
|
||||
unreachable
|
||||
|
||||
; <label>:13 ; preds = %9, %6, %0
|
||||
%.0 = phi i32 [ 2, %0 ], [ 1, %6 ], [ 0, %9 ]
|
||||
ret i32 %.0
|
||||
return: ; preds = %if.else13, %if.else7, %if.then
|
||||
%retval.0 = phi i32 [ %3, %if.then ], [ 1, %if.else7 ], [ 0, %if.else13 ]
|
||||
ret i32 %retval.0
|
||||
}
|
||||
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; This function is called by each of the dispatch functions we generate;
|
||||
;; it sets @__system_best_isa if it is unset.
|
||||
|
||||
@@ -32,6 +32,11 @@
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; AVX target implementation.
|
||||
|
||||
ctlztz()
|
||||
define_prefetches()
|
||||
define_shuffles()
|
||||
aossoa()
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; rcp
|
||||
|
||||
@@ -32,12 +32,16 @@
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; Basic 16-wide definitions
|
||||
|
||||
stdlib_core(16)
|
||||
packed_load_and_store(16)
|
||||
scans(16)
|
||||
int64minmax(16)
|
||||
define(`WIDTH',`16')
|
||||
define(`MASK',`i32')
|
||||
include(`util.m4')
|
||||
|
||||
include(`builtins-avx-common.ll')
|
||||
stdlib_core()
|
||||
packed_load_and_store()
|
||||
scans()
|
||||
int64minmax()
|
||||
|
||||
include(`target-avx-common.ll')
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; rcp
|
||||
@@ -166,33 +170,6 @@ define <16 x float> @__min_varying_float(<16 x float>,
|
||||
}
|
||||
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; int min/max
|
||||
|
||||
define <16 x i32> @__min_varying_int32(<16 x i32>, <16 x i32>) nounwind readonly alwaysinline {
|
||||
binary4to16(ret, i32, @llvm.x86.sse41.pminsd, %0, %1)
|
||||
ret <16 x i32> %ret
|
||||
}
|
||||
|
||||
define <16 x i32> @__max_varying_int32(<16 x i32>, <16 x i32>) nounwind readonly alwaysinline {
|
||||
binary4to16(ret, i32, @llvm.x86.sse41.pmaxsd, %0, %1)
|
||||
ret <16 x i32> %ret
|
||||
}
|
||||
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; unsigned int min/max
|
||||
|
||||
define <16 x i32> @__min_varying_uint32(<16 x i32>, <16 x i32>) nounwind readonly alwaysinline {
|
||||
binary4to16(ret, i32, @llvm.x86.sse41.pminud, %0, %1)
|
||||
ret <16 x i32> %ret
|
||||
}
|
||||
|
||||
define <16 x i32> @__max_varying_uint32(<16 x i32>, <16 x i32>) nounwind readonly alwaysinline {
|
||||
binary4to16(ret, i32, @llvm.x86.sse41.pmaxud, %0, %1)
|
||||
ret <16 x i32> %ret
|
||||
}
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
; horizontal ops
|
||||
|
||||
@@ -381,13 +358,13 @@ load_and_broadcast(16, i32, 32)
|
||||
load_and_broadcast(16, i64, 64)
|
||||
|
||||
; no masked load instruction for i8 and i16 types??
|
||||
load_masked(16, i8, 8, 1)
|
||||
load_masked(16, i16, 16, 2)
|
||||
masked_load(16, i8, 8, 1)
|
||||
masked_load(16, i16, 16, 2)
|
||||
|
||||
declare <8 x float> @llvm.x86.avx.maskload.ps.256(i8 *, <8 x float> %mask)
|
||||
declare <4 x double> @llvm.x86.avx.maskload.pd.256(i8 *, <4 x double> %mask)
|
||||
|
||||
define <16 x i32> @__load_masked_32(i8 *, <16 x i32> %mask) nounwind alwaysinline {
|
||||
define <16 x i32> @__masked_load_32(i8 *, <16 x i32> %mask) nounwind alwaysinline {
|
||||
%floatmask = bitcast <16 x i32> %mask to <16 x float>
|
||||
%mask0 = shufflevector <16 x float> %floatmask, <16 x float> undef,
|
||||
<8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
|
||||
@@ -405,7 +382,7 @@ define <16 x i32> @__load_masked_32(i8 *, <16 x i32> %mask) nounwind alwaysinlin
|
||||
}
|
||||
|
||||
|
||||
define <16 x i64> @__load_masked_64(i8 *, <16 x i32> %mask) nounwind alwaysinline {
|
||||
define <16 x i64> @__masked_load_64(i8 *, <16 x i32> %mask) nounwind alwaysinline {
|
||||
; double up masks, bitcast to doubles
|
||||
%mask0 = shufflevector <16 x i32> %mask, <16 x i32> undef,
|
||||
<8 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3>
|
||||
@@ -618,12 +595,7 @@ define void @__masked_store_blend_64(<16 x i64>* nocapture %ptr, <16 x i64> %new
|
||||
}
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; gather/scatter
|
||||
|
||||
gen_gather(16, i8)
|
||||
gen_gather(16, i16)
|
||||
gen_gather(16, i32)
|
||||
gen_gather(16, i64)
|
||||
;; scatter
|
||||
|
||||
gen_scatter(16, i8)
|
||||
gen_scatter(16, i16)
|
||||
@@ -32,12 +32,16 @@
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; Basic 8-wide definitions
|
||||
|
||||
stdlib_core(8)
|
||||
packed_load_and_store(8)
|
||||
scans(8)
|
||||
int64minmax(8)
|
||||
define(`WIDTH',`8')
|
||||
define(`MASK',`i32')
|
||||
include(`util.m4')
|
||||
|
||||
include(`builtins-avx-common.ll')
|
||||
stdlib_core()
|
||||
packed_load_and_store()
|
||||
scans()
|
||||
int64minmax()
|
||||
|
||||
include(`target-avx-common.ll')
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; rcp
|
||||
@@ -166,33 +170,6 @@ define <8 x float> @__min_varying_float(<8 x float>,
|
||||
}
|
||||
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; int min/max
|
||||
|
||||
define <8 x i32> @__min_varying_int32(<8 x i32>, <8 x i32>) nounwind readonly alwaysinline {
|
||||
binary4to8(ret, i32, @llvm.x86.sse41.pminsd, %0, %1)
|
||||
ret <8 x i32> %ret
|
||||
}
|
||||
|
||||
define <8 x i32> @__max_varying_int32(<8 x i32>, <8 x i32>) nounwind readonly alwaysinline {
|
||||
binary4to8(ret, i32, @llvm.x86.sse41.pmaxsd, %0, %1)
|
||||
ret <8 x i32> %ret
|
||||
}
|
||||
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; unsigned int min/max
|
||||
|
||||
define <8 x i32> @__min_varying_uint32(<8 x i32>, <8 x i32>) nounwind readonly alwaysinline {
|
||||
binary4to8(ret, i32, @llvm.x86.sse41.pminud, %0, %1)
|
||||
ret <8 x i32> %ret
|
||||
}
|
||||
|
||||
define <8 x i32> @__max_varying_uint32(<8 x i32>, <8 x i32>) nounwind readonly alwaysinline {
|
||||
binary4to8(ret, i32, @llvm.x86.sse41.pmaxud, %0, %1)
|
||||
ret <8 x i32> %ret
|
||||
}
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
; horizontal ops
|
||||
|
||||
@@ -234,7 +211,7 @@ reduce_equal(8)
|
||||
;; horizontal int32 ops
|
||||
|
||||
define <8 x i32> @__add_varying_int32(<8 x i32>,
|
||||
<8 x i32>) nounwind readnone alwaysinline {
|
||||
<8 x i32>) nounwind readnone alwaysinline {
|
||||
%s = add <8 x i32> %0, %1
|
||||
ret <8 x i32> %s
|
||||
}
|
||||
@@ -310,7 +287,7 @@ define double @__reduce_max_double(<8 x double>) nounwind readnone alwaysinline
|
||||
;; horizontal int64 ops
|
||||
|
||||
define <8 x i64> @__add_varying_int64(<8 x i64>,
|
||||
<8 x i64>) nounwind readnone alwaysinline {
|
||||
<8 x i64>) nounwind readnone alwaysinline {
|
||||
%s = add <8 x i64> %0, %1
|
||||
ret <8 x i64> %s
|
||||
}
|
||||
@@ -362,13 +339,13 @@ load_and_broadcast(8, i32, 32)
|
||||
load_and_broadcast(8, i64, 64)
|
||||
|
||||
; no masked load instruction for i8 and i16 types??
|
||||
load_masked(8, i8, 8, 1)
|
||||
load_masked(8, i16, 16, 2)
|
||||
masked_load(8, i8, 8, 1)
|
||||
masked_load(8, i16, 16, 2)
|
||||
|
||||
declare <8 x float> @llvm.x86.avx.maskload.ps.256(i8 *, <8 x float> %mask)
|
||||
declare <4 x double> @llvm.x86.avx.maskload.pd.256(i8 *, <4 x double> %mask)
|
||||
|
||||
define <8 x i32> @__load_masked_32(i8 *, <8 x i32> %mask) nounwind alwaysinline {
|
||||
define <8 x i32> @__masked_load_32(i8 *, <8 x i32> %mask) nounwind alwaysinline {
|
||||
%floatmask = bitcast <8 x i32> %mask to <8 x float>
|
||||
%floatval = call <8 x float> @llvm.x86.avx.maskload.ps.256(i8 * %0, <8 x float> %floatmask)
|
||||
%retval = bitcast <8 x float> %floatval to <8 x i32>
|
||||
@@ -376,7 +353,7 @@ define <8 x i32> @__load_masked_32(i8 *, <8 x i32> %mask) nounwind alwaysinline
|
||||
}
|
||||
|
||||
|
||||
define <8 x i64> @__load_masked_64(i8 *, <8 x i32> %mask) nounwind alwaysinline {
|
||||
define <8 x i64> @__masked_load_64(i8 *, <8 x i32> %mask) nounwind alwaysinline {
|
||||
; double up masks, bitcast to doubles
|
||||
%mask0 = shufflevector <8 x i32> %mask, <8 x i32> undef,
|
||||
<8 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3>
|
||||
@@ -399,9 +376,6 @@ define <8 x i64> @__load_masked_64(i8 *, <8 x i32> %mask) nounwind alwaysinline
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; masked store
|
||||
|
||||
; FIXME: there is no AVX instruction for these, but we could be clever
|
||||
; by packing the bits down and setting the last 3/4 or half, respectively,
|
||||
; of the mask to zero... Not sure if this would be a win in the end
|
||||
gen_masked_store(8, i8, 8)
|
||||
gen_masked_store(8, i16, 16)
|
||||
|
||||
@@ -516,12 +490,7 @@ define void @__masked_store_blend_64(<8 x i64>* nocapture %ptr, <8 x i64> %new,
|
||||
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; gather/scatter
|
||||
|
||||
gen_gather(8, i8)
|
||||
gen_gather(8, i16)
|
||||
gen_gather(8, i32)
|
||||
gen_gather(8, i64)
|
||||
;; scatter
|
||||
|
||||
gen_scatter(8, i8)
|
||||
gen_scatter(8, i16)
|
||||
77
builtins/target-avx1-x2.ll
Normal file
77
builtins/target-avx1-x2.ll
Normal file
@@ -0,0 +1,77 @@
|
||||
;; Copyright (c) 2010-2011, Intel Corporation
|
||||
;; All rights reserved.
|
||||
;;
|
||||
;; Redistribution and use in source and binary forms, with or without
|
||||
;; modification, are permitted provided that the following conditions are
|
||||
;; met:
|
||||
;;
|
||||
;; * Redistributions of source code must retain the above copyright
|
||||
;; notice, this list of conditions and the following disclaimer.
|
||||
;;
|
||||
;; * Redistributions in binary form must reproduce the above copyright
|
||||
;; notice, this list of conditions and the following disclaimer in the
|
||||
;; documentation and/or other materials provided with the distribution.
|
||||
;;
|
||||
;; * Neither the name of Intel Corporation nor the names of its
|
||||
;; contributors may be used to endorse or promote products derived from
|
||||
;; this software without specific prior written permission.
|
||||
;;
|
||||
;;
|
||||
;; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
|
||||
;; IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
||||
;; TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
|
||||
;; PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
|
||||
;; OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
;; EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
;; PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
;; PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
;; LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
;; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
;; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
include(`target-avx-x2.ll')
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; int min/max
|
||||
|
||||
;; Per-lane signed 32-bit minimum of two 16-wide vectors (%0 and %1).
;; The body is produced by the binary4to16 m4 macro, which is defined outside
;; this file, using the 4-wide SSE4.1 pminsd intrinsic; the result is named %ret.
;; NOTE(review): presumably the macro applies the intrinsic to each 4-lane
;; slice of the operands and reassembles the 16-wide result -- confirm against
;; the macro definition.
define <16 x i32> @__min_varying_int32(<16 x i32>, <16 x i32>) nounwind readonly alwaysinline {
|
||||
binary4to16(ret, i32, @llvm.x86.sse41.pminsd, %0, %1)
|
||||
ret <16 x i32> %ret
|
||||
}
|
||||
|
||||
;; Per-lane signed 32-bit maximum of two 16-wide vectors (%0 and %1).
;; The body is produced by the binary4to16 m4 macro, which is defined outside
;; this file, using the 4-wide SSE4.1 pmaxsd intrinsic; the result is named %ret.
;; NOTE(review): presumably the macro applies the intrinsic to each 4-lane
;; slice of the operands and reassembles the 16-wide result -- confirm against
;; the macro definition.
define <16 x i32> @__max_varying_int32(<16 x i32>, <16 x i32>) nounwind readonly alwaysinline {
|
||||
binary4to16(ret, i32, @llvm.x86.sse41.pmaxsd, %0, %1)
|
||||
ret <16 x i32> %ret
|
||||
}
|
||||
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; unsigned int min/max
|
||||
|
||||
;; Per-lane unsigned 32-bit minimum of two 16-wide vectors (%0 and %1).
;; The body is produced by the binary4to16 m4 macro, which is defined outside
;; this file, using the 4-wide SSE4.1 pminud intrinsic; the result is named %ret.
;; NOTE(review): presumably the macro applies the intrinsic to each 4-lane
;; slice of the operands and reassembles the 16-wide result -- confirm against
;; the macro definition.
define <16 x i32> @__min_varying_uint32(<16 x i32>, <16 x i32>) nounwind readonly alwaysinline {
|
||||
binary4to16(ret, i32, @llvm.x86.sse41.pminud, %0, %1)
|
||||
ret <16 x i32> %ret
|
||||
}
|
||||
|
||||
;; Per-lane unsigned 32-bit maximum of two 16-wide vectors (%0 and %1).
;; The body is produced by the binary4to16 m4 macro, which is defined outside
;; this file, using the 4-wide SSE4.1 pmaxud intrinsic; the result is named %ret.
;; NOTE(review): presumably the macro applies the intrinsic to each 4-lane
;; slice of the operands and reassembles the 16-wide result -- confirm against
;; the macro definition.
define <16 x i32> @__max_varying_uint32(<16 x i32>, <16 x i32>) nounwind readonly alwaysinline {
|
||||
binary4to16(ret, i32, @llvm.x86.sse41.pmaxud, %0, %1)
|
||||
ret <16 x i32> %ret
|
||||
}
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; half conversion routines
|
||||
|
||||
declare float @__half_to_float_uniform(i16 %v) nounwind readnone
|
||||
declare <WIDTH x float> @__half_to_float_varying(<WIDTH x i16> %v) nounwind readnone
|
||||
declare i16 @__float_to_half_uniform(float %v) nounwind readnone
|
||||
declare <WIDTH x i16> @__float_to_half_varying(<WIDTH x float> %v) nounwind readnone
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; gather
|
||||
|
||||
gen_gather(16, i8)
|
||||
gen_gather(16, i16)
|
||||
gen_gather(16, i32)
|
||||
gen_gather(16, i64)
|
||||
|
||||
|
||||
75
builtins/target-avx1.ll
Normal file
75
builtins/target-avx1.ll
Normal file
@@ -0,0 +1,75 @@
|
||||
;; Copyright (c) 2010-2011, Intel Corporation
|
||||
;; All rights reserved.
|
||||
;;
|
||||
;; Redistribution and use in source and binary forms, with or without
|
||||
;; modification, are permitted provided that the following conditions are
|
||||
;; met:
|
||||
;;
|
||||
;; * Redistributions of source code must retain the above copyright
|
||||
;; notice, this list of conditions and the following disclaimer.
|
||||
;;
|
||||
;; * Redistributions in binary form must reproduce the above copyright
|
||||
;; notice, this list of conditions and the following disclaimer in the
|
||||
;; documentation and/or other materials provided with the distribution.
|
||||
;;
|
||||
;; * Neither the name of Intel Corporation nor the names of its
|
||||
;; contributors may be used to endorse or promote products derived from
|
||||
;; this software without specific prior written permission.
|
||||
;;
|
||||
;;
|
||||
;; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
|
||||
;; IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
||||
;; TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
|
||||
;; PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
|
||||
;; OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
;; EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
;; PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
;; PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
;; LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
;; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
;; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
include(`target-avx.ll')
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; int min/max
|
||||
|
||||
;; Per-lane signed 32-bit minimum of two 8-wide vectors (%0 and %1).
;; The body is produced by the binary4to8 m4 macro, which is defined outside
;; this file, using the 4-wide SSE4.1 pminsd intrinsic; the result is named %ret.
;; NOTE(review): presumably the macro applies the intrinsic to each 4-lane
;; half of the operands and reassembles the 8-wide result -- confirm against
;; the macro definition.
define <8 x i32> @__min_varying_int32(<8 x i32>, <8 x i32>) nounwind readonly alwaysinline {
|
||||
binary4to8(ret, i32, @llvm.x86.sse41.pminsd, %0, %1)
|
||||
ret <8 x i32> %ret
|
||||
}
|
||||
|
||||
;; Per-lane signed 32-bit maximum of two 8-wide vectors (%0 and %1).
;; The body is produced by the binary4to8 m4 macro, which is defined outside
;; this file, using the 4-wide SSE4.1 pmaxsd intrinsic; the result is named %ret.
;; NOTE(review): presumably the macro applies the intrinsic to each 4-lane
;; half of the operands and reassembles the 8-wide result -- confirm against
;; the macro definition.
define <8 x i32> @__max_varying_int32(<8 x i32>, <8 x i32>) nounwind readonly alwaysinline {
|
||||
binary4to8(ret, i32, @llvm.x86.sse41.pmaxsd, %0, %1)
|
||||
ret <8 x i32> %ret
|
||||
}
|
||||
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; unsigned int min/max
|
||||
|
||||
;; Per-lane unsigned 32-bit minimum of two 8-wide vectors (%0 and %1).
;; The body is produced by the binary4to8 m4 macro, which is defined outside
;; this file, using the 4-wide SSE4.1 pminud intrinsic; the result is named %ret.
;; NOTE(review): presumably the macro applies the intrinsic to each 4-lane
;; half of the operands and reassembles the 8-wide result -- confirm against
;; the macro definition.
define <8 x i32> @__min_varying_uint32(<8 x i32>, <8 x i32>) nounwind readonly alwaysinline {
|
||||
binary4to8(ret, i32, @llvm.x86.sse41.pminud, %0, %1)
|
||||
ret <8 x i32> %ret
|
||||
}
|
||||
|
||||
;; Per-lane unsigned 32-bit maximum of two 8-wide vectors (%0 and %1).
;; The body is produced by the binary4to8 m4 macro, which is defined outside
;; this file, using the 4-wide SSE4.1 pmaxud intrinsic; the result is named %ret.
;; NOTE(review): presumably the macro applies the intrinsic to each 4-lane
;; half of the operands and reassembles the 8-wide result -- confirm against
;; the macro definition.
define <8 x i32> @__max_varying_uint32(<8 x i32>, <8 x i32>) nounwind readonly alwaysinline {
|
||||
binary4to8(ret, i32, @llvm.x86.sse41.pmaxud, %0, %1)
|
||||
ret <8 x i32> %ret
|
||||
}
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; half conversion routines
|
||||
|
||||
declare float @__half_to_float_uniform(i16 %v) nounwind readnone
|
||||
declare <WIDTH x float> @__half_to_float_varying(<WIDTH x i16> %v) nounwind readnone
|
||||
declare i16 @__float_to_half_uniform(float %v) nounwind readnone
|
||||
declare <WIDTH x i16> @__float_to_half_varying(<WIDTH x float> %v) nounwind readnone
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; gather
|
||||
|
||||
gen_gather(8, i8)
|
||||
gen_gather(8, i16)
|
||||
gen_gather(8, i32)
|
||||
gen_gather(8, i64)
|
||||
129
builtins/target-avx2-x2.ll
Normal file
129
builtins/target-avx2-x2.ll
Normal file
@@ -0,0 +1,129 @@
|
||||
;; Copyright (c) 2010-2011, Intel Corporation
|
||||
;; All rights reserved.
|
||||
;;
|
||||
;; Redistribution and use in source and binary forms, with or without
|
||||
;; modification, are permitted provided that the following conditions are
|
||||
;; met:
|
||||
;;
|
||||
;; * Redistributions of source code must retain the above copyright
|
||||
;; notice, this list of conditions and the following disclaimer.
|
||||
;;
|
||||
;; * Redistributions in binary form must reproduce the above copyright
|
||||
;; notice, this list of conditions and the following disclaimer in the
|
||||
;; documentation and/or other materials provided with the distribution.
|
||||
;;
|
||||
;; * Neither the name of Intel Corporation nor the names of its
|
||||
;; contributors may be used to endorse or promote products derived from
|
||||
;; this software without specific prior written permission.
|
||||
;;
|
||||
;;
|
||||
;; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
|
||||
;; IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
||||
;; TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
|
||||
;; PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
|
||||
;; OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
;; EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
;; PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
;; PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
;; LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
;; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
;; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
include(`target-avx-x2.ll')
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; int min/max
|
||||
|
||||
declare <8 x i32> @llvm.x86.avx2.pmins.d(<8 x i32>, <8 x i32>) nounwind readonly
|
||||
declare <8 x i32> @llvm.x86.avx2.pmaxs.d(<8 x i32>, <8 x i32>) nounwind readonly
|
||||
|
||||
;; Per-lane signed 32-bit minimum of two 16-wide vectors (%0 and %1).
;; The body is produced by the binary8to16 m4 macro, which is defined outside
;; this file, using the 8-wide AVX2 pmins.d intrinsic declared above; the
;; result is named %m.
;; NOTE(review): presumably the macro applies the intrinsic to each 8-lane
;; half of the operands and reassembles the 16-wide result -- confirm against
;; the macro definition.
define <16 x i32> @__min_varying_int32(<16 x i32>, <16 x i32>) nounwind readonly alwaysinline {
|
||||
binary8to16(m, i32, @llvm.x86.avx2.pmins.d, %0, %1)
|
||||
ret <16 x i32> %m
|
||||
}
|
||||
|
||||
;; Per-lane signed 32-bit maximum of two 16-wide vectors (%0 and %1).
;; The body is produced by the binary8to16 m4 macro, which is defined outside
;; this file, using the 8-wide AVX2 pmaxs.d intrinsic declared above; the
;; result is named %m.
;; NOTE(review): presumably the macro applies the intrinsic to each 8-lane
;; half of the operands and reassembles the 16-wide result -- confirm against
;; the macro definition.
define <16 x i32> @__max_varying_int32(<16 x i32>, <16 x i32>) nounwind readonly alwaysinline {
|
||||
binary8to16(m, i32, @llvm.x86.avx2.pmaxs.d, %0, %1)
|
||||
ret <16 x i32> %m
|
||||
}
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; unsigned int min/max
|
||||
|
||||
declare <8 x i32> @llvm.x86.avx2.pminu.d(<8 x i32>, <8 x i32>) nounwind readonly
|
||||
declare <8 x i32> @llvm.x86.avx2.pmaxu.d(<8 x i32>, <8 x i32>) nounwind readonly
|
||||
|
||||
;; Per-lane unsigned 32-bit minimum of two 16-wide vectors (%0 and %1).
;; The body is produced by the binary8to16 m4 macro, which is defined outside
;; this file, using the 8-wide AVX2 pminu.d intrinsic declared above; the
;; result is named %m.
;; NOTE(review): presumably the macro applies the intrinsic to each 8-lane
;; half of the operands and reassembles the 16-wide result -- confirm against
;; the macro definition.
define <16 x i32> @__min_varying_uint32(<16 x i32>, <16 x i32>) nounwind readonly alwaysinline {
|
||||
binary8to16(m, i32, @llvm.x86.avx2.pminu.d, %0, %1)
|
||||
ret <16 x i32> %m
|
||||
}
|
||||
|
||||
;; Per-lane unsigned 32-bit maximum of two 16-wide vectors (%0 and %1).
;; The body is produced by the binary8to16 m4 macro, which is defined outside
;; this file, using the 8-wide AVX2 pmaxu.d intrinsic declared above; the
;; result is named %m.
;; NOTE(review): presumably the macro applies the intrinsic to each 8-lane
;; half of the operands and reassembles the 16-wide result -- confirm against
;; the macro definition.
define <16 x i32> @__max_varying_uint32(<16 x i32>, <16 x i32>) nounwind readonly alwaysinline {
|
||||
binary8to16(m, i32, @llvm.x86.avx2.pmaxu.d, %0, %1)
|
||||
ret <16 x i32> %m
|
||||
}
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; float/half conversions
|
||||
|
||||
declare <8 x float> @llvm.x86.vcvtph2ps.256(<8 x i16>) nounwind readnone
|
||||
; 0 is round nearest even
|
||||
declare <8 x i16> @llvm.x86.vcvtps2ph.256(<8 x float>, i32) nounwind readnone
|
||||
|
||||
;; Converts 16 half-precision values (carried as i16 lanes in %v) to 16 floats.
;; The 8-wide vcvtph2ps.256 intrinsic is applied twice: %r_0 extracts lanes
;; 0-7 and %r_1 extracts lanes 8-15 of %v, each is converted, and the final
;; shufflevector concatenates the two 8-wide results in lane order.
define <16 x float> @__half_to_float_varying(<16 x i16> %v) nounwind readnone {
|
||||
%r_0 = shufflevector <16 x i16> %v, <16 x i16> undef,
|
||||
<8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
|
||||
%vr_0 = call <8 x float> @llvm.x86.vcvtph2ps.256(<8 x i16> %r_0)
|
||||
%r_1 = shufflevector <16 x i16> %v, <16 x i16> undef,
|
||||
<8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
|
||||
%vr_1 = call <8 x float> @llvm.x86.vcvtph2ps.256(<8 x i16> %r_1)
|
||||
%r = shufflevector <8 x float> %vr_0, <8 x float> %vr_1,
|
||||
<16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7,
|
||||
i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
|
||||
ret <16 x float> %r
|
||||
}
|
||||
|
||||
;; Converts 16 floats in %v to 16 half-precision values returned as i16 lanes.
;; The 8-wide vcvtps2ph.256 intrinsic is applied to lanes 0-7 (%r_0) and lanes
;; 8-15 (%r_1) separately, and the final shufflevector concatenates the two
;; 8-wide results in lane order. The i32 0 immediate selects
;; round-to-nearest-even, per the comment on the intrinsic's declaration.
define <16 x i16> @__float_to_half_varying(<16 x float> %v) nounwind readnone {
|
||||
%r_0 = shufflevector <16 x float> %v, <16 x float> undef,
|
||||
<8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
|
||||
%vr_0 = call <8 x i16> @llvm.x86.vcvtps2ph.256(<8 x float> %r_0, i32 0)
|
||||
%r_1 = shufflevector <16 x float> %v, <16 x float> undef,
|
||||
<8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
|
||||
%vr_1 = call <8 x i16> @llvm.x86.vcvtps2ph.256(<8 x float> %r_1, i32 0)
|
||||
%r = shufflevector <8 x i16> %vr_0, <8 x i16> %vr_1,
|
||||
<16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7,
|
||||
i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
|
||||
ret <16 x i16> %r
|
||||
}
|
||||
|
||||
;; Converts a single half-precision value (carried as an i16) to a float.
;; The scalar is bitcast to a 1-element vector and widened to 8 lanes with
;; shufflevector: lane 0 holds %v and lanes 1-7 are undef. The 8-wide
;; vcvtph2ps.256 intrinsic then converts it, and only lane 0 of the result is
;; extracted and returned.
define float @__half_to_float_uniform(i16 %v) nounwind readnone {
|
||||
%v1 = bitcast i16 %v to <1 x i16>
|
||||
%vv = shufflevector <1 x i16> %v1, <1 x i16> undef,
|
||||
<8 x i32> <i32 0, i32 undef, i32 undef, i32 undef,
|
||||
i32 undef, i32 undef, i32 undef, i32 undef>
|
||||
%rv = call <8 x float> @llvm.x86.vcvtph2ps.256(<8 x i16> %vv)
|
||||
%r = extractelement <8 x float> %rv, i32 0
|
||||
ret float %r
|
||||
}
|
||||
|
||||
;; Converts a single float to a half-precision value returned as an i16.
;; The scalar is bitcast to a 1-element vector and widened to 8 lanes with
;; shufflevector: lane 0 holds %v and lanes 1-7 are undef. The 8-wide
;; vcvtps2ph.256 intrinsic then converts it, and only lane 0 of the result is
;; extracted and returned.
define i16 @__float_to_half_uniform(float %v) nounwind readnone {
|
||||
%v1 = bitcast float %v to <1 x float>
|
||||
%vv = shufflevector <1 x float> %v1, <1 x float> undef,
|
||||
<8 x i32> <i32 0, i32 undef, i32 undef, i32 undef,
|
||||
i32 undef, i32 undef, i32 undef, i32 undef>
|
||||
; round to nearest even
|
||||
%rv = call <8 x i16> @llvm.x86.vcvtps2ph.256(<8 x float> %vv, i32 0)
|
||||
%r = extractelement <8 x i16> %rv, i32 0
|
||||
ret i16 %r
|
||||
}
|
||||
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; gather
|
||||
|
||||
gen_gather(16, i8)
|
||||
gen_gather(16, i16)
|
||||
gen_gather(16, i32)
|
||||
gen_gather(16, i64)
|
||||
|
||||
|
||||
110
builtins/target-avx2.ll
Normal file
110
builtins/target-avx2.ll
Normal file
@@ -0,0 +1,110 @@
|
||||
;; Copyright (c) 2010-2011, Intel Corporation
|
||||
;; All rights reserved.
|
||||
;;
|
||||
;; Redistribution and use in source and binary forms, with or without
|
||||
;; modification, are permitted provided that the following conditions are
|
||||
;; met:
|
||||
;;
|
||||
;; * Redistributions of source code must retain the above copyright
|
||||
;; notice, this list of conditions and the following disclaimer.
|
||||
;;
|
||||
;; * Redistributions in binary form must reproduce the above copyright
|
||||
;; notice, this list of conditions and the following disclaimer in the
|
||||
;; documentation and/or other materials provided with the distribution.
|
||||
;;
|
||||
;; * Neither the name of Intel Corporation nor the names of its
|
||||
;; contributors may be used to endorse or promote products derived from
|
||||
;; this software without specific prior written permission.
|
||||
;;
|
||||
;;
|
||||
;; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
|
||||
;; IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
||||
;; TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
|
||||
;; PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
|
||||
;; OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
;; EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
;; PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
;; PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
;; LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
;; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
;; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
include(`target-avx.ll')
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; int min/max
|
||||
|
||||
declare <8 x i32> @llvm.x86.avx2.pmins.d(<8 x i32>, <8 x i32>) nounwind readonly
|
||||
declare <8 x i32> @llvm.x86.avx2.pmaxs.d(<8 x i32>, <8 x i32>) nounwind readonly
|
||||
|
||||
;; Per-lane signed 32-bit minimum of two 8-wide vectors (%0 and %1), computed
;; with a single call to the 8-wide AVX2 pmins.d intrinsic declared above.
define <8 x i32> @__min_varying_int32(<8 x i32>, <8 x i32>) nounwind readonly alwaysinline {
|
||||
%m = call <8 x i32> @llvm.x86.avx2.pmins.d(<8 x i32> %0, <8 x i32> %1)
|
||||
ret <8 x i32> %m
|
||||
}
|
||||
|
||||
;; Per-lane signed 32-bit maximum of two 8-wide vectors (%0 and %1), computed
;; with a single call to the 8-wide AVX2 pmaxs.d intrinsic declared above.
define <8 x i32> @__max_varying_int32(<8 x i32>, <8 x i32>) nounwind readonly alwaysinline {
|
||||
%m = call <8 x i32> @llvm.x86.avx2.pmaxs.d(<8 x i32> %0, <8 x i32> %1)
|
||||
ret <8 x i32> %m
|
||||
}
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; unsigned int min/max
|
||||
|
||||
declare <8 x i32> @llvm.x86.avx2.pminu.d(<8 x i32>, <8 x i32>) nounwind readonly
|
||||
declare <8 x i32> @llvm.x86.avx2.pmaxu.d(<8 x i32>, <8 x i32>) nounwind readonly
|
||||
|
||||
;; Per-lane unsigned 32-bit minimum of two 8-wide vectors (%0 and %1), computed
;; with a single call to the 8-wide AVX2 pminu.d intrinsic declared above.
define <8 x i32> @__min_varying_uint32(<8 x i32>, <8 x i32>) nounwind readonly alwaysinline {
|
||||
%m = call <8 x i32> @llvm.x86.avx2.pminu.d(<8 x i32> %0, <8 x i32> %1)
|
||||
ret <8 x i32> %m
|
||||
}
|
||||
|
||||
;; Per-lane unsigned 32-bit maximum of two 8-wide vectors (%0 and %1), computed
;; with a single call to the 8-wide AVX2 pmaxu.d intrinsic declared above.
define <8 x i32> @__max_varying_uint32(<8 x i32>, <8 x i32>) nounwind readonly alwaysinline {
|
||||
%m = call <8 x i32> @llvm.x86.avx2.pmaxu.d(<8 x i32> %0, <8 x i32> %1)
|
||||
ret <8 x i32> %m
|
||||
}
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; float/half conversions
|
||||
|
||||
declare <8 x float> @llvm.x86.vcvtph2ps.256(<8 x i16>) nounwind readnone
|
||||
; 0 is round nearest even
|
||||
declare <8 x i16> @llvm.x86.vcvtps2ph.256(<8 x float>, i32) nounwind readnone
|
||||
|
||||
;; Converts 8 half-precision values (carried as i16 lanes in %v) to 8 floats
;; with a single call to the vcvtph2ps.256 intrinsic.
define <8 x float> @__half_to_float_varying(<8 x i16> %v) nounwind readnone {
|
||||
%r = call <8 x float> @llvm.x86.vcvtph2ps.256(<8 x i16> %v)
|
||||
ret <8 x float> %r
|
||||
}
|
||||
|
||||
;; Converts 8 floats in %v to 8 half-precision values (returned as i16 lanes)
;; with a single call to the vcvtps2ph.256 intrinsic. The i32 0 immediate
;; selects round-to-nearest-even, per the comment on the intrinsic's
;; declaration.
define <8 x i16> @__float_to_half_varying(<8 x float> %v) nounwind readnone {
|
||||
%r = call <8 x i16> @llvm.x86.vcvtps2ph.256(<8 x float> %v, i32 0)
|
||||
ret <8 x i16> %r
|
||||
}
|
||||
|
||||
;; Converts a single half-precision value (carried as an i16) to a float.
;; The scalar is bitcast to a 1-element vector and widened to 8 lanes with
;; shufflevector: lane 0 holds %v and lanes 1-7 are undef. The 8-wide
;; vcvtph2ps.256 intrinsic then converts it, and only lane 0 of the result is
;; extracted and returned.
define float @__half_to_float_uniform(i16 %v) nounwind readnone {
|
||||
%v1 = bitcast i16 %v to <1 x i16>
|
||||
%vv = shufflevector <1 x i16> %v1, <1 x i16> undef,
|
||||
<8 x i32> <i32 0, i32 undef, i32 undef, i32 undef,
|
||||
i32 undef, i32 undef, i32 undef, i32 undef>
|
||||
%rv = call <8 x float> @llvm.x86.vcvtph2ps.256(<8 x i16> %vv)
|
||||
%r = extractelement <8 x float> %rv, i32 0
|
||||
ret float %r
|
||||
}
|
||||
|
||||
;; Converts a single float to a half-precision value returned as an i16.
;; The scalar is bitcast to a 1-element vector and widened to 8 lanes with
;; shufflevector: lane 0 holds %v and lanes 1-7 are undef. The 8-wide
;; vcvtps2ph.256 intrinsic then converts it, and only lane 0 of the result is
;; extracted and returned.
define i16 @__float_to_half_uniform(float %v) nounwind readnone {
|
||||
%v1 = bitcast float %v to <1 x float>
|
||||
%vv = shufflevector <1 x float> %v1, <1 x float> undef,
|
||||
<8 x i32> <i32 0, i32 undef, i32 undef, i32 undef,
|
||||
i32 undef, i32 undef, i32 undef, i32 undef>
|
||||
; round to nearest even
|
||||
%rv = call <8 x i16> @llvm.x86.vcvtps2ph.256(<8 x float> %vv, i32 0)
|
||||
%r = extractelement <8 x i16> %rv, i32 0
|
||||
ret i16 %r
|
||||
}
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; gather
|
||||
|
||||
gen_gather(8, i8)
|
||||
gen_gather(8, i16)
|
||||
gen_gather(8, i32)
|
||||
gen_gather(8, i64)
|
||||
935
builtins/target-generic-1.ll
Executable file
935
builtins/target-generic-1.ll
Executable file
@@ -0,0 +1,935 @@
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; Define the standard library builtins for the NOVEC target
|
||||
define(`MASK',`i32')
|
||||
define(`WIDTH',`1')
|
||||
include(`util.m4')
|
||||
; Define some basics for a 1-wide target
|
||||
stdlib_core()
|
||||
packed_load_and_store()
|
||||
scans()
|
||||
int64minmax()
|
||||
aossoa()
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; masked store
|
||||
|
||||
gen_masked_store(1, i8, 8)
|
||||
gen_masked_store(1, i16, 16)
|
||||
gen_masked_store(1, i32, 32)
|
||||
gen_masked_store(1, i64, 64)
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; unaligned loads/loads+broadcasts
|
||||
|
||||
load_and_broadcast(1, i8, 8)
|
||||
load_and_broadcast(1, i16, 16)
|
||||
load_and_broadcast(1, i32, 32)
|
||||
load_and_broadcast(1, i64, 64)
|
||||
|
||||
masked_load(1, i8, 8, 1)
|
||||
masked_load(1, i16, 16, 2)
|
||||
masked_load(1, i32, 32, 4)
|
||||
masked_load(1, i64, 64, 8)
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; gather/scatter
|
||||
|
||||
; define these with the macros from util.m4
|
||||
|
||||
gen_gather(1, i8)
|
||||
gen_gather(1, i16)
|
||||
gen_gather(1, i32)
|
||||
gen_gather(1, i64)
|
||||
|
||||
gen_scatter(1, i8)
|
||||
gen_scatter(1, i16)
|
||||
gen_scatter(1, i32)
|
||||
gen_scatter(1, i64)
|
||||
|
||||
|
||||
;; Pick between two 1-wide i8 vectors based on the mask lane: the first
;; operand is returned when the mask lane is zero, the second otherwise.
;; The lane is handled as a scalar on purpose; the vector form of the
;; select ran into problems with the LLVM scalarizer.
define <1 x i8> @__vselect_i8(<1 x i8>, <1 x i8> ,
                              <1 x i32> %mask) nounwind readnone alwaysinline {
  %old_elt = extractelement <1 x i8> %0, i32 0
  %new_elt = extractelement <1 x i8> %1, i32 0
  %mask_elt = extractelement <1 x i32> %mask, i32 0
  %mask_off = icmp eq i32 %mask_elt, 0
  %picked = select i1 %mask_off, i8 %old_elt, i8 %new_elt
  %picked_vec = insertelement <1 x i8> undef, i8 %picked, i32 0
  ret <1 x i8> %picked_vec
}
|
||||
|
||||
;; Pick between two 1-wide i16 vectors based on the mask lane: the first
;; operand is returned when the mask lane is zero, the second otherwise.
;; The single lane is extracted, selected as a scalar, and reinserted.
define <1 x i16> @__vselect_i16(<1 x i16>, <1 x i16> ,
                                <1 x i32> %mask) nounwind readnone alwaysinline {
  %old_elt = extractelement <1 x i16> %0, i32 0
  %new_elt = extractelement <1 x i16> %1, i32 0
  %mask_elt = extractelement <1 x i32> %mask, i32 0
  %mask_off = icmp eq i32 %mask_elt, 0
  %picked = select i1 %mask_off, i16 %old_elt, i16 %new_elt
  %picked_vec = insertelement <1 x i16> undef, i16 %picked, i32 0
  ret <1 x i16> %picked_vec
}
|
||||
|
||||
|
||||
;; Pick between two 1-wide i32 vectors based on the mask lane: the first
;; operand is returned when the mask lane is zero, the second otherwise.
;; The single lane is extracted, selected as a scalar, and reinserted.
define <1 x i32> @__vselect_i32(<1 x i32>, <1 x i32> ,
                                <1 x i32> %mask) nounwind readnone alwaysinline {
  %old_elt = extractelement <1 x i32> %0, i32 0
  %new_elt = extractelement <1 x i32> %1, i32 0
  %mask_elt = extractelement <1 x i32> %mask, i32 0
  %mask_off = icmp eq i32 %mask_elt, 0
  %picked = select i1 %mask_off, i32 %old_elt, i32 %new_elt
  %picked_vec = insertelement <1 x i32> undef, i32 %picked, i32 0
  ret <1 x i32> %picked_vec
}
|
||||
;; Pick between two 1-wide i64 vectors based on the mask lane: the first
;; operand is returned when the mask lane is zero, the second otherwise.
;; The single lane is extracted, selected as a scalar, and reinserted.
define <1 x i64> @__vselect_i64(<1 x i64>, <1 x i64> ,
                                <1 x i32> %mask) nounwind readnone alwaysinline {
  %old_elt = extractelement <1 x i64> %0, i32 0
  %new_elt = extractelement <1 x i64> %1, i32 0
  %mask_elt = extractelement <1 x i32> %mask, i32 0
  %mask_off = icmp eq i32 %mask_elt, 0
  %picked = select i1 %mask_off, i64 %old_elt, i64 %new_elt
  %picked_vec = insertelement <1 x i64> undef, i64 %picked, i32 0
  ret <1 x i64> %picked_vec
}
|
||||
|
||||
;; Pick between two 1-wide float vectors based on the mask lane: the first
;; operand is returned when the mask lane is zero, the second otherwise.
;; The single lane is extracted, selected as a scalar, and reinserted.
define <1 x float> @__vselect_float(<1 x float>, <1 x float>,
                                    <1 x i32> %mask) nounwind readnone alwaysinline {
  %old_elt = extractelement <1 x float> %0, i32 0
  %new_elt = extractelement <1 x float> %1, i32 0
  %mask_elt = extractelement <1 x i32> %mask, i32 0
  %mask_off = icmp eq i32 %mask_elt, 0
  %picked = select i1 %mask_off, float %old_elt, float %new_elt
  %picked_vec = insertelement <1 x float> undef, float %picked, i32 0
  ret <1 x float> %picked_vec
}
|
||||
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; masked store
|
||||
|
||||
;; Masked store of a 1-wide i8 vector done as load / blend / store: the
;; current memory contents are kept when the mask lane is zero and are
;; replaced by the new value otherwise.
;; The accesses use align 1, the natural alignment of i8.  The earlier
;; align 4 promised more alignment than a byte pointer provides, which is
;; undefined behavior whenever the address is not a multiple of four.
define void @__masked_store_blend_8(<1 x i8>* nocapture, <1 x i8>,
                                    <1 x i32> %mask) nounwind alwaysinline {
  %val = load <1 x i8> * %0, align 1
  %newval = call <1 x i8> @__vselect_i8(<1 x i8> %val, <1 x i8> %1, <1 x i32> %mask)
  store <1 x i8> %newval, <1 x i8> * %0, align 1
  ret void
}
|
||||
;; Masked store of a 1-wide i16 vector done as load / blend / store: the
;; current memory contents are kept when the mask lane is zero and are
;; replaced by the new value otherwise.
;; The accesses use align 2, the natural alignment of i16.  The earlier
;; align 4 promised more alignment than an i16 pointer provides, which is
;; undefined behavior whenever the address is not a multiple of four.
define void @__masked_store_blend_16(<1 x i16>* nocapture, <1 x i16>,
                                     <1 x i32> %mask) nounwind alwaysinline {
  %val = load <1 x i16> * %0, align 2
  %newval = call <1 x i16> @__vselect_i16(<1 x i16> %val, <1 x i16> %1, <1 x i32> %mask)
  store <1 x i16> %newval, <1 x i16> * %0, align 2
  ret void
}
|
||||
|
||||
|
||||
;; Masked store of a 1-wide i32 vector done as load / blend / store: the
;; current memory contents are kept when the mask lane is zero and are
;; replaced by the new value otherwise.
define void @__masked_store_blend_32(<1 x i32>* nocapture, <1 x i32>,
                                     <1 x i32> %mask) nounwind alwaysinline {
  %current = load <1 x i32> * %0, align 4
  %blended = call <1 x i32> @__vselect_i32(<1 x i32> %current, <1 x i32> %1, <1 x i32> %mask)
  store <1 x i32> %blended, <1 x i32> * %0, align 4
  ret void
}
|
||||
|
||||
;; Masked store of a 1-wide i64 vector done as load / blend / store: the
;; current memory contents are kept when the mask lane is zero and are
;; replaced by the new value otherwise.  The accesses only assume 4-byte
;; alignment.
define void @__masked_store_blend_64(<1 x i64>* nocapture, <1 x i64>,
                                     <1 x i32> %mask) nounwind alwaysinline {
  %current = load <1 x i64> * %0, align 4
  %blended = call <1 x i64> @__vselect_i64(<1 x i64> %current, <1 x i64> %1, <1 x i32> %mask)
  store <1 x i64> %blended, <1 x i64> * %0, align 4
  ret void
}
|
||||
|
||||
;; Return the sign bit of the single mask lane as a one-bit lane bitmask.
define i32 @__movmsk(<1 x i32>) nounwind readnone alwaysinline {
  %lane0 = extractelement <1 x i32> %0, i32 0
  %signbit = lshr i32 %lane0, 31
  ret i32 %signbit
}
|
||||
|
||||
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; rounding
|
||||
;;
|
||||
;; There are not any rounding instructions in SSE2, so we have to emulate
|
||||
;; the functionality with multiple instructions...
|
||||
|
||||
; The code for __round_* is the result of compiling the following source
|
||||
; code.
|
||||
;
|
||||
; export float Round(float x) {
|
||||
; unsigned int sign = signbits(x);
|
||||
; unsigned int ix = intbits(x);
|
||||
; ix ^= sign;
|
||||
; x = floatbits(ix);
|
||||
; x += 0x1.0p23f;
|
||||
; x -= 0x1.0p23f;
|
||||
; ix = intbits(x);
|
||||
; ix ^= sign;
|
||||
; x = floatbits(ix);
|
||||
; return x;
|
||||
;}
|
||||
|
||||
;; Round with the add / subtract 2^23 trick: strip the sign bit, add and
;; then subtract 8388608.0, and put the sign bit back.
define <1 x float> @__round_varying_float(<1 x float>) nounwind readonly alwaysinline {
  %in_bits = bitcast <1 x float> %0 to <1 x i32>
  %sign_only = and <1 x i32> %in_bits, <i32 -2147483648>
  %abs_bits = xor <1 x i32> %in_bits, %sign_only
  %abs_val = bitcast <1 x i32> %abs_bits to <1 x float>
  %bumped = fadd <1 x float> %abs_val, <float 8.388608e+06>
  %rounded = fadd <1 x float> %bumped, <float -8.388608e+06>
  %rounded_bits = bitcast <1 x float> %rounded to <1 x i32>
  %signed_bits = xor <1 x i32> %rounded_bits, %sign_only
  %rounded_signed = bitcast <1 x i32> %signed_bits to <1 x float>
  ret <1 x float> %rounded_signed
}
|
||||
|
||||
;; Similarly, for implementations of the __floor* functions below, we have the
|
||||
;; bitcode from compiling the following source code...
|
||||
|
||||
;export float Floor(float x) {
|
||||
; float y = Round(x);
|
||||
; unsigned int cmp = y > x ? 0xffffffff : 0;
|
||||
; float delta = -1.f;
|
||||
; unsigned int idelta = intbits(delta);
|
||||
; idelta &= cmp;
|
||||
; delta = floatbits(idelta);
|
||||
; return y + delta;
|
||||
;}
|
||||
|
||||
;; Floor built on the rounding routine: round first, then add -1.0 when the
;; rounded value came out greater than the input.
define <1 x float> @__floor_varying_float(<1 x float>) nounwind readonly alwaysinline {
  %rounded = tail call <1 x float> @__round_varying_float(<1 x float> %0) nounwind
  %too_big = fcmp ogt <1 x float> %rounded, %0
  %too_big_mask = sext <1 x i1> %too_big to <1 x i32>
  ; -1082130432 is the bit pattern of -1.0f; masking leaves 0.0 otherwise
  %delta_bits = and <1 x i32> %too_big_mask, <i32 -1082130432>
  %delta = bitcast <1 x i32> %delta_bits to <1 x float>
  %floored = fadd <1 x float> %rounded, %delta
  ret <1 x float> %floored
}
|
||||
|
||||
;; And here is the code we compiled to get the __ceil* functions below
|
||||
;
|
||||
;export uniform float Ceil(uniform float x) {
|
||||
; uniform float y = Round(x);
|
||||
; uniform int yltx = y < x ? 0xffffffff : 0;
|
||||
; uniform float delta = 1.f;
|
||||
; uniform int idelta = intbits(delta);
|
||||
; idelta &= yltx;
|
||||
; delta = floatbits(idelta);
|
||||
; return y + delta;
|
||||
;}
|
||||
|
||||
;; Ceiling built on the rounding routine: round first, then add 1.0 when
;; the rounded value came out less than the input.
define <1 x float> @__ceil_varying_float(<1 x float>) nounwind readonly alwaysinline {
  %rounded = tail call <1 x float> @__round_varying_float(<1 x float> %0) nounwind
  %too_small = fcmp olt <1 x float> %rounded, %0
  %too_small_mask = sext <1 x i1> %too_small to <1 x i32>
  ; 1065353216 is the bit pattern of 1.0f; masking leaves 0.0 otherwise
  %delta_bits = and <1 x i32> %too_small_mask, <i32 1065353216>
  %delta = bitcast <1 x i32> %delta_bits to <1 x float>
  %ceiled = fadd <1 x float> %rounded, %delta
  ret <1 x float> %ceiled
}
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; rounding doubles
|
||||
|
||||
; expecting math lib to provide this
|
||||
declare double @ceil (double) nounwind readnone
|
||||
declare double @floor (double) nounwind readnone
|
||||
declare double @round (double) nounwind readnone
|
||||
;declare float @llvm.sqrt.f32(float %Val)
|
||||
declare double @llvm.sqrt.f64(double %Val)
|
||||
declare float @llvm.sin.f32(float %Val)
|
||||
declare float @llvm.cos.f32(float %Val)
|
||||
declare float @llvm.sqrt.f32(float %Val)
|
||||
declare float @llvm.exp.f32(float %Val)
|
||||
declare float @llvm.log.f32(float %Val)
|
||||
declare float @llvm.pow.f32(float %f, float %e)
|
||||
|
||||
|
||||
|
||||
|
||||
;; stuff that could be in builtins ...
|
||||
|
||||
;; Helper macro: apply a scalar function to the only lane of a 1-wide
;; vector.  $1 is the element type and $2 the scalar function to call.  It
;; expands to a complete function body that reads the unnamed argument %0.
define(`unary1to1', `
  %lane_in = extractelement <1 x $1> %0, i32 0
  %lane_out = call $1 $2($1 %lane_in)
  %vec_out = insertelement <1 x $1> undef, $1 %lane_out, i32 0
  ret <1 x $1> %vec_out
')
|
||||
|
||||
|
||||
|
||||
;; dummy 1 wide vector ops
|
||||
;; Dummy 1-wide layout change for four streams: each input vector is
;; written unchanged to the matching output pointer.
define void
@__aos_to_soa4_float1(<1 x float> %v0, <1 x float> %v1, <1 x float> %v2,
        <1 x float> %v3, <1 x float> * noalias %out0,
        <1 x float> * noalias %out1, <1 x float> * noalias %out2,
        <1 x float> * noalias %out3) nounwind alwaysinline {
  ; straight copies, one per stream
  store <1 x float> %v0, <1 x float> * %out0
  store <1 x float> %v1, <1 x float> * %out1
  store <1 x float> %v2, <1 x float> * %out2
  store <1 x float> %v3, <1 x float> * %out3
  ret void
}
|
||||
|
||||
;; Dummy 1-wide layout change in the opposite direction for four streams.
;; It performs the same four straight copies as the aos-to-soa routine,
;; written out here directly.
define void
@__soa_to_aos4_float1(<1 x float> %v0, <1 x float> %v1, <1 x float> %v2,
        <1 x float> %v3, <1 x float> * noalias %out0,
        <1 x float> * noalias %out1, <1 x float> * noalias %out2,
        <1 x float> * noalias %out3) nounwind alwaysinline {
  store <1 x float> %v0, <1 x float> * %out0
  store <1 x float> %v1, <1 x float> * %out1
  store <1 x float> %v2, <1 x float> * %out2
  store <1 x float> %v3, <1 x float> * %out3
  ret void
}
|
||||
|
||||
;; Dummy 1-wide layout change for three streams: each input vector is
;; written unchanged to the matching output pointer.
;; Carries nounwind alwaysinline like the four-stream routines in this
;; file, so this trivial body is inlined into callers in the same way.
define void
@__aos_to_soa3_float1(<1 x float> %v0, <1 x float> %v1,
         <1 x float> %v2, <1 x float> * %out0, <1 x float> * %out1,
         <1 x float> * %out2) nounwind alwaysinline {
  store <1 x float> %v0, <1 x float> * %out0
  store <1 x float> %v1, <1 x float> * %out1
  store <1 x float> %v2, <1 x float> * %out2
  ret void
}
|
||||
|
||||
;; Dummy 1-wide layout change in the opposite direction for three streams;
;; it forwards to the aos-to-soa routine, which does the straight copies.
;; Carries nounwind alwaysinline like the four-stream routines in this
;; file, so the forwarding call is inlined into callers in the same way.
define void
@__soa_to_aos3_float1(<1 x float> %v0, <1 x float> %v1,
         <1 x float> %v2, <1 x float> * %out0, <1 x float> * %out1,
         <1 x float> * %out2) nounwind alwaysinline {
  call void @__aos_to_soa3_float1(<1 x float> %v0, <1 x float> %v1,
         <1 x float> %v2, <1 x float> * %out0, <1 x float> * %out1,
         <1 x float> * %out2)
  ret void
}
|
||||
|
||||
|
||||
;; end builtins
|
||||
|
||||
|
||||
;; Round the single double lane with the scalar math library routine:
;; extract lane 0, call it, and rebuild the 1-wide vector.
define <1 x double> @__round_varying_double(<1 x double>) nounwind readonly alwaysinline {
  %lane_in = extractelement <1 x double> %0, i32 0
  %lane_out = call double @round(double %lane_in)
  %vec_out = insertelement <1 x double> undef, double %lane_out, i32 0
  ret <1 x double> %vec_out
}
|
||||
|
||||
;; Floor of the single double lane with the scalar math library routine:
;; extract lane 0, call it, and rebuild the 1-wide vector.
define <1 x double> @__floor_varying_double(<1 x double>) nounwind readonly alwaysinline {
  %lane_in = extractelement <1 x double> %0, i32 0
  %lane_out = call double @floor(double %lane_in)
  %vec_out = insertelement <1 x double> undef, double %lane_out, i32 0
  ret <1 x double> %vec_out
}
|
||||
|
||||
|
||||
;; Ceiling of the single double lane with the scalar math library routine:
;; extract lane 0, call it, and rebuild the 1-wide vector.
define <1 x double> @__ceil_varying_double(<1 x double>) nounwind readonly alwaysinline {
  %lane_in = extractelement <1 x double> %0, i32 0
  %lane_out = call double @ceil(double %lane_in)
  %vec_out = insertelement <1 x double> undef, double %lane_out, i32 0
  ret <1 x double> %vec_out
}
|
||||
|
||||
; To do vector integer min and max, we do the vector compare and then sign
|
||||
; extend the i1 vector result to an i32 mask. The __vselect does the
|
||||
; rest...
|
||||
|
||||
;; Signed minimum: the comparison is sign extended to an i32 lane mask and
;; the select helper returns %0 where the mask is set, %1 otherwise.
define <1 x i32> @__min_varying_int32(<1 x i32>, <1 x i32>) nounwind readonly alwaysinline {
  %first_less = icmp slt <1 x i32> %0, %1
  %pick_first = sext <1 x i1> %first_less to <1 x i32>
  %smaller = call <1 x i32> @__vselect_i32(<1 x i32> %1, <1 x i32> %0, <1 x i32> %pick_first)
  ret <1 x i32> %smaller
}
|
||||
|
||||
;; Signed minimum of two scalars.
define i32 @__min_uniform_int32(i32, i32) nounwind readonly alwaysinline {
  %first_less = icmp slt i32 %0, %1
  %smaller = select i1 %first_less, i32 %0, i32 %1
  ret i32 %smaller
}
|
||||
|
||||
;; Signed maximum: the comparison is sign extended to an i32 lane mask and
;; the select helper returns %0 where the mask is set, %1 otherwise.
define <1 x i32> @__max_varying_int32(<1 x i32>, <1 x i32>) nounwind readonly alwaysinline {
  %first_greater = icmp sgt <1 x i32> %0, %1
  %pick_first = sext <1 x i1> %first_greater to <1 x i32>
  %larger = call <1 x i32> @__vselect_i32(<1 x i32> %1, <1 x i32> %0, <1 x i32> %pick_first)
  ret <1 x i32> %larger
}
|
||||
|
||||
;; Signed maximum of two scalars.
define i32 @__max_uniform_int32(i32, i32) nounwind readonly alwaysinline {
  %first_greater = icmp sgt i32 %0, %1
  %larger = select i1 %first_greater, i32 %0, i32 %1
  ret i32 %larger
}
|
||||
|
||||
; The functions for unsigned ints are similar, just with unsigned
|
||||
; comparison functions...
|
||||
|
||||
;; Unsigned minimum: the comparison is sign extended to an i32 lane mask
;; and the select helper returns %0 where the mask is set, %1 otherwise.
define <1 x i32> @__min_varying_uint32(<1 x i32>, <1 x i32>) nounwind readonly alwaysinline {
  %first_less = icmp ult <1 x i32> %0, %1
  %pick_first = sext <1 x i1> %first_less to <1 x i32>
  %smaller = call <1 x i32> @__vselect_i32(<1 x i32> %1, <1 x i32> %0, <1 x i32> %pick_first)
  ret <1 x i32> %smaller
}
|
||||
|
||||
;; Unsigned minimum of two scalars.
define i32 @__min_uniform_uint32(i32, i32) nounwind readonly alwaysinline {
  %first_less = icmp ult i32 %0, %1
  %smaller = select i1 %first_less, i32 %0, i32 %1
  ret i32 %smaller
}
|
||||
|
||||
;; Unsigned maximum: the comparison is sign extended to an i32 lane mask
;; and the select helper returns %0 where the mask is set, %1 otherwise.
define <1 x i32> @__max_varying_uint32(<1 x i32>, <1 x i32>) nounwind readonly alwaysinline {
  %first_greater = icmp ugt <1 x i32> %0, %1
  %pick_first = sext <1 x i1> %first_greater to <1 x i32>
  %larger = call <1 x i32> @__vselect_i32(<1 x i32> %1, <1 x i32> %0, <1 x i32> %pick_first)
  ret <1 x i32> %larger
}
|
||||
|
||||
;; Unsigned maximum of two scalars.
define i32 @__max_uniform_uint32(i32, i32) nounwind readonly alwaysinline {
  %first_greater = icmp ugt i32 %0, %1
  %larger = select i1 %first_greater, i32 %0, i32 %1
  ret i32 %larger
}
|
||||
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
; horizontal ops / reductions
|
||||
|
||||
declare i32 @llvm.ctpop.i32(i32) nounwind readnone
|
||||
|
||||
;; Count the set bits of a 32-bit value through the ctpop intrinsic.
define i32 @__popcnt_int32(i32) nounwind readonly alwaysinline {
  %set_bits = call i32 @llvm.ctpop.i32(i32 %0)
  ret i32 %set_bits
}
|
||||
|
||||
declare i64 @llvm.ctpop.i64(i64) nounwind readnone
|
||||
|
||||
;; Count the set bits of a 64-bit value through the ctpop intrinsic.
define i64 @__popcnt_int64(i64) nounwind readonly alwaysinline {
  %set_bits = call i64 @llvm.ctpop.i64(i64 %0)
  ret i64 %set_bits
}
|
||||
|
||||
|
||||
;; Sum over a single lane: the result is simply lane 0.
define float @__reduce_add_float(<1 x float> %v) nounwind readonly alwaysinline {
  %lane0 = extractelement <1 x float> %v, i32 0
  ret float %lane0
}
|
||||
|
||||
;; Minimum over a single lane: the result is simply lane 0.
define float @__reduce_min_float(<1 x float>) nounwind readnone {
  %lane0 = extractelement <1 x float> %0, i32 0
  ret float %lane0
}
|
||||
|
||||
;; Maximum over a single lane: the result is simply lane 0.
define float @__reduce_max_float(<1 x float>) nounwind readnone {
  %lane0 = extractelement <1 x float> %0, i32 0
  ret float %lane0
}
|
||||
|
||||
;; Sum over a single lane: the result is simply lane 0.
define i32 @__reduce_add_int32(<1 x i32> %v) nounwind readnone {
  %lane0 = extractelement <1 x i32> %v, i32 0
  ret i32 %lane0
}
|
||||
|
||||
;; Signed minimum over a single lane: the result is simply lane 0.
define i32 @__reduce_min_int32(<1 x i32>) nounwind readnone {
  %lane0 = extractelement <1 x i32> %0, i32 0
  ret i32 %lane0
}
|
||||
|
||||
;; Signed maximum over a single lane: the result is simply lane 0.
define i32 @__reduce_max_int32(<1 x i32>) nounwind readnone {
  %lane0 = extractelement <1 x i32> %0, i32 0
  ret i32 %lane0
}
|
||||
|
||||
;; Unsigned sum over a single lane: the result is simply lane 0, the same
;; value the signed variant produces.
define i32 @__reduce_add_uint32(<1 x i32> %v) nounwind readnone {
  %lane0 = extractelement <1 x i32> %v, i32 0
  ret i32 %lane0
}
|
||||
|
||||
;; Unsigned minimum over a single lane: the result is simply lane 0.
define i32 @__reduce_min_uint32(<1 x i32>) nounwind readnone {
  %lane0 = extractelement <1 x i32> %0, i32 0
  ret i32 %lane0
}
|
||||
|
||||
;; Unsigned maximum over a single lane: the result is simply lane 0.
define i32 @__reduce_max_uint32(<1 x i32>) nounwind readnone {
  %lane0 = extractelement <1 x i32> %0, i32 0
  ret i32 %lane0
}
|
||||
|
||||
|
||||
;; Sum over a single lane: the result is simply lane 0.
define double @__reduce_add_double(<1 x double>) nounwind readnone {
  %lane0 = extractelement <1 x double> %0, i32 0
  ret double %lane0
}
|
||||
|
||||
;; Minimum over a single lane: the result is simply lane 0.
define double @__reduce_min_double(<1 x double>) nounwind readnone {
  %lane0 = extractelement <1 x double> %0, i32 0
  ret double %lane0
}
|
||||
|
||||
;; Maximum over a single lane: the result is simply lane 0.
define double @__reduce_max_double(<1 x double>) nounwind readnone {
  %lane0 = extractelement <1 x double> %0, i32 0
  ret double %lane0
}
|
||||
|
||||
;; Sum over a single lane: the result is simply lane 0.
define i64 @__reduce_add_int64(<1 x i64>) nounwind readnone {
  %lane0 = extractelement <1 x i64> %0, i32 0
  ret i64 %lane0
}
|
||||
|
||||
;; Signed minimum over a single lane: the result is simply lane 0.
define i64 @__reduce_min_int64(<1 x i64>) nounwind readnone {
  %lane0 = extractelement <1 x i64> %0, i32 0
  ret i64 %lane0
}
|
||||
|
||||
;; Signed maximum over a single lane: the result is simply lane 0.
define i64 @__reduce_max_int64(<1 x i64>) nounwind readnone {
  %lane0 = extractelement <1 x i64> %0, i32 0
  ret i64 %lane0
}
|
||||
|
||||
;; Unsigned minimum over a single lane: the result is simply lane 0.
define i64 @__reduce_min_uint64(<1 x i64>) nounwind readnone {
  %lane0 = extractelement <1 x i64> %0, i32 0
  ret i64 %lane0
}
|
||||
|
||||
;; Unsigned maximum over a single lane: the result is simply lane 0.
define i64 @__reduce_max_uint64(<1 x i64>) nounwind readnone {
  %lane0 = extractelement <1 x i64> %0, i32 0
  ret i64 %lane0
}
|
||||
|
||||
;; Equality reduction for one lane: lane 0 is stored through %samevalue
;; and true is returned.  The mask argument is not consulted.
define i1 @__reduce_equal_int32(<1 x i32> %vv, i32 * %samevalue,
                                <1 x i32> %mask) nounwind alwaysinline {
  %lane0 = extractelement <1 x i32> %vv, i32 0
  store i32 %lane0, i32 * %samevalue
  ret i1 true
}
|
||||
|
||||
;; Equality reduction for one lane: lane 0 is stored through %samevalue
;; and true is returned.  The mask argument is not consulted.
define i1 @__reduce_equal_float(<1 x float> %vv, float * %samevalue,
                                <1 x i32> %mask) nounwind alwaysinline {
  %lane0 = extractelement <1 x float> %vv, i32 0
  store float %lane0, float * %samevalue
  ret i1 true
}
|
||||
|
||||
;; Equality reduction for one lane: lane 0 is stored through %samevalue
;; and true is returned.  The mask argument is not consulted.
define i1 @__reduce_equal_int64(<1 x i64> %vv, i64 * %samevalue,
                                <1 x i32> %mask) nounwind alwaysinline {
  %lane0 = extractelement <1 x i64> %vv, i32 0
  store i64 %lane0, i64 * %samevalue
  ret i1 true
}
|
||||
|
||||
;; Equality reduction for one lane: lane 0 is stored through %samevalue
;; and true is returned.  The mask argument is not consulted.
define i1 @__reduce_equal_double(<1 x double> %vv, double * %samevalue,
                                 <1 x i32> %mask) nounwind alwaysinline {
  %lane0 = extractelement <1 x double> %vv, i32 0
  store double %lane0, double * %samevalue
  ret i1 true
}
|
||||
|
||||
; extracting/reinserting elements because I want to be able to remove vectors later on
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; rcp
|
||||
|
||||
;; Reciprocal of the single lane, computed as a plain division 1.0 / x;
;; no approximation instruction or refinement step is involved.
define <1 x float> @__rcp_varying_float(<1 x float>) nounwind readonly alwaysinline {
  %lane_in = extractelement <1 x float> %0, i32 0
  %inverse = fdiv float 1., %lane_in
  %vec_out = insertelement <1 x float> undef, float %inverse, i32 0
  ret <1 x float> %vec_out
}
|
||||
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
; sqrt
|
||||
|
||||
;; Square root of the single lane through the scalar sqrt intrinsic.
define <1 x float> @__sqrt_varying_float(<1 x float>) nounwind readonly alwaysinline {
  %lane_in = extractelement <1 x float> %0, i32 0
  %root = call float @llvm.sqrt.f32(float %lane_in)
  %vec_out = insertelement <1 x float> undef, float %root, i32 0
  ret <1 x float> %vec_out
}
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
; rsqrt
|
||||
|
||||
;; Reciprocal square root, obtained by chaining the square root and the
;; reciprocal routines of this target.
define <1 x float> @__rsqrt_varying_float(<1 x float> %v) nounwind readonly alwaysinline {
  %root = call <1 x float> @__sqrt_varying_float(<1 x float> %v)
  %inv_root = call <1 x float> @__rcp_varying_float(<1 x float> %root)
  ret <1 x float> %inv_root
}
|
||||
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
; svml stuff
|
||||
|
||||
;; Sine of the single lane through the scalar sin intrinsic: extract lane
;; 0, call it, and rebuild the 1-wide vector.
define <1 x float> @__svml_sin(<1 x float>) nounwind readnone alwaysinline {
  %lane_in = extractelement <1 x float> %0, i32 0
  %lane_out = call float @llvm.sin.f32(float %lane_in)
  %vec_out = insertelement <1 x float> undef, float %lane_out, i32 0
  ret <1 x float> %vec_out
}
|
||||
|
||||
;; Cosine of the single lane through the scalar cos intrinsic: extract
;; lane 0, call it, and rebuild the 1-wide vector.
define <1 x float> @__svml_cos(<1 x float>) nounwind readnone alwaysinline {
  %lane_in = extractelement <1 x float> %0, i32 0
  %lane_out = call float @llvm.cos.f32(float %lane_in)
  %vec_out = insertelement <1 x float> undef, float %lane_out, i32 0
  ret <1 x float> %vec_out
}
|
||||
|
||||
;; Compute sine and cosine of the single lane and store them through the
;; two pointer arguments: the sine through the first pointer, the cosine
;; through the second.
;; This function writes memory, so it must not be marked readnone; with
;; that attribute the optimizer is entitled to treat the void call as dead
;; and drop both stores.
define void @__svml_sincos(<1 x float>, <1 x float> *, <1 x float> *) nounwind alwaysinline {
  %sin = call <1 x float> @__svml_sin (<1 x float> %0)
  %cos = call <1 x float> @__svml_cos (<1 x float> %0)
  store <1 x float> %sin, <1 x float> * %1
  store <1 x float> %cos, <1 x float> * %2
  ret void
}
|
||||
|
||||
;; Tangent is not implemented on this target: the argument is handed back
;; unchanged.
define <1 x float> @__svml_tan(<1 x float>) nounwind readnone alwaysinline {
  ; UNSUPPORTED!
  ret <1 x float> %0
}
|
||||
|
||||
;; Arc tangent is not implemented on this target: the argument is handed
;; back unchanged.
define <1 x float> @__svml_atan(<1 x float>) nounwind readnone alwaysinline {
  ; UNSUPPORTED!
  ret <1 x float> %0
}
|
||||
|
||||
;; Two-argument arc tangent is not implemented on this target: the first
;; argument is handed back unchanged and the second is ignored.
define <1 x float> @__svml_atan2(<1 x float>, <1 x float>) nounwind readnone alwaysinline {
  ; UNSUPPORTED!
  ret <1 x float> %0
}
|
||||
|
||||
;; Exponential of the single lane through the scalar exp intrinsic:
;; extract lane 0, call it, and rebuild the 1-wide vector.
define <1 x float> @__svml_exp(<1 x float>) nounwind readnone alwaysinline {
  %lane_in = extractelement <1 x float> %0, i32 0
  %lane_out = call float @llvm.exp.f32(float %lane_in)
  %vec_out = insertelement <1 x float> undef, float %lane_out, i32 0
  ret <1 x float> %vec_out
}
|
||||
|
||||
;; Logarithm of the single lane through the scalar log intrinsic: extract
;; lane 0, call it, and rebuild the 1-wide vector.
define <1 x float> @__svml_log(<1 x float>) nounwind readnone alwaysinline {
  %lane_in = extractelement <1 x float> %0, i32 0
  %lane_out = call float @llvm.log.f32(float %lane_in)
  %vec_out = insertelement <1 x float> undef, float %lane_out, i32 0
  ret <1 x float> %vec_out
}
|
||||
|
||||
;; Power function for the single lane through the scalar pow intrinsic;
;; the first argument is the base and the second the exponent.
define <1 x float> @__svml_pow(<1 x float>, <1 x float>) nounwind readnone alwaysinline {
  %base = extractelement <1 x float> %0, i32 0
  %exponent = extractelement <1 x float> %1, i32 0
  %power = call float @llvm.pow.f32(float %base,float %exponent)
  %vec_out = insertelement <1 x float> undef, float %power, i32 0
  ret <1 x float> %vec_out
}
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; float min/max
|
||||
|
||||
;; Float maximum of the single lane.  The ordered compare is false when
;; either input is NaN, in which case the second operand is returned.
define <1 x float> @__max_varying_float(<1 x float>, <1 x float>) nounwind readonly alwaysinline {
  %lhs = extractelement <1 x float> %0, i32 0
  %rhs = extractelement <1 x float> %1, i32 0
  %lhs_greater = fcmp ogt float %lhs, %rhs
  %larger = select i1 %lhs_greater, float %lhs, float %rhs
  %vec_out = insertelement <1 x float> undef, float %larger, i32 0
  ret <1 x float> %vec_out
}
|
||||
|
||||
;; Float minimum of the single lane.  The ordered compare is false when
;; either input is NaN, in which case the second operand is returned.
define <1 x float> @__min_varying_float(<1 x float>, <1 x float>) nounwind readonly alwaysinline {
  %lhs = extractelement <1 x float> %0, i32 0
  %rhs = extractelement <1 x float> %1, i32 0
  %lhs_less = fcmp olt float %lhs, %rhs
  %smaller = select i1 %lhs_less, float %lhs, float %rhs
  %vec_out = insertelement <1 x float> undef, float %smaller, i32 0
  ret <1 x float> %vec_out
}
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; double precision sqrt
|
||||
|
||||
;declare <2 x double> @llvm.x86.sse2.sqrt.pd(<2 x double>) nounwind readnone
|
||||
|
||||
;; Square root of the single double lane through the scalar sqrt
;; intrinsic: extract lane 0, call it, and rebuild the 1-wide vector.
define <1 x double> @__sqrt_varying_double(<1 x double>) nounwind alwaysinline {
  %lane_in = extractelement <1 x double> %0, i32 0
  %lane_out = call double @llvm.sqrt.f64(double %lane_in)
  %vec_out = insertelement <1 x double> undef, double %lane_out, i32 0
  ret <1 x double> %vec_out
}
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; double precision min/max
|
||||
|
||||
;declare <2 x double> @llvm.x86.sse2.max.pd(<2 x double>, <2 x double>) nounwind readnone
|
||||
;declare <2 x double> @llvm.x86.sse2.min.pd(<2 x double>, <2 x double>) nounwind readnone
|
||||
|
||||
;; Double minimum of the single lane.  The ordered compare is false when
;; either input is NaN, in which case the second operand is returned.
define <1 x double> @__min_varying_double(<1 x double>, <1 x double>) nounwind readnone {
  %lhs = extractelement <1 x double> %0, i32 0
  %rhs = extractelement <1 x double> %1, i32 0
  %lhs_less = fcmp olt double %lhs, %rhs
  %smaller = select i1 %lhs_less, double %lhs, double %rhs
  %vec_out = insertelement <1 x double> undef, double %smaller, i32 0
  ret <1 x double> %vec_out
}
|
||||
|
||||
;; Double maximum of the single lane.  The ordered compare is false when
;; either input is NaN, in which case the second operand is returned.
define <1 x double> @__max_varying_double(<1 x double>, <1 x double>) nounwind readnone {
  %lhs = extractelement <1 x double> %0, i32 0
  %rhs = extractelement <1 x double> %1, i32 0
  %lhs_greater = fcmp ogt double %lhs, %rhs
  %larger = select i1 %lhs_greater, double %lhs, double %rhs
  %vec_out = insertelement <1 x double> undef, double %larger, i32 0
  ret <1 x double> %vec_out
}
|
||||
|
||||
|
||||
;; Uniform reciprocal, computed as a plain division 1.0 / x.
define float @__rcp_uniform_float(float) nounwind readonly alwaysinline {
  %inverse = fdiv float 1., %0
  ret float %inverse
}
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; rounding floats
|
||||
|
||||
;; Uniform round: put the scalar into a 1-wide vector, run the varying
;; implementation on it, and return lane 0 of the result.
define float @__round_uniform_float(float) nounwind readonly alwaysinline {
  %as_vec = insertelement<1 x float> undef, float %0, i32 0
  %rounded_vec = call <1 x float> @__round_varying_float(<1 x float> %as_vec)
  %rounded = extractelement <1 x float> %rounded_vec, i32 0
  ret float %rounded
}
|
||||
|
||||
;; Uniform floor: put the scalar into a 1-wide vector, run the varying
;; implementation on it, and return lane 0 of the result.
define float @__floor_uniform_float(float) nounwind readonly alwaysinline {
  %as_vec = insertelement<1 x float> undef, float %0, i32 0
  %floored_vec = call <1 x float> @__floor_varying_float(<1 x float> %as_vec)
  %floored = extractelement <1 x float> %floored_vec, i32 0
  ret float %floored
}
|
||||
|
||||
;; Uniform ceiling: put the scalar into a 1-wide vector, run the varying
;; implementation on it, and return lane 0 of the result.
define float @__ceil_uniform_float(float) nounwind readonly alwaysinline {
  %as_vec = insertelement<1 x float> undef, float %0, i32 0
  %ceiled_vec = call <1 x float> @__ceil_varying_float(<1 x float> %as_vec)
  %ceiled = extractelement <1 x float> %ceiled_vec, i32 0
  ret float %ceiled
}
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; rounding doubles
|
||||
|
||||
|
||||
;; Uniform double round, forwarded to the math library routine.
define double @__round_uniform_double(double) nounwind readonly alwaysinline {
  %rounded = call double @round(double %0)
  ret double %rounded
}
|
||||
|
||||
;; Uniform double floor, forwarded to the math library routine.
define double @__floor_uniform_double(double) nounwind readonly alwaysinline {
  %floored = call double @floor(double %0)
  ret double %floored
}
|
||||
|
||||
;; Uniform double ceiling, forwarded to the math library routine.
define double @__ceil_uniform_double(double) nounwind readonly alwaysinline {
  %ceiled = call double @ceil(double %0)
  ret double %ceiled
}
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; sqrt
|
||||
|
||||
|
||||
;; Uniform float square root through the sqrt intrinsic.
define float @__sqrt_uniform_float(float) nounwind readonly alwaysinline {
  %root = call float @llvm.sqrt.f32(float %0)
  ret float %root
}
|
||||
|
||||
;; Uniform double square root through the sqrt intrinsic.
define double @__sqrt_uniform_double(double) nounwind readonly alwaysinline {
  %root = call double @llvm.sqrt.f64(double %0)
  ret double %root
}
|
||||
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; rsqrt
|
||||
|
||||
|
||||
;; Uniform reciprocal square root, obtained by chaining the uniform square
;; root and the uniform reciprocal.
define float @__rsqrt_uniform_float(float) nounwind readonly alwaysinline {
  %root = call float @__sqrt_uniform_float(float %0)
  %inv_root = call float @__rcp_uniform_float(float %root)
  ret float %inv_root
}
|
||||
|
||||
|
||||
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; fastmath
|
||||
|
||||
|
||||
;; Fast-math hook; this target has nothing to set up, so the body is empty.
define void @__fastmath() nounwind alwaysinline {
  ; intentionally does nothing
  ret void
}
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; float min/max
|
||||
|
||||
|
||||
;; Uniform float maximum.  The ordered compare is false when either input
;; is NaN, in which case the second operand is returned.
define float @__max_uniform_float(float, float) nounwind readonly alwaysinline {
  %first_greater = fcmp ogt float %0, %1
  %larger = select i1 %first_greater, float %0, float %1
  ret float %larger
}
|
||||
|
||||
;; Uniform float minimum.  The ordered compare is false when either input
;; is NaN, in which case the second operand is returned.
define float @__min_uniform_float(float, float) nounwind readonly alwaysinline {
  %first_less = fcmp olt float %0, %1
  %smaller = select i1 %first_less, float %0, float %1
  ret float %smaller
}
|
||||
;; Uniform double maximum.  The ordered compare is false when either input
;; is NaN, in which case the second operand is returned.
define double @__max_uniform_double(double, double) nounwind readonly alwaysinline {
  %first_greater = fcmp ogt double %0, %1
  %larger = select i1 %first_greater, double %0, double %1
  ret double %larger
}
|
||||
|
||||
;; Uniform double minimum.  The ordered compare is false when either input
;; is NaN, in which case the second operand is returned.
define double @__min_uniform_double(double, double) nounwind readonly alwaysinline {
  %first_less = fcmp olt double %0, %1
  %smaller = select i1 %first_less, double %0, double %1
  ret double %smaller
}
|
||||
|
||||
define_shuffles()
|
||||
|
||||
ctlztz()
|
||||
|
||||
define_prefetches()
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; half conversion routines
|
||||
|
||||
declare float @__half_to_float_uniform(i16 %v) nounwind readnone
|
||||
declare <WIDTH x float> @__half_to_float_varying(<WIDTH x i16> %v) nounwind readnone
|
||||
declare i16 @__float_to_half_uniform(float %v) nounwind readnone
|
||||
declare <WIDTH x i16> @__float_to_half_varying(<WIDTH x float> %v) nounwind readnone
|
||||
|
||||
34
builtins/target-generic-16.ll
Normal file
34
builtins/target-generic-16.ll
Normal file
@@ -0,0 +1,34 @@
|
||||
;; Copyright (c) 2010-2011, Intel Corporation
|
||||
;; All rights reserved.
|
||||
;;
|
||||
;; Redistribution and use in source and binary forms, with or without
|
||||
;; modification, are permitted provided that the following conditions are
|
||||
;; met:
|
||||
;;
|
||||
;; * Redistributions of source code must retain the above copyright
|
||||
;; notice, this list of conditions and the following disclaimer.
|
||||
;;
|
||||
;; * Redistributions in binary form must reproduce the above copyright
|
||||
;; notice, this list of conditions and the following disclaimer in the
|
||||
;; documentation and/or other materials provided with the distribution.
|
||||
;;
|
||||
;; * Neither the name of Intel Corporation nor the names of its
|
||||
;; contributors may be used to endorse or promote products derived from
|
||||
;; this software without specific prior written permission.
|
||||
;;
|
||||
;;
|
||||
;; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
|
||||
;; IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
||||
;; TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
|
||||
;; PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
|
||||
;; OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
;; EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
;; PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
;; PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
;; LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
;; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
;; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
define(`WIDTH',`16')
|
||||
include(`target-generic-common.ll')
|
||||
|
||||
34
builtins/target-generic-4.ll
Normal file
34
builtins/target-generic-4.ll
Normal file
@@ -0,0 +1,34 @@
|
||||
;; Copyright (c) 2010-2011, Intel Corporation
|
||||
;; All rights reserved.
|
||||
;;
|
||||
;; Redistribution and use in source and binary forms, with or without
|
||||
;; modification, are permitted provided that the following conditions are
|
||||
;; met:
|
||||
;;
|
||||
;; * Redistributions of source code must retain the above copyright
|
||||
;; notice, this list of conditions and the following disclaimer.
|
||||
;;
|
||||
;; * Redistributions in binary form must reproduce the above copyright
|
||||
;; notice, this list of conditions and the following disclaimer in the
|
||||
;; documentation and/or other materials provided with the distribution.
|
||||
;;
|
||||
;; * Neither the name of Intel Corporation nor the names of its
|
||||
;; contributors may be used to endorse or promote products derived from
|
||||
;; this software without specific prior written permission.
|
||||
;;
|
||||
;;
|
||||
;; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
|
||||
;; IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
||||
;; TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
|
||||
;; PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
|
||||
;; OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
;; EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
;; PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
;; PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
;; LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
;; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
;; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
define(`WIDTH',`4')
|
||||
include(`target-generic-common.ll')
|
||||
|
||||
34
builtins/target-generic-8.ll
Normal file
34
builtins/target-generic-8.ll
Normal file
@@ -0,0 +1,34 @@
|
||||
;; Copyright (c) 2010-2011, Intel Corporation
|
||||
;; All rights reserved.
|
||||
;;
|
||||
;; Redistribution and use in source and binary forms, with or without
|
||||
;; modification, are permitted provided that the following conditions are
|
||||
;; met:
|
||||
;;
|
||||
;; * Redistributions of source code must retain the above copyright
|
||||
;; notice, this list of conditions and the following disclaimer.
|
||||
;;
|
||||
;; * Redistributions in binary form must reproduce the above copyright
|
||||
;; notice, this list of conditions and the following disclaimer in the
|
||||
;; documentation and/or other materials provided with the distribution.
|
||||
;;
|
||||
;; * Neither the name of Intel Corporation nor the names of its
|
||||
;; contributors may be used to endorse or promote products derived from
|
||||
;; this software without specific prior written permission.
|
||||
;;
|
||||
;;
|
||||
;; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
|
||||
;; IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
||||
;; TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
|
||||
;; PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
|
||||
;; OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
;; EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
;; PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
;; PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
;; LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
;; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
;; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
define(`WIDTH',`8')
|
||||
include(`target-generic-common.ll')
|
||||
|
||||
336
builtins/target-generic-common.ll
Normal file
336
builtins/target-generic-common.ll
Normal file
@@ -0,0 +1,336 @@
|
||||
;; Copyright (c) 2010-2011, Intel Corporation
|
||||
;; All rights reserved.
|
||||
;;
|
||||
;; Redistribution and use in source and binary forms, with or without
|
||||
;; modification, are permitted provided that the following conditions are
|
||||
;; met:
|
||||
;;
|
||||
;; * Redistributions of source code must retain the above copyright
|
||||
;; notice, this list of conditions and the following disclaimer.
|
||||
;;
|
||||
;; * Redistributions in binary form must reproduce the above copyright
|
||||
;; notice, this list of conditions and the following disclaimer in the
|
||||
;; documentation and/or other materials provided with the distribution.
|
||||
;;
|
||||
;; * Neither the name of Intel Corporation nor the names of its
|
||||
;; contributors may be used to endorse or promote products derived from
|
||||
;; this software without specific prior written permission.
|
||||
;;
|
||||
;;
|
||||
;; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
|
||||
;; IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
||||
;; TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
|
||||
;; PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
|
||||
;; OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
;; EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
;; PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
;; PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
;; LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
;; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
;; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
define(`MASK',`i1')
|
||||
include(`util.m4')
|
||||
|
||||
stdlib_core()
|
||||
scans()
|
||||
reduce_equal(WIDTH)
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; broadcast/rotate/shuffle
|
||||
|
||||
declare <WIDTH x float> @__smear_float(float) nounwind readnone
|
||||
declare <WIDTH x double> @__smear_double(double) nounwind readnone
|
||||
declare <WIDTH x i8> @__smear_i8(i8) nounwind readnone
|
||||
declare <WIDTH x i16> @__smear_i16(i16) nounwind readnone
|
||||
declare <WIDTH x i32> @__smear_i32(i32) nounwind readnone
|
||||
declare <WIDTH x i64> @__smear_i64(i64) nounwind readnone
|
||||
|
||||
declare <WIDTH x float> @__broadcast_float(<WIDTH x float>, i32) nounwind readnone
|
||||
declare <WIDTH x double> @__broadcast_double(<WIDTH x double>, i32) nounwind readnone
|
||||
declare <WIDTH x i8> @__broadcast_i8(<WIDTH x i8>, i32) nounwind readnone
|
||||
declare <WIDTH x i16> @__broadcast_i16(<WIDTH x i16>, i32) nounwind readnone
|
||||
declare <WIDTH x i32> @__broadcast_i32(<WIDTH x i32>, i32) nounwind readnone
|
||||
declare <WIDTH x i64> @__broadcast_i64(<WIDTH x i64>, i32) nounwind readnone
|
||||
|
||||
declare <WIDTH x i8> @__rotate_i8(<WIDTH x i8>, i32) nounwind readnone
|
||||
declare <WIDTH x i16> @__rotate_i16(<WIDTH x i16>, i32) nounwind readnone
|
||||
declare <WIDTH x float> @__rotate_float(<WIDTH x float>, i32) nounwind readnone
|
||||
declare <WIDTH x i32> @__rotate_i32(<WIDTH x i32>, i32) nounwind readnone
|
||||
declare <WIDTH x double> @__rotate_double(<WIDTH x double>, i32) nounwind readnone
|
||||
declare <WIDTH x i64> @__rotate_i64(<WIDTH x i64>, i32) nounwind readnone
|
||||
|
||||
declare <WIDTH x i8> @__shuffle_i8(<WIDTH x i8>, <WIDTH x i32>) nounwind readnone
|
||||
declare <WIDTH x i8> @__shuffle2_i8(<WIDTH x i8>, <WIDTH x i8>,
|
||||
<WIDTH x i32>) nounwind readnone
|
||||
declare <WIDTH x i16> @__shuffle_i16(<WIDTH x i16>, <WIDTH x i32>) nounwind readnone
|
||||
declare <WIDTH x i16> @__shuffle2_i16(<WIDTH x i16>, <WIDTH x i16>,
|
||||
<WIDTH x i32>) nounwind readnone
|
||||
declare <WIDTH x float> @__shuffle_float(<WIDTH x float>,
|
||||
<WIDTH x i32>) nounwind readnone
|
||||
declare <WIDTH x float> @__shuffle2_float(<WIDTH x float>, <WIDTH x float>,
|
||||
<WIDTH x i32>) nounwind readnone
|
||||
declare <WIDTH x i32> @__shuffle_i32(<WIDTH x i32>,
|
||||
<WIDTH x i32>) nounwind readnone
|
||||
declare <WIDTH x i32> @__shuffle2_i32(<WIDTH x i32>, <WIDTH x i32>,
|
||||
<WIDTH x i32>) nounwind readnone
|
||||
declare <WIDTH x double> @__shuffle_double(<WIDTH x double>,
|
||||
<WIDTH x i32>) nounwind readnone
|
||||
declare <WIDTH x double> @__shuffle2_double(<WIDTH x double>,
|
||||
<WIDTH x double>, <WIDTH x i32>) nounwind readnone
|
||||
declare <WIDTH x i64> @__shuffle_i64(<WIDTH x i64>,
|
||||
<WIDTH x i32>) nounwind readnone
|
||||
declare <WIDTH x i64> @__shuffle2_i64(<WIDTH x i64>, <WIDTH x i64>,
|
||||
<WIDTH x i32>) nounwind readnone
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; aos/soa
|
||||
|
||||
declare void @__soa_to_aos3_float(<WIDTH x float> %v0, <WIDTH x float> %v1,
|
||||
<WIDTH x float> %v2, float * noalias %p) nounwind
|
||||
declare void @__aos_to_soa3_float(float * noalias %p, <WIDTH x float> * %out0,
|
||||
<WIDTH x float> * %out1, <WIDTH x float> * %out2) nounwind
|
||||
declare void @__soa_to_aos4_float(<WIDTH x float> %v0, <WIDTH x float> %v1,
|
||||
<WIDTH x float> %v2, <WIDTH x float> %v3,
|
||||
float * noalias %p) nounwind
|
||||
declare void @__aos_to_soa4_float(float * noalias %p, <WIDTH x float> * noalias %out0,
|
||||
<WIDTH x float> * noalias %out1,
|
||||
<WIDTH x float> * noalias %out2,
|
||||
<WIDTH x float> * noalias %out3) nounwind
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; half conversion routines
|
||||
|
||||
declare float @__half_to_float_uniform(i16 %v) nounwind readnone
|
||||
declare <WIDTH x float> @__half_to_float_varying(<WIDTH x i16> %v) nounwind readnone
|
||||
declare i16 @__float_to_half_uniform(float %v) nounwind readnone
|
||||
declare <WIDTH x i16> @__float_to_half_varying(<WIDTH x float> %v) nounwind readnone
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; math
|
||||
|
||||
declare void @__fastmath() nounwind
|
||||
|
||||
;; round/floor/ceil
|
||||
|
||||
declare float @__round_uniform_float(float) nounwind readnone
|
||||
declare float @__floor_uniform_float(float) nounwind readnone
|
||||
declare float @__ceil_uniform_float(float) nounwind readnone
|
||||
|
||||
declare double @__round_uniform_double(double) nounwind readnone
|
||||
declare double @__floor_uniform_double(double) nounwind readnone
|
||||
declare double @__ceil_uniform_double(double) nounwind readnone
|
||||
|
||||
declare <WIDTH x float> @__round_varying_float(<WIDTH x float>) nounwind readnone
|
||||
declare <WIDTH x float> @__floor_varying_float(<WIDTH x float>) nounwind readnone
|
||||
declare <WIDTH x float> @__ceil_varying_float(<WIDTH x float>) nounwind readnone
|
||||
declare <WIDTH x double> @__round_varying_double(<WIDTH x double>) nounwind readnone
|
||||
declare <WIDTH x double> @__floor_varying_double(<WIDTH x double>) nounwind readnone
|
||||
declare <WIDTH x double> @__ceil_varying_double(<WIDTH x double>) nounwind readnone
|
||||
|
||||
;; min/max
|
||||
|
||||
declare float @__max_uniform_float(float, float) nounwind readnone
|
||||
declare float @__min_uniform_float(float, float) nounwind readnone
|
||||
declare i32 @__min_uniform_int32(i32, i32) nounwind readnone
|
||||
declare i32 @__max_uniform_int32(i32, i32) nounwind readnone
|
||||
declare i32 @__min_uniform_uint32(i32, i32) nounwind readnone
|
||||
declare i32 @__max_uniform_uint32(i32, i32) nounwind readnone
|
||||
declare i64 @__min_uniform_int64(i64, i64) nounwind readnone
|
||||
declare i64 @__max_uniform_int64(i64, i64) nounwind readnone
|
||||
declare i64 @__min_uniform_uint64(i64, i64) nounwind readnone
|
||||
declare i64 @__max_uniform_uint64(i64, i64) nounwind readnone
|
||||
declare double @__min_uniform_double(double, double) nounwind readnone
|
||||
declare double @__max_uniform_double(double, double) nounwind readnone
|
||||
|
||||
declare <WIDTH x float> @__max_varying_float(<WIDTH x float>,
|
||||
<WIDTH x float>) nounwind readnone
|
||||
declare <WIDTH x float> @__min_varying_float(<WIDTH x float>,
|
||||
<WIDTH x float>) nounwind readnone
|
||||
declare <WIDTH x i32> @__min_varying_int32(<WIDTH x i32>, <WIDTH x i32>) nounwind readnone
|
||||
declare <WIDTH x i32> @__max_varying_int32(<WIDTH x i32>, <WIDTH x i32>) nounwind readnone
|
||||
declare <WIDTH x i32> @__min_varying_uint32(<WIDTH x i32>, <WIDTH x i32>) nounwind readnone
|
||||
declare <WIDTH x i32> @__max_varying_uint32(<WIDTH x i32>, <WIDTH x i32>) nounwind readnone
|
||||
declare <WIDTH x i64> @__min_varying_int64(<WIDTH x i64>, <WIDTH x i64>) nounwind readnone
|
||||
declare <WIDTH x i64> @__max_varying_int64(<WIDTH x i64>, <WIDTH x i64>) nounwind readnone
|
||||
declare <WIDTH x i64> @__min_varying_uint64(<WIDTH x i64>, <WIDTH x i64>) nounwind readnone
|
||||
declare <WIDTH x i64> @__max_varying_uint64(<WIDTH x i64>, <WIDTH x i64>) nounwind readnone
|
||||
declare <WIDTH x double> @__min_varying_double(<WIDTH x double>,
|
||||
<WIDTH x double>) nounwind readnone
|
||||
declare <WIDTH x double> @__max_varying_double(<WIDTH x double>,
|
||||
<WIDTH x double>) nounwind readnone
|
||||
|
||||
;; sqrt/rsqrt/rcp
|
||||
|
||||
declare float @__rsqrt_uniform_float(float) nounwind readnone
|
||||
declare float @__rcp_uniform_float(float) nounwind readnone
|
||||
declare float @__sqrt_uniform_float(float) nounwind readnone
|
||||
declare <WIDTH x float> @__rcp_varying_float(<WIDTH x float>) nounwind readnone
|
||||
declare <WIDTH x float> @__rsqrt_varying_float(<WIDTH x float>) nounwind readnone
|
||||
declare <WIDTH x float> @__sqrt_varying_float(<WIDTH x float>) nounwind readnone
|
||||
|
||||
declare double @__sqrt_uniform_double(double) nounwind readnone
|
||||
declare <WIDTH x double> @__sqrt_varying_double(<WIDTH x double>) nounwind readnone
|
||||
|
||||
;; bit ops
|
||||
|
||||
declare i32 @__popcnt_int32(i32) nounwind readnone
|
||||
declare i64 @__popcnt_int64(i64) nounwind readnone
|
||||
|
||||
declare i32 @__count_trailing_zeros_i32(i32) nounwind readnone
|
||||
declare i64 @__count_trailing_zeros_i64(i64) nounwind readnone
|
||||
declare i32 @__count_leading_zeros_i32(i32) nounwind readnone
|
||||
declare i64 @__count_leading_zeros_i64(i64) nounwind readnone
|
||||
|
||||
;; svml
|
||||
|
||||
; FIXME: need either to wire these up to the 8-wide SVML entrypoints,
|
||||
; or, use the macro to call the 4-wide ones twice with our 8-wide
|
||||
; vectors...
|
||||
|
||||
declare <WIDTH x float> @__svml_sin(<WIDTH x float>)
|
||||
declare <WIDTH x float> @__svml_cos(<WIDTH x float>)
|
||||
declare void @__svml_sincos(<WIDTH x float>, <WIDTH x float> *, <WIDTH x float> *)
|
||||
declare <WIDTH x float> @__svml_tan(<WIDTH x float>)
|
||||
declare <WIDTH x float> @__svml_atan(<WIDTH x float>)
|
||||
declare <WIDTH x float> @__svml_atan2(<WIDTH x float>, <WIDTH x float>)
|
||||
declare <WIDTH x float> @__svml_exp(<WIDTH x float>)
|
||||
declare <WIDTH x float> @__svml_log(<WIDTH x float>)
|
||||
declare <WIDTH x float> @__svml_pow(<WIDTH x float>, <WIDTH x float>)
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; reductions
|
||||
|
||||
declare i32 @__movmsk(<WIDTH x i1>) nounwind readnone
|
||||
|
||||
declare float @__reduce_add_float(<WIDTH x float>) nounwind readnone
|
||||
declare float @__reduce_min_float(<WIDTH x float>) nounwind readnone
|
||||
declare float @__reduce_max_float(<WIDTH x float>) nounwind readnone
|
||||
|
||||
declare i32 @__reduce_add_int32(<WIDTH x i32>) nounwind readnone
|
||||
declare i32 @__reduce_min_int32(<WIDTH x i32>) nounwind readnone
|
||||
declare i32 @__reduce_max_int32(<WIDTH x i32>) nounwind readnone
|
||||
|
||||
declare i32 @__reduce_add_uint32(<WIDTH x i32>) nounwind readnone
|
||||
declare i32 @__reduce_min_uint32(<WIDTH x i32>) nounwind readnone
|
||||
declare i32 @__reduce_max_uint32(<WIDTH x i32>) nounwind readnone
|
||||
|
||||
declare double @__reduce_add_double(<WIDTH x double>) nounwind readnone
|
||||
declare double @__reduce_min_double(<WIDTH x double>) nounwind readnone
|
||||
declare double @__reduce_max_double(<WIDTH x double>) nounwind readnone
|
||||
|
||||
declare i64 @__reduce_add_int64(<WIDTH x i64>) nounwind readnone
|
||||
declare i64 @__reduce_min_int64(<WIDTH x i64>) nounwind readnone
|
||||
declare i64 @__reduce_max_int64(<WIDTH x i64>) nounwind readnone
|
||||
|
||||
declare i64 @__reduce_add_uint64(<WIDTH x i64>) nounwind readnone
|
||||
declare i64 @__reduce_min_uint64(<WIDTH x i64>) nounwind readnone
|
||||
declare i64 @__reduce_max_uint64(<WIDTH x i64>) nounwind readnone
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; unaligned loads/loads+broadcasts
|
||||
|
||||
load_and_broadcast(WIDTH, i8, 8)
|
||||
load_and_broadcast(WIDTH, i16, 16)
|
||||
load_and_broadcast(WIDTH, i32, 32)
|
||||
load_and_broadcast(WIDTH, i64, 64)
|
||||
|
||||
declare <WIDTH x i8> @__masked_load_8(i8 * nocapture, <WIDTH x i1> %mask) nounwind readonly
|
||||
declare <WIDTH x i16> @__masked_load_16(i8 * nocapture, <WIDTH x i1> %mask) nounwind readonly
|
||||
declare <WIDTH x i32> @__masked_load_32(i8 * nocapture, <WIDTH x i1> %mask) nounwind readonly
|
||||
declare <WIDTH x i64> @__masked_load_64(i8 * nocapture, <WIDTH x i1> %mask) nounwind readonly
|
||||
|
||||
declare void @__masked_store_8(<WIDTH x i8>* nocapture, <WIDTH x i8>,
|
||||
<WIDTH x i1>) nounwind
|
||||
declare void @__masked_store_16(<WIDTH x i16>* nocapture, <WIDTH x i16>,
|
||||
<WIDTH x i1>) nounwind
|
||||
declare void @__masked_store_32(<WIDTH x i32>* nocapture, <WIDTH x i32>,
|
||||
<WIDTH x i1>) nounwind
|
||||
declare void @__masked_store_64(<WIDTH x i64>* nocapture, <WIDTH x i64>,
|
||||
<WIDTH x i1> %mask) nounwind
|
||||
|
||||
;; Blending masked stores for 8, 16, 32 and 64 bit elements.
;; When the LLVM version macro equals LLVM_3_1svn the four functions are
;; defined inline below: each one loads the current vector from the
;; destination pointer, uses the i1 mask vector to choose per lane between
;; the new value and the loaded value, and stores the blended vector back.
;; For any other LLVM version only declarations are emitted and no body is
;; provided in this file.
;; Arguments of every variant: %0 destination vector pointer, %1 new values,
;; %2 per lane i1 mask.
ifelse(LLVM_VERSION, `LLVM_3_1svn',`
|
||||
; 8 bit elements
define void @__masked_store_blend_8(<WIDTH x i8>* nocapture, <WIDTH x i8>,
|
||||
<WIDTH x i1>) nounwind alwaysinline {
|
||||
; read the whole destination vector including lanes whose mask is off
%v = load <WIDTH x i8> * %0
|
||||
; lanes with the mask on take the new value and the rest keep the old one
%v1 = select <WIDTH x i1> %2, <WIDTH x i8> %1, <WIDTH x i8> %v
|
||||
store <WIDTH x i8> %v1, <WIDTH x i8> * %0
|
||||
ret void
|
||||
}
|
||||
|
||||
; 16 bit elements
define void @__masked_store_blend_16(<WIDTH x i16>* nocapture, <WIDTH x i16>,
|
||||
<WIDTH x i1>) nounwind alwaysinline {
|
||||
; read the whole destination vector including lanes whose mask is off
%v = load <WIDTH x i16> * %0
|
||||
; lanes with the mask on take the new value and the rest keep the old one
%v1 = select <WIDTH x i1> %2, <WIDTH x i16> %1, <WIDTH x i16> %v
|
||||
store <WIDTH x i16> %v1, <WIDTH x i16> * %0
|
||||
ret void
|
||||
}
|
||||
|
||||
; 32 bit elements
define void @__masked_store_blend_32(<WIDTH x i32>* nocapture, <WIDTH x i32>,
|
||||
<WIDTH x i1>) nounwind alwaysinline {
|
||||
; read the whole destination vector including lanes whose mask is off
%v = load <WIDTH x i32> * %0
|
||||
; lanes with the mask on take the new value and the rest keep the old one
%v1 = select <WIDTH x i1> %2, <WIDTH x i32> %1, <WIDTH x i32> %v
|
||||
store <WIDTH x i32> %v1, <WIDTH x i32> * %0
|
||||
ret void
|
||||
}
|
||||
|
||||
; 64 bit elements
define void @__masked_store_blend_64(<WIDTH x i64>* nocapture,
|
||||
<WIDTH x i64>, <WIDTH x i1>) nounwind alwaysinline {
|
||||
; read the whole destination vector including lanes whose mask is off
%v = load <WIDTH x i64> * %0
|
||||
; lanes with the mask on take the new value and the rest keep the old one
%v1 = select <WIDTH x i1> %2, <WIDTH x i64> %1, <WIDTH x i64> %v
|
||||
store <WIDTH x i64> %v1, <WIDTH x i64> * %0
|
||||
ret void
|
||||
}
|
||||
',`
|
||||
; other LLVM versions: declarations only with the same signatures as above
declare void @__masked_store_blend_8(<WIDTH x i8>* nocapture, <WIDTH x i8>,
|
||||
<WIDTH x i1>) nounwind
|
||||
declare void @__masked_store_blend_16(<WIDTH x i16>* nocapture, <WIDTH x i16>,
|
||||
<WIDTH x i1>) nounwind
|
||||
declare void @__masked_store_blend_32(<WIDTH x i32>* nocapture, <WIDTH x i32>,
|
||||
<WIDTH x i1>) nounwind
|
||||
declare void @__masked_store_blend_64(<WIDTH x i64>* nocapture, <WIDTH x i64>,
|
||||
<WIDTH x i1> %mask) nounwind
|
||||
')
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; gather/scatter
|
||||
|
||||
;; m4 macro that emits declarations only (no bodies) for the gather and
;; scatter entry points of one element type. Its single argument is the LLVM
;; element type (the file invokes it with i8, i16, i32 and i64); the argument
;; is used both as the vector element type and as the suffix of each
;; function name. Eight functions are declared per invocation:
;;   - gathers, marked readonly, returning a vector of the element type:
;;     base+offsets forms with 32 and 64 bit offset vectors, and forms that
;;     take a vector of 32 or 64 bit integers directly;
;;   - scatters, returning void, with the same four addressing forms plus a
;;     vector of values to write.
;; Every function takes a per lane i1 mask vector as its last argument.
;; NOTE(review): the meaning of the scalar i32 and the second offset vector
;; in the base+offsets forms is not visible here (presumably a scale and a
;; constant offset delta); confirm against the code that calls them.
define(`gather_scatter', `
|
||||
declare <WIDTH x $1> @__gather_base_offsets32_$1(i8 * nocapture, <WIDTH x i32>,
|
||||
i32, <WIDTH x i32>, <WIDTH x i1>) nounwind readonly
|
||||
declare <WIDTH x $1> @__gather_base_offsets64_$1(i8 * nocapture, <WIDTH x i64>,
|
||||
i32, <WIDTH x i64>, <WIDTH x i1>) nounwind readonly
|
||||
declare <WIDTH x $1> @__gather32_$1(<WIDTH x i32>,
|
||||
<WIDTH x i1>) nounwind readonly
|
||||
declare <WIDTH x $1> @__gather64_$1(<WIDTH x i64>,
|
||||
<WIDTH x i1>) nounwind readonly
|
||||
|
||||
declare void @__scatter_base_offsets32_$1(i8* nocapture, <WIDTH x i32>,
|
||||
i32, <WIDTH x i32>, <WIDTH x $1>, <WIDTH x i1>) nounwind
|
||||
declare void @__scatter_base_offsets64_$1(i8* nocapture, <WIDTH x i64>,
|
||||
i32, <WIDTH x i64>, <WIDTH x $1>, <WIDTH x i1>) nounwind
|
||||
declare void @__scatter32_$1(<WIDTH x i32>, <WIDTH x $1>,
|
||||
<WIDTH x i1>) nounwind
|
||||
declare void @__scatter64_$1(<WIDTH x i64>, <WIDTH x $1>,
|
||||
<WIDTH x i1>) nounwind
|
||||
')
|
||||
|
||||
gather_scatter(i8)
|
||||
gather_scatter(i16)
|
||||
gather_scatter(i32)
|
||||
gather_scatter(i64)
|
||||
|
||||
declare i32 @__packed_load_active(i32 * nocapture, <WIDTH x i32> * nocapture,
|
||||
<WIDTH x i1>) nounwind
|
||||
declare i32 @__packed_store_active(i32 * nocapture, <WIDTH x i32> %vals,
|
||||
<WIDTH x i1>) nounwind
|
||||
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; prefetch
|
||||
|
||||
declare void @__prefetch_read_uniform_1(i8 * nocapture) nounwind
|
||||
declare void @__prefetch_read_uniform_2(i8 * nocapture) nounwind
|
||||
declare void @__prefetch_read_uniform_3(i8 * nocapture) nounwind
|
||||
declare void @__prefetch_read_uniform_nt(i8 * nocapture) nounwind
|
||||
|
||||
@@ -29,6 +29,11 @@
|
||||
;; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
;; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
ctlztz()
|
||||
define_prefetches()
|
||||
define_shuffles()
|
||||
aossoa()
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; rcp
|
||||
|
||||
@@ -36,12 +36,24 @@
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; standard 8-wide definitions from m4 macros
|
||||
|
||||
stdlib_core(8)
|
||||
packed_load_and_store(8)
|
||||
scans(8)
|
||||
int64minmax(8)
|
||||
define(`WIDTH',`8')
|
||||
define(`MASK',`i32')
|
||||
include(`util.m4')
|
||||
|
||||
include(`builtins-sse2-common.ll')
|
||||
stdlib_core()
|
||||
packed_load_and_store()
|
||||
scans()
|
||||
int64minmax()
|
||||
|
||||
include(`target-sse2-common.ll')
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; half conversion routines
|
||||
|
||||
declare float @__half_to_float_uniform(i16 %v) nounwind readnone
|
||||
declare <WIDTH x float> @__half_to_float_varying(<WIDTH x i16> %v) nounwind readnone
|
||||
declare i16 @__float_to_half_uniform(float %v) nounwind readnone
|
||||
declare <WIDTH x i16> @__float_to_half_varying(<WIDTH x float> %v) nounwind readnone
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; rcp
|
||||
@@ -301,7 +313,7 @@ define i32 @__movmsk(<8 x i32>) nounwind readnone alwaysinline {
|
||||
}
|
||||
|
||||
define <4 x float> @__vec4_add_float(<4 x float> %v0,
|
||||
<4 x float> %v1) nounwind readnone alwaysinline {
|
||||
<4 x float> %v1) nounwind readnone alwaysinline {
|
||||
%v = fadd <4 x float> %v0, %v1
|
||||
ret <4 x float> %v
|
||||
}
|
||||
@@ -325,7 +337,7 @@ define float @__reduce_max_float(<8 x float>) nounwind readnone alwaysinline {
|
||||
|
||||
; helper function for reduce_add_int32
|
||||
define <4 x i32> @__vec4_add_int32(<4 x i32> %v0,
|
||||
<4 x i32> %v1) nounwind readnone alwaysinline {
|
||||
<4 x i32> %v1) nounwind readnone alwaysinline {
|
||||
%v = add <4 x i32> %v0, %v1
|
||||
ret <4 x i32> %v
|
||||
}
|
||||
@@ -425,10 +437,10 @@ load_and_broadcast(8, i16, 16)
|
||||
load_and_broadcast(8, i32, 32)
|
||||
load_and_broadcast(8, i64, 64)
|
||||
|
||||
load_masked(8, i8, 8, 1)
|
||||
load_masked(8, i16, 16, 2)
|
||||
load_masked(8, i32, 32, 4)
|
||||
load_masked(8, i64, 64, 8)
|
||||
masked_load(8, i8, 8, 1)
|
||||
masked_load(8, i16, 16, 2)
|
||||
masked_load(8, i32, 32, 4)
|
||||
masked_load(8, i64, 64, 8)
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; gather/scatter
|
||||
@@ -33,12 +33,24 @@
|
||||
;; Define the standard library builtins for the SSE2 target
|
||||
|
||||
; Define some basics for a 4-wide target
|
||||
stdlib_core(4)
|
||||
packed_load_and_store(4)
|
||||
scans(4)
|
||||
int64minmax(4)
|
||||
define(`WIDTH',`4')
|
||||
define(`MASK',`i32')
|
||||
include(`util.m4')
|
||||
|
||||
include(`builtins-sse2-common.ll')
|
||||
stdlib_core()
|
||||
packed_load_and_store()
|
||||
scans()
|
||||
int64minmax()
|
||||
|
||||
include(`target-sse2-common.ll')
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; half conversion routines
|
||||
|
||||
declare float @__half_to_float_uniform(i16 %v) nounwind readnone
|
||||
declare <WIDTH x float> @__half_to_float_varying(<WIDTH x i16> %v) nounwind readnone
|
||||
declare i16 @__float_to_half_uniform(float %v) nounwind readnone
|
||||
declare <WIDTH x i16> @__float_to_half_varying(<WIDTH x float> %v) nounwind readnone
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; rounding
|
||||
@@ -144,7 +156,7 @@ define <4 x double> @__ceil_varying_double(<4 x double>) nounwind readonly alway
|
||||
; from %1, and otherwise return the value from %0.
|
||||
|
||||
define <4 x i32> @__vselect_i32(<4 x i32>, <4 x i32> ,
|
||||
<4 x i32> %mask) nounwind readnone alwaysinline {
|
||||
<4 x i32> %mask) nounwind readnone alwaysinline {
|
||||
%notmask = xor <4 x i32> %mask, <i32 -1, i32 -1, i32 -1, i32 -1>
|
||||
%cleared_old = and <4 x i32> %0, %notmask
|
||||
%masked_new = and <4 x i32> %1, %mask
|
||||
@@ -153,7 +165,7 @@ define <4 x i32> @__vselect_i32(<4 x i32>, <4 x i32> ,
|
||||
}
|
||||
|
||||
define <4 x float> @__vselect_float(<4 x float>, <4 x float>,
|
||||
<4 x i32> %mask) nounwind readnone alwaysinline {
|
||||
<4 x i32> %mask) nounwind readnone alwaysinline {
|
||||
%v0 = bitcast <4 x float> %0 to <4 x i32>
|
||||
%v1 = bitcast <4 x float> %1 to <4 x i32>
|
||||
%r = call <4 x i32> @__vselect_i32(<4 x i32> %v0, <4 x i32> %v1, <4 x i32> %mask)
|
||||
@@ -552,10 +564,10 @@ load_and_broadcast(4, i16, 16)
|
||||
load_and_broadcast(4, i32, 32)
|
||||
load_and_broadcast(4, i64, 64)
|
||||
|
||||
load_masked(4, i8, 8, 1)
|
||||
load_masked(4, i16, 16, 2)
|
||||
load_masked(4, i32, 32, 4)
|
||||
load_masked(4, i64, 64, 8)
|
||||
masked_load(4, i8, 8, 1)
|
||||
masked_load(4, i16, 16, 2)
|
||||
masked_load(4, i32, 32, 4)
|
||||
masked_load(4, i64, 64, 8)
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; gather/scatter
|
||||
@@ -29,6 +29,11 @@
|
||||
;; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
;; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
ctlztz()
|
||||
define_prefetches()
|
||||
define_shuffles()
|
||||
aossoa()
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; rounding floats
|
||||
|
||||
@@ -36,12 +36,24 @@
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; standard 8-wide definitions from m4 macros
|
||||
|
||||
stdlib_core(8)
|
||||
packed_load_and_store(8)
|
||||
scans(8)
|
||||
int64minmax(8)
|
||||
define(`WIDTH',`8')
|
||||
define(`MASK',`i32')
|
||||
include(`util.m4')
|
||||
|
||||
include(`builtins-sse4-common.ll')
|
||||
stdlib_core()
|
||||
packed_load_and_store()
|
||||
scans()
|
||||
int64minmax()
|
||||
|
||||
include(`target-sse4-common.ll')
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; half conversion routines
|
||||
|
||||
declare float @__half_to_float_uniform(i16 %v) nounwind readnone
|
||||
declare <WIDTH x float> @__half_to_float_varying(<WIDTH x i16> %v) nounwind readnone
|
||||
declare i16 @__float_to_half_uniform(float %v) nounwind readnone
|
||||
declare <WIDTH x i16> @__float_to_half_varying(<WIDTH x float> %v) nounwind readnone
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; rcp
|
||||
@@ -252,7 +264,7 @@ define float @__reduce_max_float(<8 x float>) nounwind readnone alwaysinline {
|
||||
|
||||
; helper function for reduce_add_int32
|
||||
define <4 x i32> @__vec4_add_int32(<4 x i32> %v0,
|
||||
<4 x i32> %v1) nounwind readnone alwaysinline {
|
||||
<4 x i32> %v1) nounwind readnone alwaysinline {
|
||||
%v = add <4 x i32> %v0, %v1
|
||||
ret <4 x i32> %v
|
||||
}
|
||||
@@ -352,10 +364,10 @@ load_and_broadcast(8, i16, 16)
|
||||
load_and_broadcast(8, i32, 32)
|
||||
load_and_broadcast(8, i64, 64)
|
||||
|
||||
load_masked(8, i8, 8, 1)
|
||||
load_masked(8, i16, 16, 2)
|
||||
load_masked(8, i32, 32, 4)
|
||||
load_masked(8, i64, 64, 8)
|
||||
masked_load(8, i8, 8, 1)
|
||||
masked_load(8, i16, 16, 2)
|
||||
masked_load(8, i32, 32, 4)
|
||||
masked_load(8, i64, 64, 8)
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; gather/scatter
|
||||
@@ -33,12 +33,24 @@
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
; Define common 4-wide stuff
|
||||
stdlib_core(4)
|
||||
packed_load_and_store(4)
|
||||
scans(4)
|
||||
int64minmax(4)
|
||||
define(`WIDTH',`4')
|
||||
define(`MASK',`i32')
|
||||
include(`util.m4')
|
||||
|
||||
include(`builtins-sse4-common.ll')
|
||||
stdlib_core()
|
||||
packed_load_and_store()
|
||||
scans()
|
||||
int64minmax()
|
||||
|
||||
include(`target-sse4-common.ll')
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; half conversion routines
|
||||
|
||||
declare float @__half_to_float_uniform(i16 %v) nounwind readnone
|
||||
declare <WIDTH x float> @__half_to_float_varying(<WIDTH x i16> %v) nounwind readnone
|
||||
declare i16 @__float_to_half_uniform(float %v) nounwind readnone
|
||||
declare <WIDTH x i16> @__float_to_half_varying(<WIDTH x float> %v) nounwind readnone
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; rcp
|
||||
@@ -451,10 +463,10 @@ load_and_broadcast(4, i16, 16)
|
||||
load_and_broadcast(4, i32, 32)
|
||||
load_and_broadcast(4, i64, 64)
|
||||
|
||||
load_masked(4, i8, 8, 1)
|
||||
load_masked(4, i16, 16, 2)
|
||||
load_masked(4, i32, 32, 4)
|
||||
load_masked(4, i64, 64, 8)
|
||||
masked_load(4, i8, 8, 1)
|
||||
masked_load(4, i16, 16, 2)
|
||||
masked_load(4, i32, 32, 4)
|
||||
masked_load(4, i64, 64, 8)
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; gather/scatter
|
||||
File diff suppressed because it is too large
Load Diff
4511
cbackend.cpp
Normal file
4511
cbackend.cpp
Normal file
File diff suppressed because it is too large
Load Diff
202
ctx.h
202
ctx.h
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
Copyright (c) 2010-2011, Intel Corporation
|
||||
Copyright (c) 2010-2012, Intel Corporation
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
@@ -39,6 +39,7 @@
|
||||
#define ISPC_CTX_H 1
|
||||
|
||||
#include "ispc.h"
|
||||
#include <map>
|
||||
#include <llvm/InstrTypes.h>
|
||||
#include <llvm/Instructions.h>
|
||||
#include <llvm/Analysis/DIBuilder.h>
|
||||
@@ -98,9 +99,9 @@ public:
|
||||
the function entry mask and the internal mask. */
|
||||
llvm::Value *GetFullMask();
|
||||
|
||||
/** Provides the alloca'd pointer to memory to store the full function
|
||||
mask. This is only used to wire up the __mask builtin variable. */
|
||||
void SetMaskPointer(llvm::Value *p);
|
||||
/** Returns a pointer to storage in memory that stores the current full
|
||||
mask. */
|
||||
llvm::Value *GetFullMaskPointer();
|
||||
|
||||
/** Provides the value of the mask at function entry */
|
||||
void SetFunctionMask(llvm::Value *val);
|
||||
@@ -160,10 +161,8 @@ public:
|
||||
void EndLoop();
|
||||
|
||||
/** Indicates that code generation for a 'foreach' or 'foreach_tiled'
|
||||
loop is about to start. The provided basic block pointer indicates
|
||||
where control flow should go if a 'continue' statement is executed
|
||||
in the loop. */
|
||||
void StartForeach(llvm::BasicBlock *continueTarget);
|
||||
loop is about to start. */
|
||||
void StartForeach();
|
||||
void EndForeach();
|
||||
|
||||
/** Emit code for a 'break' statement in a loop. If doCoherenceCheck
|
||||
@@ -186,12 +185,69 @@ public:
|
||||
previous iteration. */
|
||||
void RestoreContinuedLanes();
|
||||
|
||||
/** Indicates that code generation for a "switch" statement is about to
|
||||
start. isUniform indicates whether the "switch" value is uniform,
|
||||
and bbAfterSwitch gives the basic block immediately following the
|
||||
"switch" statement. (For example, if the switch condition is
|
||||
uniform, we jump here upon executing a "break" statement.) */
|
||||
void StartSwitch(bool isUniform, llvm::BasicBlock *bbAfterSwitch);
|
||||
/** Indicates the end of code generation for a "switch" statement. */
|
||||
void EndSwitch();
|
||||
|
||||
/** Emits code for a "switch" statement in the program.
|
||||
@param expr Gives the value of the expression after the "switch"
|
||||
@param defaultBlock Basic block to execute for the "default" case. This
|
||||
should be NULL if there is no "default" label inside
|
||||
the switch.
|
||||
@param caseBlocks vector that stores the mapping from label values
|
||||
after "case" statements to basic blocks corresponding
|
||||
to the "case" labels.
|
||||
@param nextBlocks For each basic block for a "case" or "default"
|
||||
label, this gives the basic block for the
|
||||
immediately-following "case" or "default" label (or
|
||||
the basic block after the "switch" statement for the
|
||||
last label.)
|
||||
*/
|
||||
void SwitchInst(llvm::Value *expr, llvm::BasicBlock *defaultBlock,
|
||||
const std::vector<std::pair<int, llvm::BasicBlock *> > &caseBlocks,
|
||||
const std::map<llvm::BasicBlock *, llvm::BasicBlock *> &nextBlocks);
|
||||
|
||||
/** Generates code for a "default" label after a "switch" statement.
|
||||
The checkMask parameter indicates whether additional code should be
|
||||
generated to check to see if the execution mask is all off after
|
||||
the default label (in which case a jump to the following label will
|
||||
be issued.) */
|
||||
void EmitDefaultLabel(bool checkMask, SourcePos pos);
|
||||
|
||||
/** Generates code for a "case" label after a "switch" statement. See
|
||||
the documentation for EmitDefaultLabel() for discussion of the
|
||||
checkMask parameter. */
|
||||
void EmitCaseLabel(int value, bool checkMask, SourcePos pos);
|
||||
|
||||
/** Returns the current number of nested levels of 'varying' control
|
||||
flow */
|
||||
int VaryingCFDepth() const;
|
||||
|
||||
bool InForeachLoop() const;
|
||||
|
||||
/** Temporarily disables emission of performance warnings from gathers
|
||||
and scatters from subsequent code. */
|
||||
void DisableGatherScatterWarnings();
|
||||
|
||||
/** Reenables emission of gather/scatter performance warnings. */
|
||||
void EnableGatherScatterWarnings();
|
||||
|
||||
void SetContinueTarget(llvm::BasicBlock *bb) { continueTarget = bb; }
|
||||
|
||||
/** Step through the code and find label statements; create a basic
|
||||
block for each one, so that subsequent calls to
|
||||
GetLabeledBasicBlock() return the corresponding basic block. */
|
||||
void InitializeLabelMap(Stmt *code);
|
||||
|
||||
/** If there is a label in the function with the given name, return the
|
||||
new basic block that it starts. */
|
||||
llvm::BasicBlock *GetLabeledBasicBlock(const std::string &label);
|
||||
|
||||
/** Called to generate code for 'return' statement; value is the
|
||||
expression in the return statement (if non-NULL), and
|
||||
doCoherenceCheck indicates whether instructions should be generated
|
||||
@@ -211,6 +267,10 @@ public:
|
||||
i1 value that indicates if all of the mask lanes are on. */
|
||||
llvm::Value *All(llvm::Value *mask);
|
||||
|
||||
/** Given a boolean mask value of type LLVMTypes::MaskType, return an
|
||||
i1 value that indicates if all of the mask lanes are off. */
|
||||
llvm::Value *None(llvm::Value *mask);
|
||||
|
||||
/** Given a boolean mask value of type LLVMTypes::MaskType, return an
|
||||
i32 value wherein the i'th bit is on if and only if the i'th lane
|
||||
of the mask is on. */
|
||||
@@ -320,25 +380,35 @@ public:
|
||||
array, for pointer types). */
|
||||
llvm::Value *SmearUniform(llvm::Value *value, const char *name = NULL);
|
||||
|
||||
llvm::Value *BitCastInst(llvm::Value *value, LLVM_TYPE_CONST llvm::Type *type,
|
||||
llvm::Value *BitCastInst(llvm::Value *value, llvm::Type *type,
|
||||
const char *name = NULL);
|
||||
llvm::Value *PtrToIntInst(llvm::Value *value, const char *name = NULL);
|
||||
llvm::Value *PtrToIntInst(llvm::Value *value, LLVM_TYPE_CONST llvm::Type *type,
|
||||
llvm::Value *PtrToIntInst(llvm::Value *value, llvm::Type *type,
|
||||
const char *name = NULL);
|
||||
llvm::Value *IntToPtrInst(llvm::Value *value, LLVM_TYPE_CONST llvm::Type *type,
|
||||
llvm::Value *IntToPtrInst(llvm::Value *value, llvm::Type *type,
|
||||
const char *name = NULL);
|
||||
|
||||
llvm::Instruction *TruncInst(llvm::Value *value, LLVM_TYPE_CONST llvm::Type *type,
|
||||
llvm::Instruction *TruncInst(llvm::Value *value, llvm::Type *type,
|
||||
const char *name = NULL);
|
||||
llvm::Instruction *CastInst(llvm::Instruction::CastOps op, llvm::Value *value,
|
||||
LLVM_TYPE_CONST llvm::Type *type, const char *name = NULL);
|
||||
llvm::Instruction *FPCastInst(llvm::Value *value, LLVM_TYPE_CONST llvm::Type *type,
|
||||
llvm::Type *type, const char *name = NULL);
|
||||
llvm::Instruction *FPCastInst(llvm::Value *value, llvm::Type *type,
|
||||
const char *name = NULL);
|
||||
llvm::Instruction *SExtInst(llvm::Value *value, LLVM_TYPE_CONST llvm::Type *type,
|
||||
llvm::Instruction *SExtInst(llvm::Value *value, llvm::Type *type,
|
||||
const char *name = NULL);
|
||||
llvm::Instruction *ZExtInst(llvm::Value *value, LLVM_TYPE_CONST llvm::Type *type,
|
||||
llvm::Instruction *ZExtInst(llvm::Value *value, llvm::Type *type,
|
||||
const char *name = NULL);
|
||||
|
||||
/** Given two integer-typed values (but possibly one vector and the
|
||||
other not, and/or of possibly-different bit-widths), update their
|
||||
values as needed so that the two have the same (more general)
|
||||
type. */
|
||||
void MatchIntegerTypes(llvm::Value **v0, llvm::Value **v1);
|
||||
|
||||
/** Create a new slice pointer out of the given pointer to an soa type
|
||||
and an integer offset to a slice within that type. */
|
||||
llvm::Value *MakeSlicePointer(llvm::Value *ptr, llvm::Value *offset);
|
||||
|
||||
/** These GEP methods are generalizations of the standard ones in LLVM;
|
||||
they support both uniform and varying basePtr values as well as
|
||||
uniform and varying index values (arrays of indices). Varying base
|
||||
@@ -359,7 +429,8 @@ public:
|
||||
the type of the pointer, though it may be NULL if the base pointer
|
||||
is uniform. */
|
||||
llvm::Value *AddElementOffset(llvm::Value *basePtr, int elementNum,
|
||||
const Type *ptrType, const char *name = NULL);
|
||||
const Type *ptrType, const char *name = NULL,
|
||||
const PointerType **resultPtrType = NULL);
|
||||
|
||||
/** Load from the memory location(s) given by lvalue, using the given
|
||||
mask. The lvalue may be varying, in which case this corresponds to
|
||||
@@ -377,7 +448,7 @@ public:
|
||||
instruction is added at the start of the function in the entry
|
||||
basic block; if it should be added to the current basic block, then
|
||||
the atEntryBlock parameter should be false. */
|
||||
llvm::Value *AllocaInst(LLVM_TYPE_CONST llvm::Type *llvmType,
|
||||
llvm::Value *AllocaInst(llvm::Type *llvmType,
|
||||
const char *name = NULL, int align = 0,
|
||||
bool atEntryBlock = true);
|
||||
|
||||
@@ -390,7 +461,14 @@ public:
|
||||
varying, the given storeMask is used to mask the stores so that
|
||||
they only execute for the active program instances. */
|
||||
void StoreInst(llvm::Value *value, llvm::Value *ptr,
|
||||
llvm::Value *storeMask, const Type *ptrType);
|
||||
llvm::Value *storeMask, const Type *valueType,
|
||||
const Type *ptrType);
|
||||
|
||||
/** Copy count bytes of memory from the location pointed to by src to
|
||||
the location pointed to by dest. (src and dest must not be
|
||||
overlapping.) */
|
||||
void MemcpyInst(llvm::Value *dest, llvm::Value *src, llvm::Value *count,
|
||||
llvm::Value *align = NULL);
|
||||
|
||||
void BranchInst(llvm::BasicBlock *block);
|
||||
void BranchInst(llvm::BasicBlock *trueBlock, llvm::BasicBlock *falseBlock,
|
||||
@@ -407,7 +485,7 @@ public:
|
||||
llvm::Value *InsertInst(llvm::Value *v, llvm::Value *eltVal, int elt,
|
||||
const char *name = NULL);
|
||||
|
||||
llvm::PHINode *PhiNode(LLVM_TYPE_CONST llvm::Type *type, int count,
|
||||
llvm::PHINode *PhiNode(llvm::Type *type, int count,
|
||||
const char *name = NULL);
|
||||
llvm::Instruction *SelectInst(llvm::Value *test, llvm::Value *val0,
|
||||
llvm::Value *val1, const char *name = NULL);
|
||||
@@ -446,6 +524,9 @@ private:
|
||||
/** Pointer to the Function for which we're currently generating code. */
|
||||
Function *function;
|
||||
|
||||
/** LLVM function representation for the current function. */
|
||||
llvm::Function *llvmFunction;
|
||||
|
||||
/** The basic block into which we add any alloca instructions that need
|
||||
to go at the very start of the function. */
|
||||
llvm::BasicBlock *allocaBlock;
|
||||
@@ -479,10 +560,10 @@ private:
|
||||
the loop. */
|
||||
llvm::Value *loopMask;
|
||||
|
||||
/** If currently in a loop body, this is a pointer to memory to store a
|
||||
mask value that represents which of the lanes have executed a
|
||||
'break' statement. If we're not in a loop body, this should be
|
||||
NULL. */
|
||||
/** If currently in a loop body or switch statement, this is a pointer
|
||||
to memory to store a mask value that represents which of the lanes
|
||||
have executed a 'break' statement. If we're not in a loop body or
|
||||
switch, this should be NULL. */
|
||||
llvm::Value *breakLanesPtr;
|
||||
|
||||
/** Similar to breakLanesPtr, if we're inside a loop, this is a pointer
|
||||
@@ -490,16 +571,49 @@ private:
|
||||
'continue' statement. */
|
||||
llvm::Value *continueLanesPtr;
|
||||
|
||||
/** If we're inside a loop, this gives the basic block immediately
|
||||
after the current loop, which we will jump to if all of the lanes
|
||||
have executed a break statement or are otherwise done with the
|
||||
loop. */
|
||||
/** If we're inside a loop or switch statement, this gives the basic
|
||||
block immediately after the current loop or switch, which we will
|
||||
jump to if all of the lanes have executed a break statement or are
|
||||
otherwise done with it. */
|
||||
llvm::BasicBlock *breakTarget;
|
||||
|
||||
/** If we're inside a loop, this gives the block to jump to if all of
|
||||
the running lanes have executed a 'continue' statement. */
|
||||
llvm::BasicBlock *continueTarget;
|
||||
|
||||
/** @name Switch statement state
|
||||
|
||||
These variables store various state that's active when we're
|
||||
generating code for a switch statement. They should all be NULL
|
||||
outside of a switch.
|
||||
@{
|
||||
*/
|
||||
|
||||
/** The value of the expression used to determine which case in the
|
||||
statements after the switch to execute. */
|
||||
llvm::Value *switchExpr;
|
||||
|
||||
/** Map from case label numbers to the basic block that will hold code
|
||||
for that case. */
|
||||
const std::vector<std::pair<int, llvm::BasicBlock *> > *caseBlocks;
|
||||
|
||||
/** The basic block of code to run for the "default" label in the
|
||||
switch statement. */
|
||||
llvm::BasicBlock *defaultBlock;
|
||||
|
||||
/** For each basic block for the code for cases (and the default label,
|
||||
if present), this map gives the basic block for the immediately
|
||||
following case/default label. */
|
||||
const std::map<llvm::BasicBlock *, llvm::BasicBlock *> *nextBlocks;
|
||||
|
||||
/** Records whether the switch condition was uniform; this is a
|
||||
distinct notion from whether the switch represents uniform or
|
||||
varying control flow; we may have varying control flow from a
|
||||
uniform switch condition if there is a 'break' inside the switch
|
||||
that's under varying control flow. */
|
||||
bool switchConditionWasUniform;
|
||||
/** @} */
|
||||
|
||||
/** A pointer to memory that records which of the program instances
|
||||
have executed a 'return' statement (and are thus really truly done
|
||||
running any more instructions in this function). */
|
||||
@@ -518,7 +632,7 @@ private:
|
||||
std::vector<CFInfo *> controlFlowInfo;
|
||||
|
||||
/** DIFile object corresponding to the source file where the current
|
||||
function was defined (used for debugging info0. */
|
||||
function was defined (used for debugging info). */
|
||||
llvm::DIFile diFile;
|
||||
|
||||
/** DISubprogram corresponding to this function (used for debugging
|
||||
@@ -537,9 +651,17 @@ private:
|
||||
tasks launched from the current function. */
|
||||
llvm::Value *launchGroupHandlePtr;
|
||||
|
||||
/** Nesting count of the number of times calling code has disabled (and
|
||||
not yet reenabled) gather/scatter performance warnings. */
|
||||
int disableGSWarningCount;
|
||||
|
||||
std::map<std::string, llvm::BasicBlock *> labelMap;
|
||||
|
||||
static bool initLabelBBlocks(ASTNode *node, void *data);
|
||||
|
||||
llvm::Value *pointerVectorToVoidPointers(llvm::Value *value);
|
||||
static void addGSMetadata(llvm::Value *inst, SourcePos pos);
|
||||
bool ifsInLoopAllUniform() const;
|
||||
bool ifsInCFAllUniform(int cfType) const;
|
||||
void jumpIfAllLoopLanesAreDone(llvm::BasicBlock *target);
|
||||
llvm::Value *emitGatherCallback(llvm::Value *lvalue, llvm::Value *retPtr);
|
||||
|
||||
@@ -547,13 +669,25 @@ private:
|
||||
const Type *ptrType);
|
||||
|
||||
void restoreMaskGivenReturns(llvm::Value *oldMask);
|
||||
void addSwitchMaskCheck(llvm::Value *mask);
|
||||
bool inSwitchStatement() const;
|
||||
llvm::Value *getMaskAtSwitchEntry();
|
||||
|
||||
void scatter(llvm::Value *value, llvm::Value *ptr, const Type *ptrType,
|
||||
llvm::Value *mask);
|
||||
CFInfo *popCFState();
|
||||
|
||||
void scatter(llvm::Value *value, llvm::Value *ptr, const Type *valueType,
|
||||
const Type *ptrType, llvm::Value *mask);
|
||||
void maskedStore(llvm::Value *value, llvm::Value *ptr, const Type *ptrType,
|
||||
llvm::Value *mask);
|
||||
llvm::Value *gather(llvm::Value *ptr, const Type *ptrType, llvm::Value *mask,
|
||||
const char *name);
|
||||
void storeUniformToSOA(llvm::Value *value, llvm::Value *ptr,
|
||||
llvm::Value *mask, const Type *valueType,
|
||||
const PointerType *ptrType);
|
||||
llvm::Value *loadUniformFromSOA(llvm::Value *ptr, llvm::Value *mask,
|
||||
const PointerType *ptrType, const char *name);
|
||||
|
||||
llvm::Value *gather(llvm::Value *ptr, const PointerType *ptrType,
|
||||
llvm::Value *mask, const char *name);
|
||||
|
||||
llvm::Value *addVaryingOffsetsIfNeeded(llvm::Value *ptr, const Type *ptrType);
|
||||
};
|
||||
|
||||
|
||||
641
decl.cpp
641
decl.cpp
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
Copyright (c) 2010-2011, Intel Corporation
|
||||
Copyright (c) 2010-2012, Intel Corporation
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
@@ -33,7 +33,7 @@
|
||||
|
||||
/** @file decl.cpp
|
||||
@brief Implementations of classes related to turning declarations into
|
||||
symbols and types.
|
||||
symbol names and types.
|
||||
*/
|
||||
|
||||
#include "decl.h"
|
||||
@@ -44,8 +44,22 @@
|
||||
#include "stmt.h"
|
||||
#include "expr.h"
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <set>
|
||||
|
||||
static void
|
||||
lPrintTypeQualifiers(int typeQualifiers) {
|
||||
if (typeQualifiers & TYPEQUAL_INLINE) printf("inline ");
|
||||
if (typeQualifiers & TYPEQUAL_CONST) printf("const ");
|
||||
if (typeQualifiers & TYPEQUAL_UNIFORM) printf("uniform ");
|
||||
if (typeQualifiers & TYPEQUAL_VARYING) printf("varying ");
|
||||
if (typeQualifiers & TYPEQUAL_TASK) printf("task ");
|
||||
if (typeQualifiers & TYPEQUAL_SIGNED) printf("signed ");
|
||||
if (typeQualifiers & TYPEQUAL_UNSIGNED) printf("unsigned ");
|
||||
if (typeQualifiers & TYPEQUAL_EXPORT) printf("export ");
|
||||
}
|
||||
|
||||
|
||||
/** Given a Type and a set of type qualifiers, apply the type qualifiers to
|
||||
the type, returning the type that is the result.
|
||||
*/
|
||||
@@ -54,6 +68,25 @@ lApplyTypeQualifiers(int typeQualifiers, const Type *type, SourcePos pos) {
|
||||
if (type == NULL)
|
||||
return NULL;
|
||||
|
||||
if ((typeQualifiers & TYPEQUAL_CONST) != 0)
|
||||
type = type->GetAsConstType();
|
||||
|
||||
if ((typeQualifiers & TYPEQUAL_UNIFORM) != 0) {
|
||||
if (Type::Equal(type, AtomicType::Void))
|
||||
Error(pos, "\"uniform\" qualifier is illegal with \"void\" type.");
|
||||
else
|
||||
type = type->GetAsUniformType();
|
||||
}
|
||||
else if ((typeQualifiers & TYPEQUAL_VARYING) != 0) {
|
||||
if (Type::Equal(type, AtomicType::Void))
|
||||
Error(pos, "\"varying\" qualifier is illegal with \"void\" type.");
|
||||
else
|
||||
type = type->GetAsVaryingType();
|
||||
}
|
||||
else
|
||||
if (Type::Equal(type, AtomicType::Void) == false)
|
||||
type = type->GetAsUnboundVariabilityType();
|
||||
|
||||
if ((typeQualifiers & TYPEQUAL_UNSIGNED) != 0) {
|
||||
if ((typeQualifiers & TYPEQUAL_SIGNED) != 0)
|
||||
Error(pos, "Illegal to apply both \"signed\" and \"unsigned\" "
|
||||
@@ -62,30 +95,19 @@ lApplyTypeQualifiers(int typeQualifiers, const Type *type, SourcePos pos) {
|
||||
const Type *unsignedType = type->GetAsUnsignedType();
|
||||
if (unsignedType != NULL)
|
||||
type = unsignedType;
|
||||
else
|
||||
else {
|
||||
const Type *resolvedType =
|
||||
type->ResolveUnboundVariability(Variability::Varying);
|
||||
Error(pos, "\"unsigned\" qualifier is illegal with \"%s\" type.",
|
||||
type->GetString().c_str());
|
||||
|
||||
resolvedType->GetString().c_str());
|
||||
}
|
||||
}
|
||||
|
||||
if ((typeQualifiers & TYPEQUAL_SIGNED) != 0 && type->IsIntType() == false)
|
||||
if ((typeQualifiers & TYPEQUAL_SIGNED) != 0 && type->IsIntType() == false) {
|
||||
const Type *resolvedType =
|
||||
type->ResolveUnboundVariability(Variability::Varying);
|
||||
Error(pos, "\"signed\" qualifier is illegal with non-integer type "
|
||||
"\"%s\".", type->GetString().c_str());
|
||||
|
||||
if ((typeQualifiers & TYPEQUAL_CONST) != 0)
|
||||
type = type->GetAsConstType();
|
||||
|
||||
if ((typeQualifiers & TYPEQUAL_UNIFORM) != 0)
|
||||
type = type->GetAsUniformType();
|
||||
else if ((typeQualifiers & TYPEQUAL_VARYING) != 0)
|
||||
type = type->GetAsVaryingType();
|
||||
else {
|
||||
// otherwise, structs are uniform by default and everything
|
||||
// else is varying by default
|
||||
if (dynamic_cast<const StructType *>(type->GetBaseType()) != NULL)
|
||||
type = type->GetAsUniformType();
|
||||
else
|
||||
type = type->GetAsVaryingType();
|
||||
"\"%s\".", resolvedType->GetString().c_str());
|
||||
}
|
||||
|
||||
return type;
|
||||
@@ -106,18 +128,59 @@ DeclSpecs::DeclSpecs(const Type *t, StorageClass sc, int tq) {
|
||||
|
||||
const Type *
|
||||
DeclSpecs::GetBaseType(SourcePos pos) const {
|
||||
const Type *bt = baseType;
|
||||
const Type *retType = baseType;
|
||||
|
||||
if (retType == NULL) {
|
||||
Warning(pos, "No type specified in declaration. Assuming int32.");
|
||||
retType = AtomicType::UniformInt32->GetAsUnboundVariabilityType();
|
||||
}
|
||||
|
||||
if (vectorSize > 0) {
|
||||
const AtomicType *atomicType = dynamic_cast<const AtomicType *>(bt);
|
||||
const AtomicType *atomicType = dynamic_cast<const AtomicType *>(retType);
|
||||
if (atomicType == NULL) {
|
||||
Error(pos, "Only atomic types (int, float, ...) are legal for vector "
|
||||
"types.");
|
||||
return NULL;
|
||||
}
|
||||
bt = new VectorType(atomicType, vectorSize);
|
||||
retType = new VectorType(atomicType, vectorSize);
|
||||
}
|
||||
|
||||
return lApplyTypeQualifiers(typeQualifiers, bt, pos);
|
||||
retType = lApplyTypeQualifiers(typeQualifiers, retType, pos);
|
||||
|
||||
if (soaWidth > 0) {
|
||||
const StructType *st = dynamic_cast<const StructType *>(retType);
|
||||
|
||||
if (st == NULL) {
|
||||
Error(pos, "Illegal to provide soa<%d> qualifier with non-struct "
|
||||
"type \"%s\".", soaWidth, retType->GetString().c_str());
|
||||
return NULL;
|
||||
}
|
||||
else if (soaWidth <= 0 || (soaWidth & (soaWidth - 1)) != 0) {
|
||||
Error(pos, "soa<%d> width illegal. Value must be positive power "
|
||||
"of two.", soaWidth);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
if (st->IsUniformType()) {
|
||||
Error(pos, "\"uniform\" qualifier and \"soa<%d>\" qualifier can't "
|
||||
"both be used in a type declaration.", soaWidth);
|
||||
return NULL;
|
||||
}
|
||||
else if (st->IsVaryingType()) {
|
||||
Error(pos, "\"varying\" qualifier and \"soa<%d>\" qualifier can't "
|
||||
"both be used in a type declaration.", soaWidth);
|
||||
return NULL;
|
||||
}
|
||||
else
|
||||
retType = st->GetAsSOAType(soaWidth);
|
||||
|
||||
if (soaWidth < g->target.vectorWidth)
|
||||
PerformanceWarning(pos, "soa<%d> width smaller than gang size %d "
|
||||
"currently leads to inefficient code to access "
|
||||
"soa types.", soaWidth, g->target.vectorWidth);
|
||||
}
|
||||
|
||||
return retType;
|
||||
}
|
||||
|
||||
|
||||
@@ -127,7 +190,6 @@ lGetStorageClassName(StorageClass storageClass) {
|
||||
case SC_NONE: return "";
|
||||
case SC_EXTERN: return "extern";
|
||||
case SC_EXTERN_C: return "extern \"C\"";
|
||||
case SC_EXPORT: return "export";
|
||||
case SC_STATIC: return "static";
|
||||
case SC_TYPEDEF: return "typedef";
|
||||
default: FATAL("Unhandled storage class in lGetStorageClassName");
|
||||
@@ -138,21 +200,14 @@ lGetStorageClassName(StorageClass storageClass) {
|
||||
|
||||
void
|
||||
DeclSpecs::Print() const {
|
||||
printf("%s ", lGetStorageClassName(storageClass));
|
||||
printf("Declspecs: [%s ", lGetStorageClassName(storageClass));
|
||||
|
||||
if (soaWidth > 0) printf("soa<%d> ", soaWidth);
|
||||
|
||||
if (typeQualifiers & TYPEQUAL_INLINE) printf("inline ");
|
||||
if (typeQualifiers & TYPEQUAL_CONST) printf("const ");
|
||||
if (typeQualifiers & TYPEQUAL_UNIFORM) printf("uniform ");
|
||||
if (typeQualifiers & TYPEQUAL_VARYING) printf("varying ");
|
||||
if (typeQualifiers & TYPEQUAL_TASK) printf("task ");
|
||||
if (typeQualifiers & TYPEQUAL_SIGNED) printf("signed ");
|
||||
if (typeQualifiers & TYPEQUAL_UNSIGNED) printf("unsigned ");
|
||||
|
||||
printf("%s", baseType->GetString().c_str());
|
||||
lPrintTypeQualifiers(typeQualifiers);
|
||||
printf("base type: %s", baseType->GetString().c_str());
|
||||
|
||||
if (vectorSize > 0) printf("<%d>", vectorSize);
|
||||
printf("]");
|
||||
}
|
||||
|
||||
|
||||
@@ -163,151 +218,177 @@ Declarator::Declarator(DeclaratorKind dk, SourcePos p)
|
||||
: pos(p), kind(dk) {
|
||||
child = NULL;
|
||||
typeQualifiers = 0;
|
||||
storageClass = SC_NONE;
|
||||
arraySize = -1;
|
||||
sym = NULL;
|
||||
type = NULL;
|
||||
initExpr = NULL;
|
||||
}
|
||||
|
||||
|
||||
void
|
||||
Declarator::InitFromDeclSpecs(DeclSpecs *ds) {
|
||||
const Type *t = GetType(ds);
|
||||
Symbol *sym = GetSymbol();
|
||||
if (sym != NULL) {
|
||||
sym->type = t;
|
||||
sym->storageClass = ds->storageClass;
|
||||
const Type *baseType = ds->GetBaseType(pos);
|
||||
InitFromType(baseType, ds);
|
||||
|
||||
if (type == NULL) {
|
||||
Assert(m->errorCount > 0);
|
||||
return;
|
||||
}
|
||||
|
||||
storageClass = ds->storageClass;
|
||||
|
||||
if (ds->declSpecList.size() > 0 &&
|
||||
dynamic_cast<const FunctionType *>(type) == NULL) {
|
||||
Error(pos, "__declspec specifiers for non-function type \"%s\" are "
|
||||
"not used.", type->GetString().c_str());
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
Symbol *
|
||||
Declarator::GetSymbol() const {
|
||||
// The symbol lives at the last child in the chain, so walk down there
|
||||
// and return the one there.
|
||||
const Declarator *d = this;
|
||||
while (d->child != NULL)
|
||||
d = d->child;
|
||||
return d->sym;
|
||||
}
|
||||
|
||||
|
||||
void
|
||||
Declarator::Print() const {
|
||||
Symbol *sym = GetSymbol();
|
||||
if (sym != NULL)
|
||||
printf("%s", sym->name.c_str());
|
||||
Declarator::Print(int indent) const {
|
||||
printf("%*cdeclarator: [", indent, ' ');
|
||||
pos.Print();
|
||||
|
||||
lPrintTypeQualifiers(typeQualifiers);
|
||||
printf("%s ", lGetStorageClassName(storageClass));
|
||||
if (name.size() > 0)
|
||||
printf("%s", name.c_str());
|
||||
else
|
||||
printf("(null symbol)");
|
||||
printf("(unnamed)");
|
||||
|
||||
printf(", array size = %d", arraySize);
|
||||
|
||||
printf(", kind = ");
|
||||
switch (kind) {
|
||||
case DK_BASE: printf("base"); break;
|
||||
case DK_POINTER: printf("pointer"); break;
|
||||
case DK_REFERENCE: printf("reference"); break;
|
||||
case DK_ARRAY: printf("array"); break;
|
||||
case DK_FUNCTION: printf("function"); break;
|
||||
default: FATAL("Unhandled declarator kind");
|
||||
}
|
||||
|
||||
if (initExpr != NULL) {
|
||||
printf(" = (");
|
||||
initExpr->Print();
|
||||
printf(")");
|
||||
}
|
||||
pos.Print();
|
||||
}
|
||||
|
||||
|
||||
Symbol *
|
||||
Declarator::GetFunctionInfo(DeclSpecs *ds, std::vector<Symbol *> *funArgs) {
|
||||
const FunctionType *type =
|
||||
dynamic_cast<const FunctionType *>(GetType(ds));
|
||||
if (type == NULL)
|
||||
return NULL;
|
||||
|
||||
Symbol *declSym = GetSymbol();
|
||||
assert(declSym != NULL);
|
||||
|
||||
// Get the symbol for the function from the symbol table. (It should
|
||||
// already have been added to the symbol table by AddGlobal() by the
|
||||
// time we get here.)
|
||||
Symbol *funSym = m->symbolTable->LookupFunction(declSym->name.c_str(), type);
|
||||
if (funSym != NULL)
|
||||
// May be NULL due to error earlier in compilation
|
||||
funSym->pos = pos;
|
||||
|
||||
// Walk down to the declarator for the function. (We have to get past
|
||||
// the stuff that specifies the function's return type before we get to
|
||||
// the function's declarator.)
|
||||
Declarator *d = this;
|
||||
while (d != NULL && d->kind != DK_FUNCTION)
|
||||
d = d->child;
|
||||
assert(d != NULL);
|
||||
|
||||
for (unsigned int i = 0; i < d->functionParams.size(); ++i) {
|
||||
Declaration *pdecl = d->functionParams[i];
|
||||
assert(pdecl->declarators.size() == 1);
|
||||
funArgs->push_back(pdecl->declarators[0]->GetSymbol());
|
||||
if (functionParams.size() > 0) {
|
||||
for (unsigned int i = 0; i < functionParams.size(); ++i) {
|
||||
printf("\n%*cfunc param %d:\n", indent, ' ', i);
|
||||
functionParams[i]->Print(indent+4);
|
||||
}
|
||||
}
|
||||
|
||||
return funSym;
|
||||
if (child != NULL)
|
||||
child->Print(indent + 4);
|
||||
|
||||
printf("]\n");
|
||||
}
|
||||
|
||||
|
||||
const Type *
|
||||
Declarator::GetType(const Type *base, DeclSpecs *ds) const {
|
||||
void
|
||||
Declarator::InitFromType(const Type *baseType, DeclSpecs *ds) {
|
||||
bool hasUniformQual = ((typeQualifiers & TYPEQUAL_UNIFORM) != 0);
|
||||
bool hasVaryingQual = ((typeQualifiers & TYPEQUAL_VARYING) != 0);
|
||||
bool isTask = ((typeQualifiers & TYPEQUAL_TASK) != 0);
|
||||
bool isExported = ((typeQualifiers & TYPEQUAL_EXPORT) != 0);
|
||||
bool isConst = ((typeQualifiers & TYPEQUAL_CONST) != 0);
|
||||
|
||||
if (hasUniformQual && hasVaryingQual) {
|
||||
Error(pos, "Can't provide both \"uniform\" and \"varying\" qualifiers.");
|
||||
return NULL;
|
||||
return;
|
||||
}
|
||||
if (kind != DK_FUNCTION && isTask)
|
||||
if (kind != DK_FUNCTION && isTask) {
|
||||
Error(pos, "\"task\" qualifier illegal in variable declaration.");
|
||||
return;
|
||||
}
|
||||
if (kind != DK_FUNCTION && isExported) {
|
||||
Error(pos, "\"export\" qualifier illegal in variable declaration.");
|
||||
return;
|
||||
}
|
||||
|
||||
const Type *type = base;
|
||||
switch (kind) {
|
||||
case DK_BASE:
|
||||
Variability variability(Variability::Unbound);
|
||||
if (hasUniformQual)
|
||||
variability = Variability::Uniform;
|
||||
else if (hasVaryingQual)
|
||||
variability = Variability::Varying;
|
||||
|
||||
if (kind == DK_BASE) {
|
||||
// All of the type qualifiers should be in the DeclSpecs for the
|
||||
// base declarator
|
||||
assert(typeQualifiers == 0);
|
||||
assert(child == NULL);
|
||||
return type;
|
||||
|
||||
case DK_POINTER:
|
||||
type = new PointerType(type, hasUniformQual, isConst);
|
||||
if (child != NULL)
|
||||
return child->GetType(type, ds);
|
||||
Assert(typeQualifiers == 0);
|
||||
Assert(child == NULL);
|
||||
type = baseType;
|
||||
}
|
||||
else if (kind == DK_POINTER) {
|
||||
/* For now, any pointer to an SOA type gets the slice property; if
|
||||
we add the capability to declare pointers as slices or not,
|
||||
we'll want to set this based on a type qualifier here. */
|
||||
const Type *ptrType = new PointerType(baseType, variability, isConst,
|
||||
baseType->IsSOAType());
|
||||
if (child != NULL) {
|
||||
child->InitFromType(ptrType, ds);
|
||||
type = child->type;
|
||||
name = child->name;
|
||||
}
|
||||
else
|
||||
return type;
|
||||
break;
|
||||
|
||||
case DK_REFERENCE:
|
||||
if (hasUniformQual)
|
||||
type = ptrType;
|
||||
}
|
||||
else if (kind == DK_REFERENCE) {
|
||||
if (hasUniformQual) {
|
||||
Error(pos, "\"uniform\" qualifier is illegal to apply to references.");
|
||||
if (hasVaryingQual)
|
||||
return;
|
||||
}
|
||||
if (hasVaryingQual) {
|
||||
Error(pos, "\"varying\" qualifier is illegal to apply to references.");
|
||||
if (isConst)
|
||||
return;
|
||||
}
|
||||
if (isConst) {
|
||||
Error(pos, "\"const\" qualifier is to illegal apply to references.");
|
||||
|
||||
return;
|
||||
}
|
||||
// The parser should disallow this already, but double check.
|
||||
if (dynamic_cast<const ReferenceType *>(type) != NULL) {
|
||||
if (dynamic_cast<const ReferenceType *>(baseType) != NULL) {
|
||||
Error(pos, "References to references are illegal.");
|
||||
return NULL;
|
||||
return;
|
||||
}
|
||||
|
||||
type = new ReferenceType(type);
|
||||
if (child != NULL)
|
||||
return child->GetType(type, ds);
|
||||
const Type *refType = new ReferenceType(baseType);
|
||||
if (child != NULL) {
|
||||
child->InitFromType(refType, ds);
|
||||
type = child->type;
|
||||
name = child->name;
|
||||
}
|
||||
else
|
||||
return type;
|
||||
break;
|
||||
type = refType;
|
||||
}
|
||||
else if (kind == DK_ARRAY) {
|
||||
if (Type::Equal(baseType, AtomicType::Void)) {
|
||||
Error(pos, "Arrays of \"void\" type are illegal.");
|
||||
return;
|
||||
}
|
||||
if (dynamic_cast<const ReferenceType *>(baseType)) {
|
||||
Error(pos, "Arrays of references (type \"%s\") are illegal.",
|
||||
baseType->GetString().c_str());
|
||||
return;
|
||||
}
|
||||
|
||||
case DK_ARRAY:
|
||||
type = new ArrayType(type, arraySize);
|
||||
if (child)
|
||||
return child->GetType(type, ds);
|
||||
const Type *arrayType = new ArrayType(baseType, arraySize);
|
||||
if (child != NULL) {
|
||||
child->InitFromType(arrayType, ds);
|
||||
type = child->type;
|
||||
name = child->name;
|
||||
}
|
||||
else
|
||||
return type;
|
||||
break;
|
||||
|
||||
case DK_FUNCTION: {
|
||||
type = arrayType;
|
||||
}
|
||||
else if (kind == DK_FUNCTION) {
|
||||
std::vector<const Type *> args;
|
||||
std::vector<std::string> argNames;
|
||||
std::vector<ConstExpr *> argDefaults;
|
||||
std::vector<Expr *> argDefaults;
|
||||
std::vector<SourcePos> argPos;
|
||||
|
||||
// Loop over the function arguments and store the names, types,
|
||||
@@ -316,33 +397,44 @@ Declarator::GetType(const Type *base, DeclSpecs *ds) const {
|
||||
for (unsigned int i = 0; i < functionParams.size(); ++i) {
|
||||
Declaration *d = functionParams[i];
|
||||
|
||||
char buf[32];
|
||||
Symbol *sym;
|
||||
if (d == NULL) {
|
||||
Assert(m->errorCount > 0);
|
||||
continue;
|
||||
}
|
||||
if (d->declarators.size() == 0) {
|
||||
// function declaration like foo(float), w/o a name for
|
||||
// the parameter
|
||||
// function declaration like foo(float), w/o a name for the
|
||||
// parameter; wire up a placeholder Declarator for it
|
||||
d->declarators.push_back(new Declarator(DK_BASE, pos));
|
||||
d->declarators[0]->InitFromDeclSpecs(d->declSpecs);
|
||||
}
|
||||
|
||||
Assert(d->declarators.size() == 1);
|
||||
Declarator *decl = d->declarators[0];
|
||||
if (decl == NULL || decl->type == NULL) {
|
||||
Assert(m->errorCount > 0);
|
||||
continue;
|
||||
}
|
||||
|
||||
if (decl->name == "") {
|
||||
// Give a name to any anonymous parameter declarations
|
||||
char buf[32];
|
||||
sprintf(buf, "__anon_parameter_%d", i);
|
||||
sym = new Symbol(buf, pos);
|
||||
sym->type = d->declSpecs->GetBaseType(pos);
|
||||
}
|
||||
else {
|
||||
sym = d->declarators[0]->GetSymbol();
|
||||
if (sym == NULL) {
|
||||
// Handle more complex anonymous declarations like
|
||||
// float (float **).
|
||||
sprintf(buf, "__anon_parameter_%d", i);
|
||||
sym = new Symbol(buf, d->declarators[0]->pos);
|
||||
sym->type = d->declarators[0]->GetType(d->declSpecs);
|
||||
}
|
||||
decl->name = buf;
|
||||
}
|
||||
decl->type = decl->type->ResolveUnboundVariability(Variability::Varying);
|
||||
|
||||
if (d->declSpecs->storageClass != SC_NONE)
|
||||
Error(sym->pos, "Storage class \"%s\" is illegal in "
|
||||
Error(decl->pos, "Storage class \"%s\" is illegal in "
|
||||
"function parameter declaration for parameter \"%s\".",
|
||||
lGetStorageClassName(d->declSpecs->storageClass),
|
||||
sym->name.c_str());
|
||||
decl->name.c_str());
|
||||
if (Type::Equal(decl->type, AtomicType::Void)) {
|
||||
Error(decl->pos, "Parameter with type \"void\" illegal in function "
|
||||
"parameter list.");
|
||||
decl->type = NULL;
|
||||
}
|
||||
|
||||
const ArrayType *at = dynamic_cast<const ArrayType *>(sym->type);
|
||||
const ArrayType *at = dynamic_cast<const ArrayType *>(decl->type);
|
||||
if (at != NULL) {
|
||||
// As in C, arrays are passed to functions as pointers to
|
||||
// their element type. We'll just immediately make this
|
||||
@@ -352,115 +444,121 @@ Declarator::GetType(const Type *base, DeclSpecs *ds) const {
|
||||
// report this differently than it was originally declared
|
||||
// in the function, but it's not clear that this is a
|
||||
// significant problem.)
|
||||
sym->type = PointerType::GetUniform(at->GetElementType());
|
||||
const Type *targetType = at->GetElementType();
|
||||
if (targetType == NULL) {
|
||||
Assert(m->errorCount > 0);
|
||||
return;
|
||||
}
|
||||
|
||||
decl->type = PointerType::GetUniform(targetType);
|
||||
|
||||
// Make sure there are no unsized arrays (other than the
|
||||
// first dimension) in function parameter lists.
|
||||
at = dynamic_cast<const ArrayType *>(at->GetElementType());
|
||||
at = dynamic_cast<const ArrayType *>(targetType);
|
||||
while (at != NULL) {
|
||||
if (at->GetElementCount() == 0)
|
||||
Error(sym->pos, "Arrays with unsized dimensions in "
|
||||
Error(decl->pos, "Arrays with unsized dimensions in "
|
||||
"dimensions after the first one are illegal in "
|
||||
"function parameter lists.");
|
||||
at = dynamic_cast<const ArrayType *>(at->GetElementType());
|
||||
}
|
||||
}
|
||||
|
||||
args.push_back(sym->type);
|
||||
argNames.push_back(sym->name);
|
||||
argPos.push_back(sym->pos);
|
||||
args.push_back(decl->type);
|
||||
argNames.push_back(decl->name);
|
||||
argPos.push_back(decl->pos);
|
||||
|
||||
ConstExpr *init = NULL;
|
||||
if (d->declarators.size()) {
|
||||
// Try to find an initializer expression; if there is one,
|
||||
// it lives down to the base declarator.
|
||||
Declarator *decl = d->declarators[0];
|
||||
while (decl->child != NULL) {
|
||||
assert(decl->initExpr == NULL);
|
||||
Expr *init = NULL;
|
||||
// Try to find an initializer expression.
|
||||
while (decl != NULL) {
|
||||
if (decl->initExpr != NULL) {
|
||||
decl->initExpr = TypeCheck(decl->initExpr);
|
||||
decl->initExpr = Optimize(decl->initExpr);
|
||||
if (decl->initExpr != NULL) {
|
||||
init = dynamic_cast<ConstExpr *>(decl->initExpr);
|
||||
if (init == NULL)
|
||||
init = dynamic_cast<NullPointerExpr *>(decl->initExpr);
|
||||
if (init == NULL)
|
||||
Error(decl->initExpr->pos, "Default value for parameter "
|
||||
"\"%s\" must be a compile-time constant.",
|
||||
decl->name.c_str());
|
||||
}
|
||||
break;
|
||||
}
|
||||
else
|
||||
decl = decl->child;
|
||||
}
|
||||
|
||||
if (decl->initExpr != NULL &&
|
||||
(decl->initExpr = decl->initExpr->TypeCheck()) != NULL &&
|
||||
(decl->initExpr = decl->initExpr->Optimize()) != NULL &&
|
||||
(init = dynamic_cast<ConstExpr *>(decl->initExpr)) == NULL) {
|
||||
Error(decl->initExpr->pos, "Default value for parameter "
|
||||
"\"%s\" must be a compile-time constant.",
|
||||
sym->name.c_str());
|
||||
}
|
||||
}
|
||||
argDefaults.push_back(init);
|
||||
}
|
||||
|
||||
const Type *returnType = type;
|
||||
const Type *returnType = baseType;
|
||||
if (returnType == NULL) {
|
||||
Error(pos, "No return type provided in function declaration.");
|
||||
return NULL;
|
||||
return;
|
||||
}
|
||||
|
||||
bool isExported = ds && (ds->storageClass == SC_EXPORT);
|
||||
if (dynamic_cast<const FunctionType *>(returnType) != NULL) {
|
||||
Error(pos, "Illegal to return function type from function.");
|
||||
return;
|
||||
}
|
||||
|
||||
returnType = returnType->ResolveUnboundVariability(Variability::Varying);
|
||||
|
||||
bool isExternC = ds && (ds->storageClass == SC_EXTERN_C);
|
||||
bool isExported = ds && ((ds->typeQualifiers & TYPEQUAL_EXPORT) != 0);
|
||||
bool isTask = ds && ((ds->typeQualifiers & TYPEQUAL_TASK) != 0);
|
||||
|
||||
if (isExported && isTask) {
|
||||
Error(pos, "Function can't have both \"task\" and \"export\" "
|
||||
"qualifiers");
|
||||
return NULL;
|
||||
return;
|
||||
}
|
||||
if (isExternC && isTask) {
|
||||
Error(pos, "Function can't have both \"extern \"C\"\" and \"task\" "
|
||||
"qualifiers");
|
||||
return NULL;
|
||||
return;
|
||||
}
|
||||
if (isExternC && isExported) {
|
||||
Error(pos, "Function can't have both \"extern \"C\"\" and \"export\" "
|
||||
"qualifiers");
|
||||
return NULL;
|
||||
return;
|
||||
}
|
||||
|
||||
Type *functionType =
|
||||
new FunctionType(returnType, args, pos, argNames, argDefaults,
|
||||
if (child == NULL) {
|
||||
Assert(m->errorCount > 0);
|
||||
return;
|
||||
}
|
||||
|
||||
const FunctionType *functionType =
|
||||
new FunctionType(returnType, args, argNames, argDefaults,
|
||||
argPos, isTask, isExported, isExternC);
|
||||
return child->GetType(functionType, ds);
|
||||
|
||||
// handle any explicit __declspecs on the function
|
||||
if (ds != NULL) {
|
||||
for (int i = 0; i < (int)ds->declSpecList.size(); ++i) {
|
||||
std::string str = ds->declSpecList[i].first;
|
||||
SourcePos pos = ds->declSpecList[i].second;
|
||||
|
||||
if (str == "safe")
|
||||
(const_cast<FunctionType *>(functionType))->isSafe = true;
|
||||
else if (!strncmp(str.c_str(), "cost", 4)) {
|
||||
int cost = atoi(str.c_str() + 4);
|
||||
if (cost < 0)
|
||||
Error(pos, "Negative function cost %d is illegal.",
|
||||
cost);
|
||||
(const_cast<FunctionType *>(functionType))->costOverride = cost;
|
||||
}
|
||||
else
|
||||
Error(pos, "__declspec parameter \"%s\" unknown.", str.c_str());
|
||||
}
|
||||
}
|
||||
|
||||
child->InitFromType(functionType, ds);
|
||||
type = child->type;
|
||||
name = child->name;
|
||||
}
|
||||
default:
|
||||
FATAL("Unexpected decl kind");
|
||||
return NULL;
|
||||
}
|
||||
|
||||
#if 0
|
||||
// Make sure we actually have an array of structs ..
|
||||
const StructType *childStructType =
|
||||
dynamic_cast<const StructType *>(childType);
|
||||
if (childStructType == NULL) {
|
||||
Error(pos, "Illegal to provide soa<%d> qualifier with non-struct "
|
||||
"type \"%s\".", soaWidth, childType->GetString().c_str());
|
||||
return new ArrayType(childType, arraySize == -1 ? 0 : arraySize);
|
||||
}
|
||||
else if ((soaWidth & (soaWidth - 1)) != 0) {
|
||||
Error(pos, "soa<%d> width illegal. Value must be power of two.",
|
||||
soaWidth);
|
||||
return NULL;
|
||||
}
|
||||
else if (arraySize != -1 && (arraySize % soaWidth) != 0) {
|
||||
Error(pos, "soa<%d> width must evenly divide array size %d.",
|
||||
soaWidth, arraySize);
|
||||
return NULL;
|
||||
}
|
||||
return new SOAArrayType(childStructType, arraySize == -1 ? 0 : arraySize,
|
||||
soaWidth);
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
const Type *
|
||||
Declarator::GetType(DeclSpecs *ds) const {
|
||||
const Type *baseType = ds->GetBaseType(pos);
|
||||
const Type *type = GetType(baseType, ds);
|
||||
return type;
|
||||
}
|
||||
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
// Declaration
|
||||
|
||||
@@ -485,42 +583,66 @@ Declaration::Declaration(DeclSpecs *ds, Declarator *d) {
|
||||
|
||||
std::vector<VariableDeclaration>
|
||||
Declaration::GetVariableDeclarations() const {
|
||||
assert(declSpecs->storageClass != SC_TYPEDEF);
|
||||
Assert(declSpecs->storageClass != SC_TYPEDEF);
|
||||
std::vector<VariableDeclaration> vars;
|
||||
|
||||
for (unsigned int i = 0; i < declarators.size(); ++i) {
|
||||
if (declarators[i] == NULL)
|
||||
continue;
|
||||
Declarator *decl = declarators[i];
|
||||
if (decl == NULL)
|
||||
if (decl == NULL || decl->type == NULL) {
|
||||
// Ignore earlier errors
|
||||
Assert(m->errorCount > 0);
|
||||
continue;
|
||||
|
||||
Symbol *sym = decl->GetSymbol();
|
||||
if (dynamic_cast<const FunctionType *>(sym->type) != NULL) {
|
||||
// function declaration
|
||||
m->symbolTable->AddFunction(sym);
|
||||
}
|
||||
else {
|
||||
|
||||
if (Type::Equal(decl->type, AtomicType::Void))
|
||||
Error(decl->pos, "\"void\" type variable illegal in declaration.");
|
||||
else if (dynamic_cast<const FunctionType *>(decl->type) == NULL) {
|
||||
decl->type = decl->type->ResolveUnboundVariability(Variability::Varying);
|
||||
Symbol *sym = new Symbol(decl->name, decl->pos, decl->type,
|
||||
decl->storageClass);
|
||||
m->symbolTable->AddVariable(sym);
|
||||
vars.push_back(VariableDeclaration(sym, decl->initExpr));
|
||||
}
|
||||
}
|
||||
|
||||
return vars;
|
||||
}
|
||||
|
||||
|
||||
void
|
||||
Declaration::Print() const {
|
||||
printf("Declaration: specs [");
|
||||
declSpecs->Print();
|
||||
printf("], declarators [");
|
||||
for (unsigned int i = 0 ; i < declarators.size(); ++i) {
|
||||
declarators[i]->Print();
|
||||
printf("%s", (i == declarators.size() - 1) ? "]" : ", ");
|
||||
Declaration::DeclareFunctions() {
|
||||
Assert(declSpecs->storageClass != SC_TYPEDEF);
|
||||
|
||||
for (unsigned int i = 0; i < declarators.size(); ++i) {
|
||||
Declarator *decl = declarators[i];
|
||||
if (decl == NULL || decl->type == NULL) {
|
||||
// Ignore earlier errors
|
||||
Assert(m->errorCount > 0);
|
||||
continue;
|
||||
}
|
||||
|
||||
const FunctionType *ftype =
|
||||
dynamic_cast<const FunctionType *>(decl->type);
|
||||
if (ftype == NULL)
|
||||
continue;
|
||||
|
||||
bool isInline = (declSpecs->typeQualifiers & TYPEQUAL_INLINE);
|
||||
m->AddFunctionDeclaration(decl->name, ftype, decl->storageClass,
|
||||
isInline, decl->pos);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void
|
||||
Declaration::Print(int indent) const {
|
||||
printf("%*cDeclaration: specs [", indent, ' ');
|
||||
declSpecs->Print();
|
||||
printf("], declarators:\n");
|
||||
for (unsigned int i = 0 ; i < declarators.size(); ++i)
|
||||
declarators[i]->Print(indent+4);
|
||||
}
|
||||
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
|
||||
void
|
||||
@@ -537,35 +659,42 @@ GetStructTypesNamesPositions(const std::vector<StructDeclaration *> &sd,
|
||||
// FIXME: making this fake little DeclSpecs here is really
|
||||
// disgusting
|
||||
DeclSpecs ds(type);
|
||||
if (type->IsUniformType())
|
||||
ds.typeQualifiers |= TYPEQUAL_UNIFORM;
|
||||
else
|
||||
ds.typeQualifiers |= TYPEQUAL_VARYING;
|
||||
if (Type::Equal(type, AtomicType::Void) == false) {
|
||||
if (type->IsUniformType())
|
||||
ds.typeQualifiers |= TYPEQUAL_UNIFORM;
|
||||
else if (type->IsVaryingType())
|
||||
ds.typeQualifiers |= TYPEQUAL_VARYING;
|
||||
else if (type->GetSOAWidth() != 0)
|
||||
ds.soaWidth = type->GetSOAWidth();
|
||||
// FIXME: ds.vectorSize?
|
||||
}
|
||||
|
||||
for (unsigned int j = 0; j < sd[i]->declarators->size(); ++j) {
|
||||
Declarator *d = (*sd[i]->declarators)[j];
|
||||
d->InitFromDeclSpecs(&ds);
|
||||
|
||||
Symbol *sym = d->GetSymbol();
|
||||
if (Type::Equal(d->type, AtomicType::Void))
|
||||
Error(d->pos, "\"void\" type illegal for struct member.");
|
||||
|
||||
const ArrayType *arrayType =
|
||||
dynamic_cast<const ArrayType *>(sym->type);
|
||||
if (arrayType != NULL && arrayType->GetElementCount() == 0) {
|
||||
Error(d->pos, "Unsized arrays aren't allowed in struct "
|
||||
"definitions.");
|
||||
elementTypes->push_back(NULL);
|
||||
}
|
||||
else
|
||||
elementTypes->push_back(sym->type);
|
||||
elementTypes->push_back(d->type);
|
||||
|
||||
if (seenNames.find(sym->name) != seenNames.end())
|
||||
if (seenNames.find(d->name) != seenNames.end())
|
||||
Error(d->pos, "Struct member \"%s\" has same name as a "
|
||||
"previously-declared member.", sym->name.c_str());
|
||||
"previously-declared member.", d->name.c_str());
|
||||
else
|
||||
seenNames.insert(sym->name);
|
||||
seenNames.insert(d->name);
|
||||
|
||||
elementNames->push_back(sym->name);
|
||||
elementPositions->push_back(sym->pos);
|
||||
elementNames->push_back(d->name);
|
||||
elementPositions->push_back(d->pos);
|
||||
}
|
||||
}
|
||||
|
||||
for (int i = 0; i < (int)elementTypes->size() - 1; ++i) {
|
||||
const ArrayType *arrayType =
|
||||
dynamic_cast<const ArrayType *>((*elementTypes)[i]);
|
||||
|
||||
if (arrayType != NULL && arrayType->GetElementCount() == 0)
|
||||
Error((*elementPositions)[i], "Unsized arrays aren't allowed except "
|
||||
"for the last member in a struct definition.");
|
||||
}
|
||||
}
|
||||
|
||||
58
decl.h
58
decl.h
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
Copyright (c) 2010-2011, Intel Corporation
|
||||
Copyright (c) 2010-2012, Intel Corporation
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
@@ -47,8 +47,8 @@
|
||||
variables--here, that the declaration has the 'static' and 'uniform'
|
||||
qualifiers, and that its basic type is 'int'. Then for each variable
|
||||
declaration, the Declaration class holds an instance of a Declarator,
|
||||
which in turn records the per-variable information like the symbol
|
||||
name, array size (if any), initializer expression, etc.
|
||||
which in turn records the per-variable information like the name, array
|
||||
size (if any), initializer expression, etc.
|
||||
*/
|
||||
|
||||
#ifndef ISPC_DECL_H
|
||||
@@ -61,16 +61,6 @@ struct VariableDeclaration;
|
||||
class Declaration;
|
||||
class Declarator;
|
||||
|
||||
enum StorageClass {
|
||||
SC_NONE,
|
||||
SC_EXTERN,
|
||||
SC_EXPORT,
|
||||
SC_STATIC,
|
||||
SC_TYPEDEF,
|
||||
SC_EXTERN_C
|
||||
};
|
||||
|
||||
|
||||
/* Multiple qualifiers can be provided with types in declarations;
|
||||
therefore, they are set up so that they can be ANDed together into an
|
||||
int. */
|
||||
@@ -82,6 +72,7 @@ enum StorageClass {
|
||||
#define TYPEQUAL_SIGNED (1<<4)
|
||||
#define TYPEQUAL_UNSIGNED (1<<5)
|
||||
#define TYPEQUAL_INLINE (1<<6)
|
||||
#define TYPEQUAL_EXPORT (1<<7)
|
||||
|
||||
/** @brief Representation of the declaration specifiers in a declaration.
|
||||
|
||||
@@ -90,7 +81,8 @@ enum StorageClass {
|
||||
*/
|
||||
class DeclSpecs {
|
||||
public:
|
||||
DeclSpecs(const Type *t = NULL, StorageClass sc = SC_NONE, int tq = TYPEQUAL_NONE);
|
||||
DeclSpecs(const Type *t = NULL, StorageClass sc = SC_NONE,
|
||||
int tq = TYPEQUAL_NONE);
|
||||
|
||||
void Print() const;
|
||||
|
||||
@@ -117,6 +109,8 @@ public:
|
||||
SOA width specified. Otherwise this is zero.
|
||||
*/
|
||||
int soaWidth;
|
||||
|
||||
std::vector<std::pair<std::string, SourcePos> > declSpecList;
|
||||
};
|
||||
|
||||
|
||||
@@ -138,25 +132,13 @@ public:
|
||||
Declarator(DeclaratorKind dk, SourcePos p);
|
||||
|
||||
/** Once a DeclSpecs instance is available, this method completes the
|
||||
initialization of the Symbol, setting its Type accordingly.
|
||||
initialization of the type member.
|
||||
*/
|
||||
void InitFromDeclSpecs(DeclSpecs *ds);
|
||||
|
||||
/** Get the actual type of the combination of Declarator and the given
|
||||
DeclSpecs. If an explicit base type is provided, the declarator is
|
||||
applied to that type; otherwise the base type from the DeclSpecs is
|
||||
used. */
|
||||
const Type *GetType(DeclSpecs *ds) const;
|
||||
const Type *GetType(const Type *base, DeclSpecs *ds) const;
|
||||
void InitFromType(const Type *base, DeclSpecs *ds);
|
||||
|
||||
/** Returns the symbol corresponding to the function declared by this
|
||||
declarator and symbols for its arguments in *args. */
|
||||
Symbol *GetFunctionInfo(DeclSpecs *ds, std::vector<Symbol *> *args);
|
||||
|
||||
/** Returns the symbol associated with the declarator. */
|
||||
Symbol *GetSymbol() const;
|
||||
|
||||
void Print() const;
|
||||
void Print(int indent) const;
|
||||
|
||||
/** Position of the declarator in the source program. */
|
||||
const SourcePos pos;
|
||||
@@ -175,18 +157,24 @@ public:
|
||||
/** Type qualifiers provided with the declarator. */
|
||||
int typeQualifiers;
|
||||
|
||||
StorageClass storageClass;
|
||||
|
||||
/** For array declarators, this gives the declared size of the array.
|
||||
Unsized arrays have arraySize == 0. */
|
||||
int arraySize;
|
||||
|
||||
/** Symbol associated with the declarator. */
|
||||
Symbol *sym;
|
||||
/** Name associated with the declarator. */
|
||||
std::string name;
|
||||
|
||||
/** Initialization expression for the variable. May be NULL. */
|
||||
Expr *initExpr;
|
||||
|
||||
/** Type of the declarator. This is NULL until InitFromDeclSpecs() or
|
||||
InitFromType() is called. */
|
||||
const Type *type;
|
||||
|
||||
/** For function declarations, this holds the Declaration *s for the
|
||||
funciton's parameters. */
|
||||
function's parameters. */
|
||||
std::vector<Declaration *> functionParams;
|
||||
};
|
||||
|
||||
@@ -199,7 +187,7 @@ public:
|
||||
Declaration(DeclSpecs *ds, std::vector<Declarator *> *dlist = NULL);
|
||||
Declaration(DeclSpecs *ds, Declarator *d);
|
||||
|
||||
void Print() const;
|
||||
void Print(int indent) const;
|
||||
|
||||
/** This method walks through all of the Declarators in a declaration
|
||||
and returns a fully-initialized Symbol and (possibly) and
|
||||
@@ -208,6 +196,10 @@ public:
|
||||
Declarator representation.) */
|
||||
std::vector<VariableDeclaration> GetVariableDeclarations() const;
|
||||
|
||||
/** For any function declarations in the Declaration, add the
|
||||
declaration to the module. */
|
||||
void DeclareFunctions();
|
||||
|
||||
DeclSpecs *declSpecs;
|
||||
std::vector<Declarator *> declarators;
|
||||
};
|
||||
|
||||
@@ -1,3 +1,247 @@
|
||||
=== v1.2.2 === (20 April 2012)
|
||||
|
||||
This release includes a number of small additions to functionality and a
|
||||
number of bugfixes. New functionality includes:
|
||||
|
||||
* It's now possible to forward declare structures as in C/C++: "struct
|
||||
Foo;". After such a declaration, structs with pointers to "Foo" and
|
||||
functions that take pointers or references to Foo structs can be declared
|
||||
without the entire definition of Foo being available.
|
||||
|
||||
* New built-in types size_t, ptrdiff_t, and [u]intptr_t are now available,
|
||||
corresponding to the equivalent types in C.
|
||||
|
||||
* The standard library now provides atomic_swap*() and
|
||||
atomic_compare_exchange*() functions for void * types.
|
||||
|
||||
* The C++ backend has seen a number of improvements to the quality and
|
||||
readability of generated code.
|
||||
|
||||
A number of bugs have been fixed in this release as well. The most
|
||||
significant are:
|
||||
|
||||
* Fixed a bug where nested loops could cause a compiler crash in some
|
||||
circumstances (issues #240, and #229)
|
||||
|
||||
* Gathers could access invalid memory (and cause the program to crash) in
|
||||
some circumstances (#235)
|
||||
|
||||
* References to temporary values are now handled properly when passed to a
|
||||
function that takes a reference typed parameter.
|
||||
|
||||
* A case where incorrect code could be generated for compile-time-constant
|
||||
initializers has been fixed (#234).
|
||||
|
||||
=== v1.2.1 === (6 April 2012)
|
||||
|
||||
This release contains only minor new functionality and is mostly for many
|
||||
small bugfixes and improvements to error handling and error reporting.
|
||||
The new functionality that is present is:
|
||||
|
||||
* Significantly more efficient versions of the float / half conversion
|
||||
routines are now available in the standard library, thanks to Fabian
|
||||
Giesen.
|
||||
|
||||
* The last member of a struct can now be a zero-length array; this allows
|
||||
the trick of dynamically allocating enough storage for the struct and
|
||||
some number of array elements at the end of it.
|
||||
|
||||
Significant bugs fixed include:
|
||||
|
||||
* Issue #205: When a target ISA isn't specified, use the host system's
|
||||
capabilities to choose a target for which it will be able to run the
|
||||
generated code.
|
||||
|
||||
* Issues #215 and #217: Don't allocate storage for global variables that
|
||||
are declared "extern".
|
||||
|
||||
* Issue #197: Allow NULL as a default argument value in a function
|
||||
declaration.
|
||||
|
||||
* Issue #223: Fix bugs where taking the address of a function wouldn't work
|
||||
as expected.
|
||||
|
||||
* Issue #224: When there are overloaded variants of a function that take
|
||||
both reference and const reference parameters, give the non-const
|
||||
reference preference when matching values of that underlying type.
|
||||
|
||||
* Issue #225: An error is issued when a varying lvalue is assigned to a
|
||||
reference type (rather than crashing).
|
||||
|
||||
* Issue #193: Permit conversions from array types to void *, not just the
|
||||
pointer type of the underlying array element.
|
||||
|
||||
* Issue #199: Still evaluate expressions that are cast to (void).
|
||||
|
||||
The documentation has also been improved, with FAQs added to clarify some
|
||||
aspects of the ispc pointer model.
|
||||
|
||||
=== v1.2.0 === (20 March 2012)
|
||||
|
||||
This is a major new release of ispc, with a number of significant
|
||||
improvements to functionality, performance, and compiler robustness. It
|
||||
does, however, include three small changes to language syntax and semantics
|
||||
that may require changes to existing programs:
|
||||
|
||||
* Syntax for the "launch" keyword has been cleaned up; it's now no longer
|
||||
necessary to bracket the launched function call with angle brackets.
|
||||
(In other words, now use "launch foo();", rather than "launch < foo() >;".)
|
||||
|
||||
* When using pointers, the pointed-to data type is now "uniform" by
|
||||
default. Use the varying keyword to specify varying pointed-to types when
|
||||
needed. (i.e. "float *ptr" is a varying pointer to uniform float data,
|
||||
whereas previously it was a varying pointer to varying float values.)
|
||||
Use "varying float *" to specify a varying pointer to varying float data,
|
||||
and so forth.
|
||||
|
||||
* The details of "uniform" and "varying" and how they interact with struct
|
||||
types have been cleaned up. Now, when a struct type is declared, if the
|
||||
struct elements don't have explicit "uniform" or "varying" qualifiers,
|
||||
they are said to have "unbound" variability. When a struct type is
|
||||
instantiated, any unbound variability elements inherit the variability of
|
||||
the parent struct type. See http://ispc.github.com/ispc.html#struct-types
|
||||
for more details.
|
||||
|
||||
ispc has a new language feature that makes it much easier to use the
|
||||
efficient "(array of) structure of arrays" (AoSoA, or SoA) memory layout of
|
||||
data. A new "soa<n>" qualifier can be applied to structure types to
|
||||
specify an n-wide SoA version of the corresponding type. Array indexing
|
||||
and pointer operations with arrays of SoA types automatically handle the
|
||||
two-stage indexing calculation to access the data. See
|
||||
http://ispc.github.com/ispc.html#structure-of-array-types for more details.
|
||||
|
||||
For more efficient access of data that is still in "array of structures"
|
||||
(AoS) format, ispc has a new "memory coalescing" optimization that
|
||||
automatically detects series of strided loads and/or gathers that can be
|
||||
transformed into a more efficient set of vector loads and shuffles. A
|
||||
diagnostic is emitted when this optimization is successfully applied.
|
||||
|
||||
Smaller changes in this release:
|
||||
|
||||
* The standard library now provides memcpy(), memmove() and memset()
|
||||
functions, as well as single-precision asin() and acos() functions.
|
||||
|
||||
* -I can now be specified on the command-line to specify a search path for
|
||||
#include files.
|
||||
|
||||
* A number of improvements have been made to error reporting from the
|
||||
parser, and a number of cases where malformed programs could cause the
|
||||
compiler to crash have been fixed.
|
||||
|
||||
* A number of small improvements to the quality and performance of generated
|
||||
code have been made, including finding more cases where 32-bit addressing
|
||||
calculations can be safely done on 64-bit systems and generating better
|
||||
code for initializer expressions.
|
||||
|
||||
=== v1.1.4 === (4 February 2012)
|
||||
|
||||
There are two major bugfixes for Windows in this release. First, a number
|
||||
of failures in AVX code generation on Windows have been fixed; AVX on
|
||||
Windows now has no known issues. Second, a longstanding bug in parsing 64-bit
|
||||
integer constants on Windows has been fixed.
|
||||
|
||||
This release features a new experimental scalar target, contributed by Gabe
|
||||
Weisz <gweisz@cs.cmu.edu>. This target ("--target=generic-1") compiles
|
||||
gangs of single program instances (i.e. programCount == 1); it can be
|
||||
useful for debugging ispc programs.
|
||||
|
||||
The compiler now supports dynamic memory allocation in ispc programs (with
|
||||
"new" and "delete" operators based on C++). See
|
||||
http://ispc.github.com/ispc.html#dynamic-memory-allocation in the
|
||||
documentation for more information.
|
||||
|
||||
ispc now performs "short circuit" evaluation of the || and && logical
|
||||
operators and the ? : selection operator. (This represents the correction
|
||||
of a major incompatibility with C.) Code like "(index < arraySize &&
|
||||
array[index] == 1)" thus now executes as in C, where "array[index]" won't
|
||||
be evaluated unless "index" is less than "arraySize".
|
||||
|
||||
The standard library now provides "local" atomic operations, which are
|
||||
atomic across the gang of program instances (but not across other gangs or
|
||||
other hardware threads). See the updated documentation on atomics for more
|
||||
information:
|
||||
http://ispc.github.com/ispc.html#atomic-operations-and-memory-fences.
|
||||
|
||||
The standard library now offers a clock() function, which returns a uniform
|
||||
int64 value that counts processor cycles; it can be used for
|
||||
fine-resolution timing measurements.
|
||||
|
||||
Finally (of limited interest now): ispc now supports the forthcoming AVX2
|
||||
instruction set, due with Haswell-generation CPUs. All tests and examples
|
||||
compile and execute correctly with AVX2. (Thanks specifically to Craig
|
||||
Topper and Nadav Rotem for work on AVX2 support in LLVM, which made this
|
||||
possible.)
|
||||
|
||||
=== v1.1.3 === (20 January 2012)
|
||||
|
||||
With this release, the language now supports "switch" statements, with the
|
||||
same semantics and syntax as in C.
|
||||
|
||||
This release includes fixes for two important performance related issues:
|
||||
the quality of code generated for "foreach" statements has been
|
||||
substantially improved (https://github.com/ispc/ispc/issues/151), and a
|
||||
performance regression with code for "gathers" that was introduced in
|
||||
v1.1.2 has been fixed in this release.
|
||||
|
||||
A number of other small bugs were fixed in this release as well, including
|
||||
one where invalid memory would sometimes be incorrectly accessed
|
||||
(https://github.com/ispc/ispc/issues/160).
|
||||
|
||||
Thanks to Jean-Luc Duprat for a number of patches that improve support for
|
||||
building on various platforms, and to Pierre-Antoine Lacaze for patches so
|
||||
that ispc builds under MinGW.
|
||||
|
||||
=== v1.1.2 === (9 January 2012)
|
||||
|
||||
The major new feature in this release is support for "generic" C++
|
||||
vectorized output; in other words, ispc can emit C++ code that corresponds
|
||||
to the vectorized computation that the ispc program represents. See the
|
||||
examples/intrinsics directory in the ispc distribution for two example
|
||||
implementations of the set of functions that must be provided to map the
|
||||
vector calls generated by ispc to target specific functions.
|
||||
|
||||
ispc now has partial support for 'goto' statements; specifically, goto is
|
||||
allowed if any enclosing control flow statements (if/for/while/do) have
|
||||
'uniform' test expressions, but not if they have 'varying' tests.
|
||||
|
||||
A number of improvements have been made to the code generated for gathers
|
||||
and scatters--one of them (better matching x86's "free" scale by 2/4/8 for
|
||||
addressing calculations) improved the performance of the noise example by
|
||||
14%.
|
||||
|
||||
Many small bugs have been fixed in this release as well, including issue
|
||||
numbers 138, 129, 135, 127, 149, and 142.
|
||||
|
||||
=== v1.1.1 === (15 December 2011)
|
||||
|
||||
This release doesn't include any significant new functionality, but does
|
||||
include small improvements in generated code and a number of bug fixes.
|
||||
|
||||
The one user-visible language change is that integer constants may be
|
||||
specified with 'u' and 'l' suffixes, like in C. For example, "1024llu"
|
||||
defines the constant with unsigned 64-bit type.
|
||||
|
||||
More informative and useful error messages are printed when function
|
||||
overload resolution fails.
|
||||
|
||||
Masking is avoided in additional cases when the mask can be
|
||||
statically-determined to be all on.
|
||||
|
||||
A number of small bugs have been fixed:
|
||||
- Under some circumstances, incorrect masks were used when assigning a
|
||||
value to a reference and when doing gathers/scatters.
|
||||
- Incorrect code could be generated in some cases when some instances
|
||||
returned part way through a function but others continued executing.
|
||||
- Type checking wasn't being performed for calls through function pointers;
|
||||
now an error is issued if the arguments don't match up, etc.
|
||||
- Incorrect code was being generated for gather/scatter to structs that had
|
||||
elements with varying short-vector types.
|
||||
- Typechecking wasn't being performed for "foreach" statements; this led to
|
||||
problems like function overload resolution not being performed if an
|
||||
overloaded function call was used to determine the iteration range.
|
||||
- A number of symbols would be multiply-defined when compiling to multiple
|
||||
targets and using the sse2-x2 target as one of them (issue #131).
|
||||
|
||||
=== v1.1.0 === (5 December 2011)
|
||||
|
||||
This is a major new release of the compiler, with significant additions to
|
||||
|
||||
@@ -2,11 +2,14 @@
|
||||
|
||||
for i in ispc perfguide faq; do
|
||||
rst2html.py --template=template.txt --link-stylesheet \
|
||||
--stylesheet-path=css/style.css $i.txt > $i.html
|
||||
--stylesheet-path=css/style.css $i.rst > $i.html
|
||||
done
|
||||
|
||||
rst2html.py --template=template-news.txt --link-stylesheet \
|
||||
--stylesheet-path=css/style.css news.rst > news.html
|
||||
|
||||
rst2html.py --template=template-perf.txt --link-stylesheet \
|
||||
--stylesheet-path=css/style.css perf.txt > perf.html
|
||||
--stylesheet-path=css/style.css perf.rst > perf.html
|
||||
|
||||
#rst2latex --section-numbering --documentclass=article --documentoptions=DIV=9,10pt,letterpaper ispc.txt > ispc.tex
|
||||
#pdflatex ispc.tex
|
||||
|
||||
@@ -1,10 +1,10 @@
|
||||
=============================================================
|
||||
Intel® SPMD Program Compiler Frequently Asked Questions (FAQ)
|
||||
=============================================================
|
||||
=====================================
|
||||
Frequently Asked Questions About ispc
|
||||
=====================================
|
||||
|
||||
This document includes a number of frequently (and not frequently) asked
|
||||
questions about ispc, the Intel® SPMD Program Compiler. The source to this
|
||||
document is in the file ``docs/faq.txt`` in the ``ispc`` source
|
||||
document is in the file ``docs/faq.rst`` in the ``ispc`` source
|
||||
distribution.
|
||||
|
||||
* Understanding ispc's Output
|
||||
@@ -14,11 +14,19 @@ distribution.
|
||||
+ `Why are there multiple versions of exported ispc functions in the assembly output?`_
|
||||
+ `How can I more easily see gathers and scatters in generated assembly?`_
|
||||
|
||||
* Language Details
|
||||
|
||||
+ `What is the difference between "int *foo" and "int foo[]"?`_
|
||||
+ `Why are pointed-to types "uniform" by default?`_
|
||||
+ `What am I getting an error about assigning a varying lvalue to a reference type?`_
|
||||
|
||||
* Interoperability
|
||||
|
||||
+ `How can I supply an initial execution mask in the call from the application?`_
|
||||
+ `How can I generate a single binary executable with support for multiple instruction sets?`_
|
||||
+ `How can I determine at run-time which vector instruction set's instructions were selected to execute?`_
|
||||
+ `Is it possible to inline ispc functions in C/C++ code?`_
|
||||
+ `Why is it illegal to pass "varying" values from C/C++ to ispc functions?`_
|
||||
|
||||
* Programming Techniques
|
||||
|
||||
@@ -212,6 +220,125 @@ easier to understand:
|
||||
jmp ___pseudo_scatter_base_offsets32_32 ## TAILCALL
|
||||
|
||||
|
||||
Language Details
|
||||
================
|
||||
|
||||
What is the difference between "int \*foo" and "int foo[]"?
|
||||
-----------------------------------------------------------
|
||||
|
||||
In C and C++, declaring a function to take a parameter ``int *foo`` and
|
||||
``int foo[]`` results in the same type for the parameter. Both are
|
||||
pointers to integers. In ``ispc``, these are different types. The first
|
||||
one is a varying pointer to a uniform integer value in memory, while the
|
||||
second results in a uniform pointer to the start of an array of varying
|
||||
integer values in memory.
|
||||
|
||||
To understand why the first is a varying pointer to a uniform integer,
|
||||
first recall that types without explicit rate qualifiers (``uniform``,
|
||||
``varying``, or ``soa<>``) are ``varying`` by default. Second, recall from
|
||||
the `discussion of pointer types in the ispc User's Guide`_ that pointed-to
|
||||
types without rate qualifiers are ``uniform`` by default. (This second
|
||||
rule is discussed further below, in `Why are pointed-to types "uniform" by
|
||||
default?`_.) The type of ``int *foo`` follows from these.
|
||||
|
||||
.. _discussion of pointer types in the ispc User's Guide: ispc.html#pointer-types
|
||||
|
||||
Conversely, in a function body, ``int foo[10]`` represents a declaration of
|
||||
a 10-element array of varying ``int`` values. In that we'd certainly like
|
||||
to be able to pass such an array to a function that takes an ``int []``
|
||||
parameter, the natural type for an ``int []`` parameter is a uniform
|
||||
pointer to varying integer values.
|
||||
|
||||
In terms of compatibility with C/C++, it's unfortunate that this
|
||||
distinction exists, though any other set of rules seems to introduce more
|
||||
awkwardness than this one. (Though we're interested to hear ideas to
|
||||
improve these rules!).
|
||||
|
||||
Why are pointed-to types "uniform" by default?
|
||||
----------------------------------------------
|
||||
|
||||
In ``ispc``, types without rate qualifiers are "varying" by default, but
|
||||
types pointed to by pointers without rate qualifiers are "uniform" by
|
||||
default. Why this difference?
|
||||
|
||||
::
|
||||
|
||||
int foo; // no rate qualifier, "varying int".
|
||||
uniform int *foo; // pointer type has no rate qualifier, pointed-to does.
|
||||
// "varying pointer to uniform int".
|
||||
int *foo; // neither pointer type nor pointed-to type ("int") have
|
||||
// rate qualifiers. Pointer type is varying by default,
|
||||
// pointed-to is uniform. "varying pointer to uniform int".
|
||||
varying int *foo; // varying pointer to varying int
|
||||
|
||||
The first rule, having types without rate qualifiers be varying by default,
|
||||
is a default that keeps the number of "uniform" or "varying" qualifiers in
|
||||
``ispc`` programs low. Most ``ispc`` programs use mostly "varying"
|
||||
variables, so this rule allows most variables to be declared without also
|
||||
requiring rate qualifiers.
|
||||
|
||||
On a related note, this rule allows many C/C++ functions to be used to
|
||||
define equivalent functions in the SPMD execution model that ``ispc``
|
||||
provides with little or no modification:
|
||||
|
||||
::
|
||||
|
||||
// scalar add in C/C++, SPMD/vector add in ispc
|
||||
int add(int a, int b) { return a + b; }
|
||||
|
||||
This motivation also explains why ``uniform int *foo`` represents a varying
|
||||
pointer; having pointers be varying by default if they don't have rate
|
||||
qualifiers similarly helps with porting code from C/C++ to ``ispc``.
|
||||
|
||||
The trickier issue is why pointed-to types are "uniform" by default. In our
|
||||
experience, data in memory that is accessed via pointers is most often
|
||||
uniform; this generally includes all data that has been allocated and
|
||||
initialized by the C/C++ application code. In practice, "varying" types are
|
||||
more generally (but not exclusively) used for local data in ``ispc``
|
||||
functions. Thus, making the pointed-to type uniform by default leads to
|
||||
more concise code for the most common cases.
|
||||
|
||||
|
||||
What am I getting an error about assigning a varying lvalue to a reference type?
|
||||
--------------------------------------------------------------------------------
|
||||
|
||||
Given code like the following:
|
||||
|
||||
::
|
||||
|
||||
uniform float a[...];
|
||||
int index = ...;
|
||||
float &r = a[index];
|
||||
|
||||
``ispc`` issues the error "Initializer for reference-type variable "r" must
|
||||
have a uniform lvalue type.". The underlying issue stems from how
|
||||
references are represented in the code generated by ``ispc``. Recall that
|
||||
``ispc`` supports both uniform and varying pointer types--a uniform pointer
|
||||
points to the same location in memory for all program instances in the
|
||||
gang, while a varying pointer allows each program instance to have its own
|
||||
pointer value.
|
||||
|
||||
References are represented as a pointer in the code generated by ``ispc``,
|
||||
though this is generally opaque to the user; in ``ispc``, they are
|
||||
specifically uniform pointers. This design decision was made so that given
|
||||
code like this:
|
||||
|
||||
::
|
||||
|
||||
extern void func(float &val);
|
||||
float foo = ...;
|
||||
func(foo);
|
||||
|
||||
Then the reference would be handled efficiently as a single pointer, rather
|
||||
than unnecessarily being turned into a gang-size of pointers.
|
||||
|
||||
However, an implication of this decision is that it's not possible for
|
||||
references to refer to completely different things for each of the program
|
||||
instances. (And hence the error that is issued). In cases where a unique
|
||||
per-program-instance pointer is needed, a varying pointer should be used
|
||||
instead of a reference.
|
||||
|
||||
|
||||
Interoperability
|
||||
================
|
||||
|
||||
@@ -273,10 +400,10 @@ Then four object files will be generated: ``foo_sse2.o``, ``foo_sse4.o``,
|
||||
``foo_avx.o``, and ``foo.o``.[#]_ Link all of these into your executable, and
|
||||
when you call a function in ``foo.ispc`` from your application code,
|
||||
``ispc`` will determine which instruction sets are supported by the CPU the
|
||||
code is running on and will call the most appropraite version of the
|
||||
code is running on and will call the most appropriate version of the
|
||||
function available.
|
||||
|
||||
.. [#] Similarly, if you choose to generate assembly langauage output or
|
||||
.. [#] Similarly, if you choose to generate assembly language output or
|
||||
LLVM bitcode output, multiple versions of those files will be created.
|
||||
|
||||
In general, the version of the function that runs will be the one in the
|
||||
@@ -346,6 +473,92 @@ In a similar fashion, it's possible to find out at run-time the value of
|
||||
export uniform int width() { return programCount; }
|
||||
|
||||
|
||||
Is it possible to inline ispc functions in C/C++ code?
|
||||
------------------------------------------------------
|
||||
|
||||
If you're willing to use the ``clang`` C/C++ compiler that's part of the
|
||||
LLVM tool suite, then it is possible to inline ``ispc`` code with C/C++
|
||||
(and conversely, to inline C/C++ calls in ``ispc``). Doing so can provide
|
||||
performance advantages when calling out to short functions written in the
|
||||
"other" language. Note that you don't need to use ``clang`` to compile all
|
||||
of your C/C++ code, but only for the files where you want to be able to
|
||||
inline. In order to do this, you must have a full installation of LLVM
|
||||
version 3.0 or later, including the ``clang`` compiler.
|
||||
|
||||
The basic approach is to have the various compilers emit LLVM intermediate
|
||||
representation (IR) code and to then use tools from LLVM to link together
|
||||
the IR from the compilers and then re-optimize it, which gives the LLVM
|
||||
optimizer the opportunity to do additional inlining and cross-function
|
||||
optimizations. If you have source files ``foo.ispc`` and ``foo.cpp``,
|
||||
first emit LLVM IR:
|
||||
|
||||
::
|
||||
|
||||
ispc --emit-llvm -o foo_ispc.bc foo.ispc
|
||||
clang -O2 -c -emit-llvm -o foo_cpp.bc foo.cpp
|
||||
|
||||
Next, link the two IR files into a single file and run the LLVM optimizer
|
||||
on the result:
|
||||
|
||||
::
|
||||
|
||||
llvm-link foo_ispc.bc foo_cpp.bc -o - | opt -O3 -o foo_opt.bc
|
||||
|
||||
And finally, generate a native object file:
|
||||
|
||||
::
|
||||
|
||||
llc -filetype=obj foo_opt.bc -o foo.o
|
||||
|
||||
This file can in turn be linked in with the rest of your object files when
|
||||
linking your application.
|
||||
|
||||
(Note that if you're using the AVX instruction set, you must provide the
|
||||
``-mattr=+avx`` flag to ``llc``.)
|
||||
|
||||
|
||||
Why is it illegal to pass "varying" values from C/C++ to ispc functions?
|
||||
------------------------------------------------------------------------
|
||||
|
||||
If any of the types in the parameter list to an exported function is
|
||||
"varying" (including recursively, and members of structure types, etc.),
|
||||
then ``ispc`` will issue an error and refuse to compile the function:
|
||||
|
||||
::
|
||||
|
||||
% echo "export int add(int x) { return ++x; }" | ispc
|
||||
<stdin>:1:12: Error: Illegal to return a "varying" type from exported function "add"
|
||||
<stdin>:1:20: Error: Varying parameter "x" is illegal in an exported function.
|
||||
|
||||
While there's no fundamental reason why this isn't possible, recall the
|
||||
definition of "varying" variables: they have one value for each program
|
||||
instance in the gang. As such, the number of values and amount of storage
|
||||
required to represent a varying variable depends on the gang size
|
||||
(i.e. ``programCount``), which can have different values depending on the
|
||||
compilation target.
|
||||
|
||||
``ispc`` therefore prohibits passing "varying" values between the
|
||||
application and the ``ispc`` program in order to prevent the
|
||||
application-side code from depending on a particular gang size, in order to
|
||||
encourage portability to different gang sizes. (A generally desirable
|
||||
programming practice.)
|
||||
|
||||
For cases where the size of data is actually fixed from the application
|
||||
side, the value can be passed via a pointer to a short ``uniform`` array,
|
||||
as follows:
|
||||
|
||||
::
|
||||
|
||||
export void add4(uniform int ptr[4]) {
|
||||
foreach (i = 0 ... 4)
|
||||
ptr[i]++;
|
||||
}
|
||||
|
||||
On the 4-wide SSE instruction set, this compiles to a single vector add
|
||||
instruction (and associated move instructions), while it still also
|
||||
efficiently computes the correct result on 8-wide AVX targets.
|
||||
|
||||
|
||||
Programming Techniques
|
||||
======================
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
64
docs/news.rst
Normal file
64
docs/news.rst
Normal file
@@ -0,0 +1,64 @@
|
||||
=========
|
||||
ispc News
|
||||
=========
|
||||
|
||||
ispc 1.2.1 is Released
|
||||
----------------------
|
||||
|
||||
This is a bugfix release, fixing approximately 20 bugs in the system and
|
||||
improving error handling and error reporting. New functionality includes
|
||||
very efficient float/half conversion routines thanks to Fabian
|
||||
Giesen. See the `1.2.1 release notes`_ for details.
|
||||
|
||||
.. _1.2.1 release notes: https://github.com/ispc/ispc/tree/master/docs/ReleaseNotes.txt
|
||||
|
||||
ispc 1.2.0 is Released
|
||||
-----------------------
|
||||
|
||||
A new major release was posted on March 20, 2012. This release includes
|
||||
significant new functionality for cleanly handling "structure of arrays"
|
||||
(SoA) data layout and a new model for how uniform and varying are handled
|
||||
with structure types.
|
||||
|
||||
Paper on ispc To Appear in InPar 2012
|
||||
-------------------------------------
|
||||
|
||||
A technical paper on ``ispc``, `ispc: A SPMD Compiler for High-Performance
|
||||
CPU Programming`_, by Matt Pharr and William R. Mark, has been accepted to
|
||||
the `InPar 2012`_ conference. This paper describes a number of the design
|
||||
features and key characteristics of the ``ispc`` implementation.
|
||||
|
||||
(© 2012 IEEE. Personal use of this material is permitted. Permission from
|
||||
IEEE must be obtained for all other uses, in any current or future media,
|
||||
including reprinting/republishing this material for advertising or
|
||||
promotional purposes, creating new collective works, for resale or
|
||||
redistribution to servers or lists, or reuse of any copyrighted component
|
||||
of this work in other works.).
|
||||
|
||||
.. _ispc\: A SPMD Compiler for High-Performance CPU Programming: https://github.com/downloads/ispc/ispc/ispc_inpar_2012.pdf
|
||||
.. _InPar 2012: http://innovativeparallel.org/
|
||||
|
||||
ispc 1.1.4 is Released
|
||||
----------------------
|
||||
|
||||
On February 4, 2012, the 1.1.4 release of ``ispc`` was posted; new features
|
||||
include ``new`` and ``delete`` for dynamic memory allocation in ``ispc``
|
||||
programs, "local" atomic operations in the standard library, and a new
|
||||
scalar compilation target. See the `1.1.4 release notes`_ for details.
|
||||
|
||||
.. _1.1.4 release notes: https://github.com/ispc/ispc/tree/master/docs/ReleaseNotes.txt
|
||||
|
||||
|
||||
ispc 1.1.3 is Released
|
||||
----------------------
|
||||
|
||||
With this release, the language now supports "switch" statements, with the same semantics and syntax as in C.
|
||||
|
||||
This release includes fixes for two important performance related issues:
|
||||
the quality of code generated for "foreach" statements has been
|
||||
substantially improved, and a performance regression with code for "gathers"
|
||||
that was introduced in v1.1.2 has been fixed in this release.
|
||||
|
||||
Thanks to Jean-Luc Duprat for a number of patches that improve support for
|
||||
building on various platforms, and to Pierre-Antoine Lacaze for patches so
|
||||
that ispc builds under MinGW.
|
||||
@@ -22,8 +22,8 @@ also included in the ``examples/`` directory.)
|
||||
- ``ispc``, 1 core
|
||||
- ``ispc``, 4 cores
|
||||
* - `AOBench`_ (512 x 512 resolution)
|
||||
- 3.99x
|
||||
- 19.32x
|
||||
- 6.19x
|
||||
- 28.06x
|
||||
* - `Binomial Options`_ (128k options)
|
||||
- 7.94x
|
||||
- 33.43x
|
||||
@@ -31,23 +31,23 @@ also included in the ``examples/`` directory.)
|
||||
- 8.45x
|
||||
- 32.48x
|
||||
* - `Deferred Shading`_ (1280p)
|
||||
- n/a
|
||||
- 5.02x
|
||||
- 23.06x
|
||||
* - `Mandelbrot Set`_
|
||||
- 6.21x
|
||||
- 19.90x
|
||||
- 20.28x
|
||||
* - `Perlin Noise Function`_
|
||||
- 5.37x
|
||||
- n/a
|
||||
* - `Ray Tracer`_ (Sponza dataset)
|
||||
- 3.99x
|
||||
- 19.32x
|
||||
- 4.31x
|
||||
- 20.29x
|
||||
* - `3D Stencil`_
|
||||
- 3.76x
|
||||
- 13.79x
|
||||
- 4.05x
|
||||
- 15.53x
|
||||
* - `Volume Rendering`_
|
||||
- 3.11x
|
||||
- 15.80x
|
||||
- 3.60x
|
||||
- 17.53x
|
||||
|
||||
|
||||
.. _AOBench: https://github.com/ispc/ispc/tree/master/examples/aobench
|
||||
@@ -13,6 +13,7 @@ the most out of ``ispc`` in practice.
|
||||
+ `Improving Control Flow Coherence With "foreach_tiled"`_
|
||||
+ `Using Coherent Control Flow Constructs`_
|
||||
+ `Use "uniform" Whenever Appropriate`_
|
||||
+ `Use "Structure of Arrays" Layout When Possible`_
|
||||
|
||||
* `Tips and Techniques`_
|
||||
|
||||
@@ -64,7 +65,7 @@ on each one:
|
||||
Depending on the specifics of the computation being performed, the code
|
||||
generated for this function could likely be improved by modifying the code
|
||||
so that the loop only goes as far through the data as is possible to pack
|
||||
an entire gang of program instances with computation each time thorugh the
|
||||
an entire gang of program instances with computation each time through the
|
||||
loop. Doing so enables the ``ispc`` compiler to generate more efficient
|
||||
code for cases where it knows that the execution mask is "all on". Then,
|
||||
an ``if`` statement at the end handles processing the ragged extra bits of
|
||||
@@ -153,7 +154,7 @@ processed, and so forth.
|
||||
|
||||
Performance benefit can come from using ``foreach_tiled`` in that it
|
||||
essentially optimizes for the benefit of iterating over *compact* regions
|
||||
of the domian (while ``foreach`` iterates over the domain in a way that
|
||||
of the domain (while ``foreach`` iterates over the domain in a way that
|
||||
generally allows linear memory access.) There are two benefits from
|
||||
processing compact regions of the domain.
|
||||
|
||||
@@ -215,7 +216,7 @@ Use "uniform" Whenever Appropriate
|
||||
----------------------------------
|
||||
|
||||
For any variable that will always have the same value across all of the
|
||||
program instances in a gang, declare the variable with the ``unfiorm``
|
||||
program instances in a gang, declare the variable with the ``uniform``
|
||||
qualifier. Doing so enables the ``ispc`` compiler to emit better code in
|
||||
many different ways.
|
||||
|
||||
@@ -229,7 +230,7 @@ number of iterations:
|
||||
|
||||
If this is written with ``i`` as a ``varying`` variable, as above, there's
|
||||
additional overhead in the code generated for the loop as the compiler
|
||||
emits instructions to handle the possibilty of not all program instances
|
||||
emits instructions to handle the possibility of not all program instances
|
||||
following the same control flow path (as might be the case if the loop
|
||||
limit, 10, was itself a ``varying`` value.)
|
||||
|
||||
@@ -247,6 +248,76 @@ but it's always best to provide the compiler with as much help as possible
|
||||
to understand the actual form of your computation.
|
||||
|
||||
|
||||
Use "Structure of Arrays" Layout When Possible
|
||||
----------------------------------------------
|
||||
|
||||
In general, memory access performance (for both reads and writes) is best
|
||||
when the running program instances access a contiguous region of memory; in
|
||||
this case efficient vector load and store instructions can often be used
|
||||
rather than gathers and scatters. As an example of this issue, consider an
|
||||
array of a simple point datatype laid out and accessed in conventional
|
||||
"array of structures" (AOS) layout:
|
||||
|
||||
::
|
||||
|
||||
struct Point { float x, y, z; };
|
||||
uniform Point pts[...];
|
||||
float v = pts[programIndex].x;
|
||||
|
||||
In the above code, the access to ``pts[programIndex].x`` accesses
|
||||
non-sequential memory locations, due to the ``y`` and ``z`` values between
|
||||
the desired ``x`` values in memory. A "gather" is required to get the
|
||||
value of ``v``, with a corresponding decrease in performance.
|
||||
|
||||
If ``Point`` was defined as a "structure of arrays" (SOA) type, the access
|
||||
can be much more efficient:
|
||||
|
||||
::
|
||||
|
||||
struct Point8 { float x[8], y[8], z[8]; };
|
||||
uniform Point8 pts8[...];
|
||||
int majorIndex = programIndex / 8;
|
||||
int minorIndex = programIndex % 8;
|
||||
float v = pts8[majorIndex].x[minorIndex];
|
||||
|
||||
In this case, each ``Point8`` has 8 ``x`` values contiguous in memory
|
||||
before 8 ``y`` values and then 8 ``z`` values. If the gang size is 8 or
|
||||
less, the access for ``v`` will have the same value of ``majorIndex`` for
|
||||
all program instances and will access consecutive elements of the ``x[8]``
|
||||
array with a vector load. (For larger gang sizes, two 8-wide vector loads
|
||||
would be issued, which is also quite efficient.)
|
||||
|
||||
However, the syntax in the above code is messy; accessing SOA data in this
|
||||
fashion is much less elegant than the corresponding code for accessing the
|
||||
data with AOS layout. The ``soa`` qualifier in ``ispc`` can be used to
|
||||
cause the corresponding transformation to be made to the ``Point`` type,
|
||||
while preserving the clean syntax for data access that comes with AOS
|
||||
layout:
|
||||
|
||||
::
|
||||
|
||||
soa<8> Point pts[...];
|
||||
float v = pts[programIndex].x;
|
||||
|
||||
Thanks to having SOA layout as a first-class concept in the language's type
|
||||
system, it's easy to write functions that convert data between the
|
||||
layouts. For example, the ``aos_to_soa`` function below converts ``count``
|
||||
elements of the given ``Point`` type from AOS to 8-wide SOA layout. (It
|
||||
assumes that the caller has pre-allocated sufficient space in the
|
||||
``pts_soa`` output array.)
|
||||
|
||||
::
|
||||
|
||||
void aos_to_soa(uniform Point pts_aos[], uniform int count,
|
||||
soa<8> Point pts_soa[]) {
|
||||
foreach (i = 0 ... count)
|
||||
pts_soa[i] = pts_aos[i];
|
||||
}
|
||||
|
||||
Analogously, a function could be written to convert back from SOA to AOS if
|
||||
needed.
|
||||
|
||||
|
||||
Tips and Techniques
|
||||
===================
|
||||
|
||||
@@ -339,6 +410,12 @@ based on the index, it can be worth doing. See the example
|
||||
``examples/volume_rendering`` in the ``ispc`` distribution for the use of
|
||||
this technique in an instance where it is beneficial to performance.
|
||||
|
||||
Understanding Memory Read Coalescing
|
||||
------------------------------------
|
||||
|
||||
XXXX todo
|
||||
|
||||
|
||||
Avoid 64-bit Addressing Calculations When Possible
|
||||
--------------------------------------------------
|
||||
|
||||
@@ -568,7 +645,7 @@ mask of all lanes currently executing (assuming a four-wide gang size
|
||||
target machine).
|
||||
|
||||
For a fuller example of the utility of this functionality, see
|
||||
``examples/aobench_instrumented`` in the ``ispc`` distribution. Ths
|
||||
``examples/aobench_instrumented`` in the ``ispc`` distribution. This
|
||||
example includes an implementation of the ``ISPCInstrument()`` function
|
||||
that collects aggregate data about the program's execution behavior.
|
||||
|
||||
65
docs/template-news.txt
Normal file
65
docs/template-news.txt
Normal file
@@ -0,0 +1,65 @@
|
||||
%(head_prefix)s
|
||||
%(head)s
|
||||
<script type="text/javascript">
|
||||
|
||||
var _gaq = _gaq || [];
|
||||
_gaq.push(['_setAccount', 'UA-1486404-4']);
|
||||
_gaq.push(['_trackPageview']);
|
||||
|
||||
(function() {
|
||||
var ga = document.createElement('script'); ga.type = 'text/javascript'; ga.async = true;
|
||||
ga.src = ('https:' == document.location.protocol ? 'https://ssl' : 'http://www') + '.google-analytics.com/ga.js';
|
||||
var s = document.getElementsByTagName('script')[0]; s.parentNode.insertBefore(ga, s);
|
||||
})();
|
||||
|
||||
</script>
|
||||
%(stylesheet)s
|
||||
%(body_prefix)s
|
||||
<div id="wrap">
|
||||
<div id="wrap2">
|
||||
<div id="header">
|
||||
<h1 id="logo">Intel SPMD Program Compiler</h1>
|
||||
<div id="slogan">An open-source compiler for high-performance SIMD programming on
|
||||
the CPU</div>
|
||||
</div>
|
||||
<div id="nav">
|
||||
<div id="nbar">
|
||||
<ul>
|
||||
<li><a href="index.html">Overview</a></li>
|
||||
<li id="selected"><a href="news.html">News</a></li>
|
||||
<li><a href="features.html">Features</a></li>
|
||||
<li><a href="downloads.html">Downloads</a></li>
|
||||
<li><a href="documentation.html">Documentation</a></li>
|
||||
<li><a href="perf.html">Performance</a></li>
|
||||
</ul>
|
||||
</div>
|
||||
</div>
|
||||
<div id="content-wrap">
|
||||
<div id="sidebar">
|
||||
<div class="widgetspace">
|
||||
<h1>Resources</h1>
|
||||
<ul class="menu">
|
||||
<li><a href="http://github.com/ispc/ispc/">ispc page on github</a></li>
|
||||
<li><a href="http://groups.google.com/group/ispc-users/">ispc
|
||||
users mailing list</a></li>
|
||||
<li><a href="http://groups.google.com/group/ispc-dev/">ispc
|
||||
developers mailing list</a></li>
|
||||
<li><a href="http://github.com/ispc/ispc/wiki/">Wiki</a></li>
|
||||
<li><a href="http://github.com/ispc/ispc/issues/">Bug tracking</a></li>
|
||||
<li><a href="doxygen/index.html">Doxygen</a></li>
|
||||
</ul>
|
||||
</div>
|
||||
</div>
|
||||
%(body_pre_docinfo)s
|
||||
%(docinfo)s
|
||||
<div id="content">
|
||||
%(body)s
|
||||
</div>
|
||||
<div class="clearfix"></div>
|
||||
<div id="footer"> © 2011-2012 <strong>Intel Corporation</strong> | Valid <a href="http://validator.w3.org/check?uri=referer">XHTML</a> | <a href="http://jigsaw.w3.org/css-validator/check/referer">CSS</a> | ClearBlue by: <a href="http://www.themebin.com/">ThemeBin</a>
|
||||
<!-- Please Do Not remove this link, thank u -->
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
%(body_suffix)s
|
||||
@@ -26,6 +26,7 @@
|
||||
<div id="nbar">
|
||||
<ul>
|
||||
<li><a href="index.html">Overview</a></li>
|
||||
<li><a href="news.html">News</a></li>
|
||||
<li><a href="features.html">Features</a></li>
|
||||
<li><a href="downloads.html">Downloads</a></li>
|
||||
<li><a href="documentation.html">Documentation</a></li>
|
||||
@@ -45,8 +46,7 @@
|
||||
developers mailing list</a></li>
|
||||
<li><a href="http://github.com/ispc/ispc/wiki/">Wiki</a></li>
|
||||
<li><a href="http://github.com/ispc/ispc/issues/">Bug tracking</a></li>
|
||||
<li><a href="doxygen/index.html">Doxygen documentation of
|
||||
<tt>ispc</tt> source code</a></li>
|
||||
<li><a href="doxygen/index.html">Doxygen</a></li>
|
||||
</ul>
|
||||
</div>
|
||||
</div>
|
||||
@@ -56,7 +56,7 @@
|
||||
%(body)s
|
||||
</div>
|
||||
<div class="clearfix"></div>
|
||||
<div id="footer"> © 2011 <strong>Intel Corporation</strong> | Valid <a href="http://validator.w3.org/check?uri=referer">XHTML</a> | <a href="http://jigsaw.w3.org/css-validator/check/referer">CSS</a> | ClearBlue by: <a href="http://www.themebin.com/">ThemeBin</a>
|
||||
<div id="footer"> © 2011-2012 <strong>Intel Corporation</strong> | Valid <a href="http://validator.w3.org/check?uri=referer">XHTML</a> | <a href="http://jigsaw.w3.org/css-validator/check/referer">CSS</a> | ClearBlue by: <a href="http://www.themebin.com/">ThemeBin</a>
|
||||
<!-- Please Do Not remove this link, thank u -->
|
||||
</div>
|
||||
</div>
|
||||
|
||||
@@ -26,6 +26,7 @@
|
||||
<div id="nbar">
|
||||
<ul>
|
||||
<li><a href="index.html">Overview</a></li>
|
||||
<li><a href="news.html">News</a></li>
|
||||
<li><a href="features.html">Features</a></li>
|
||||
<li><a href="downloads.html">Downloads</a></li>
|
||||
<li id="selected"><a href="documentation.html">Documentation</a></li>
|
||||
@@ -45,8 +46,7 @@
|
||||
developers mailing list</a></li>
|
||||
<li><a href="http://github.com/ispc/ispc/wiki/">Wiki</a></li>
|
||||
<li><a href="http://github.com/ispc/ispc/issues/">Bug tracking</a></li>
|
||||
<li><a href="doxygen/index.html">Doxygen documentation of
|
||||
<tt>ispc</tt> source code</a></li>
|
||||
<li><a href="doxygen/index.html">Doxygen</a></li>
|
||||
</ul>
|
||||
</div>
|
||||
</div>
|
||||
@@ -56,7 +56,7 @@
|
||||
%(body)s
|
||||
</div>
|
||||
<div class="clearfix"></div>
|
||||
<div id="footer"> © 2011 <strong>Intel Corporation</strong> | Valid <a href="http://validator.w3.org/check?uri=referer">XHTML</a> | <a href="http://jigsaw.w3.org/css-validator/check/referer">CSS</a> | ClearBlue by: <a href="http://www.themebin.com/">ThemeBin</a>
|
||||
<div id="footer"> © 2011-2012 <strong>Intel Corporation</strong> | Valid <a href="http://validator.w3.org/check?uri=referer">XHTML</a> | <a href="http://jigsaw.w3.org/css-validator/check/referer">CSS</a> | ClearBlue by: <a href="http://www.themebin.com/">ThemeBin</a>
|
||||
<!-- Please Do Not remove this link, thank u -->
|
||||
</div>
|
||||
</div>
|
||||
|
||||
@@ -31,7 +31,7 @@ PROJECT_NAME = "Intel SPMD Program Compiler"
|
||||
# This could be handy for archiving the generated documentation or
|
||||
# if some version control system is used.
|
||||
|
||||
PROJECT_NUMBER = 1.1.0
|
||||
PROJECT_NUMBER = 1.2.2
|
||||
|
||||
# The OUTPUT_DIRECTORY tag is used to specify the (relative or absolute)
|
||||
# base path where the generated documentation will be put.
|
||||
|
||||
@@ -39,9 +39,6 @@ example implementation of this function that counts the number of times the
|
||||
callback is made and records some statistics about control flow coherence
|
||||
is provided in the instrument.cpp file.
|
||||
|
||||
*** Note: on Linux, this example currently hits an assertion in LLVM during
|
||||
*** compilation
|
||||
|
||||
|
||||
Deferred
|
||||
========
|
||||
@@ -110,6 +107,13 @@ This program implements both the Black-Scholes and Binomial options pricing
|
||||
models in both ispc and regular serial C++ code.
|
||||
|
||||
|
||||
Perfbench
|
||||
=========
|
||||
|
||||
This runs a number of microbenchmarks to measure system performance and
|
||||
code generation quality.
|
||||
|
||||
|
||||
RT
|
||||
==
|
||||
|
||||
|
||||
@@ -1,39 +1,7 @@
|
||||
|
||||
ARCH = $(shell uname)
|
||||
EXAMPLE=ao
|
||||
CPP_SRC=ao.cpp ao_serial.cpp
|
||||
ISPC_SRC=ao.ispc
|
||||
ISPC_TARGETS=sse2,sse4,avx
|
||||
|
||||
TASK_CXX=../tasksys.cpp
|
||||
TASK_LIB=-lpthread
|
||||
TASK_OBJ=$(addprefix objs/, $(subst ../,, $(TASK_CXX:.cpp=.o)))
|
||||
|
||||
CXX=g++
|
||||
CXXFLAGS=-Iobjs/ -O3 -Wall -m64
|
||||
ISPC=ispc
|
||||
ISPCFLAGS=-O2 --target=sse2,sse4,avx --arch=x86-64
|
||||
|
||||
ISPC_OBJS=objs/ao_ispc.o objs/ao_ispc_sse2.o objs/ao_ispc_sse4.o \
|
||||
objs/ao_ispc_avx.o
|
||||
OBJS=objs/ao.o objs/ao_serial.o $(ISPC_OBJS) $(TASK_OBJ)
|
||||
|
||||
default: ao
|
||||
|
||||
.PHONY: dirs clean
|
||||
|
||||
dirs:
|
||||
/bin/mkdir -p objs/
|
||||
|
||||
clean:
|
||||
/bin/rm -rf objs *~ ao
|
||||
|
||||
ao: dirs $(OBJS) $(TASK_OBJ)
|
||||
$(CXX) $(CXXFLAGS) -o $@ $(OBJS) -lm $(TASK_LIB)
|
||||
|
||||
objs/%.o: %.cpp
|
||||
$(CXX) $< $(CXXFLAGS) -c -o $@
|
||||
|
||||
objs/%.o: ../%.cpp
|
||||
$(CXX) $< $(CXXFLAGS) -c -o $@
|
||||
|
||||
objs/ao.o: objs/ao_ispc.h
|
||||
|
||||
objs/%_ispc.h objs/%_ispc.o objs/%_ispc_sse2.o objs/%_ispc_sse4.o objs/%_ispc_avx.o: %.ispc
|
||||
$(ISPC) $(ISPCFLAGS) $< -o objs/$*_ispc.o -h objs/$*_ispc.h
|
||||
include ../common.mk
|
||||
|
||||
@@ -50,7 +50,6 @@ struct Isect {
|
||||
struct Sphere {
|
||||
vec center;
|
||||
float radius;
|
||||
|
||||
};
|
||||
|
||||
struct Plane {
|
||||
@@ -82,8 +81,8 @@ static inline void vnormalize(vec &v) {
|
||||
}
|
||||
|
||||
|
||||
static inline void
|
||||
ray_plane_intersect(Isect &isect, Ray &ray, Plane &plane) {
|
||||
static void
|
||||
ray_plane_intersect(Isect &isect, Ray &ray, uniform Plane &plane) {
|
||||
float d = -dot(plane.p, plane.n);
|
||||
float v = dot(ray.dir, plane.n);
|
||||
|
||||
@@ -103,7 +102,7 @@ ray_plane_intersect(Isect &isect, Ray &ray, Plane &plane) {
|
||||
|
||||
|
||||
static inline void
|
||||
ray_sphere_intersect(Isect &isect, Ray &ray, Sphere &sphere) {
|
||||
ray_sphere_intersect(Isect &isect, Ray &ray, uniform Sphere &sphere) {
|
||||
vec rs = ray.org - sphere.center;
|
||||
|
||||
float B = dot(rs, ray.dir);
|
||||
@@ -124,7 +123,7 @@ ray_sphere_intersect(Isect &isect, Ray &ray, Sphere &sphere) {
|
||||
}
|
||||
|
||||
|
||||
static inline void
|
||||
static void
|
||||
orthoBasis(vec basis[3], vec n) {
|
||||
basis[2] = n;
|
||||
basis[1].x = 0.0; basis[1].y = 0.0; basis[1].z = 0.0;
|
||||
@@ -147,8 +146,8 @@ orthoBasis(vec basis[3], vec n) {
|
||||
}
|
||||
|
||||
|
||||
static inline float
|
||||
ambient_occlusion(Isect &isect, Plane &plane, Sphere spheres[3],
|
||||
static float
|
||||
ambient_occlusion(Isect &isect, uniform Plane &plane, uniform Sphere spheres[3],
|
||||
RNGState &rngstate) {
|
||||
float eps = 0.0001f;
|
||||
vec p, n;
|
||||
@@ -204,112 +203,52 @@ ambient_occlusion(Isect &isect, Plane &plane, Sphere spheres[3],
|
||||
static void ao_scanlines(uniform int y0, uniform int y1, uniform int w,
|
||||
uniform int h, uniform int nsubsamples,
|
||||
uniform float image[]) {
|
||||
static Plane plane = { { 0.0f, -0.5f, 0.0f }, { 0.f, 1.f, 0.f } };
|
||||
static Sphere spheres[3] = {
|
||||
static uniform Plane plane = { { 0.0f, -0.5f, 0.0f }, { 0.f, 1.f, 0.f } };
|
||||
static uniform Sphere spheres[3] = {
|
||||
{ { -2.0f, 0.0f, -3.5f }, 0.5f },
|
||||
{ { -0.5f, 0.0f, -3.0f }, 0.5f },
|
||||
{ { 1.0f, 0.0f, -2.2f }, 0.5f } };
|
||||
RNGState rngstate;
|
||||
|
||||
seed_rng(&rngstate, y0);
|
||||
float invSamples = 1.f / nsubsamples;
|
||||
|
||||
// Compute the mapping between the 'programCount'-wide program
|
||||
// instances running in parallel and samples in the image.
|
||||
//
|
||||
// For now, we'll always take four samples per pixel, so start by
|
||||
// initializing du and dv with offsets into subpixel samples. We'll
|
||||
// take care of further updating du and dv for the case where we're
|
||||
// doing more than 4 program instances in parallel shortly.
|
||||
uniform float uSteps[4] = { 0, 1, 0, 1 };
|
||||
uniform float vSteps[4] = { 0, 0, 1, 1 };
|
||||
float du = uSteps[programIndex % 4] / nsubsamples;
|
||||
float dv = vSteps[programIndex % 4] / nsubsamples;
|
||||
foreach_tiled(y = y0 ... y1, x = 0 ... w,
|
||||
u = 0 ... nsubsamples, v = 0 ... nsubsamples) {
|
||||
float du = (float)u * invSamples, dv = (float)v * invSamples;
|
||||
|
||||
// Now handle the case where we are able to do more than one pixel's
|
||||
// worth of work at once. nx records the number of pixels in the x
|
||||
// direction we do per iteration and ny the number in y.
|
||||
uniform int nx = 1, ny = 1;
|
||||
// Figure out x,y pixel in NDC
|
||||
float px = (x + du - (w / 2.0f)) / (w / 2.0f);
|
||||
float py = -(y + dv - (h / 2.0f)) / (h / 2.0f);
|
||||
float ret = 0.f;
|
||||
Ray ray;
|
||||
Isect isect;
|
||||
|
||||
// FIXME: We actually need ny to be 1 regardless of the decomposition,
|
||||
// since the task decomposition is one scanline high.
|
||||
ray.org = 0.f;
|
||||
|
||||
if (programCount == 8) {
|
||||
// Do two pixels at once in the x direction
|
||||
nx = 2;
|
||||
if (programIndex >= 4)
|
||||
// And shift the offsets for the second pixel's worth of work
|
||||
++du;
|
||||
}
|
||||
else if (programCount == 16) {
|
||||
nx = 4;
|
||||
ny = 1;
|
||||
if (programIndex >= 4 && programIndex < 8)
|
||||
++du;
|
||||
if (programIndex >= 8 && programIndex < 12)
|
||||
du += 2;
|
||||
if (programIndex >= 12)
|
||||
du += 3;
|
||||
}
|
||||
// Poor man's perspective projection
|
||||
ray.dir.x = px;
|
||||
ray.dir.y = py;
|
||||
ray.dir.z = -1.0;
|
||||
vnormalize(ray.dir);
|
||||
|
||||
// Now loop over all of the pixels, stepping in x and y as calculated
|
||||
// above. (Assumes that ny divides y and nx divides x...)
|
||||
for (uniform int y = y0; y < y1; y += ny) {
|
||||
for (uniform int x = 0; x < w; x += nx) {
|
||||
// Figure out x,y pixel in NDC
|
||||
float px = (x + du - (w / 2.0f)) / (w / 2.0f);
|
||||
float py = -(y + dv - (h / 2.0f)) / (h / 2.0f);
|
||||
float ret = 0.f;
|
||||
Ray ray;
|
||||
Isect isect;
|
||||
isect.t = 1.0e+17;
|
||||
isect.hit = 0;
|
||||
|
||||
ray.org = 0.f;
|
||||
for (uniform int snum = 0; snum < 3; ++snum)
|
||||
ray_sphere_intersect(isect, ray, spheres[snum]);
|
||||
ray_plane_intersect(isect, ray, plane);
|
||||
|
||||
// Poor man's perspective projection
|
||||
ray.dir.x = px;
|
||||
ray.dir.y = py;
|
||||
ray.dir.z = -1.0;
|
||||
vnormalize(ray.dir);
|
||||
// Note use of 'coherent' if statement; the set of rays we
|
||||
// trace will often all hit or all miss the scene
|
||||
cif (isect.hit) {
|
||||
ret = ambient_occlusion(isect, plane, spheres, rngstate);
|
||||
ret *= invSamples * invSamples;
|
||||
|
||||
isect.t = 1.0e+17;
|
||||
isect.hit = 0;
|
||||
|
||||
for (uniform int snum = 0; snum < 3; ++snum)
|
||||
ray_sphere_intersect(isect, ray, spheres[snum]);
|
||||
ray_plane_intersect(isect, ray, plane);
|
||||
|
||||
// Note use of 'coherent' if statement; the set of rays we
|
||||
// trace will often all hit or all miss the scene
|
||||
cif (isect.hit)
|
||||
ret = ambient_occlusion(isect, plane, spheres, rngstate);
|
||||
|
||||
// This is a little grungy; we have results for
|
||||
// programCount-worth of values. Because we're doing 2x2
|
||||
// subsamples, we need to peel them off in groups of four,
|
||||
// average the four values for each pixel, and update the
|
||||
// output image.
|
||||
//
|
||||
// Store the varying value to a uniform array of the same size.
|
||||
// See the discussion about communication among program
|
||||
// instances in the ispc user's manual for more discussion on
|
||||
// this idiom.
|
||||
uniform float retArray[programCount];
|
||||
retArray[programIndex] = ret;
|
||||
|
||||
// offset to the first pixel in the image
|
||||
uniform int offset = 3 * (y * w + x);
|
||||
for (uniform int p = 0; p < programCount; p += 4, offset += 3) {
|
||||
// Get the four sample values for this pixel
|
||||
uniform float sumret = retArray[p] + retArray[p+1] + retArray[p+2] +
|
||||
retArray[p+3];
|
||||
|
||||
// Normalize by number of samples taken
|
||||
sumret /= nsubsamples * nsubsamples;
|
||||
|
||||
// Store result in the image
|
||||
image[offset+0] = sumret;
|
||||
image[offset+1] = sumret;
|
||||
image[offset+2] = sumret;
|
||||
}
|
||||
int offset = 3 * (y * w + x);
|
||||
atomic_add_local(&image[offset], ret);
|
||||
atomic_add_local(&image[offset+1], ret);
|
||||
atomic_add_local(&image[offset+2], ret);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -329,5 +268,5 @@ static void task ao_task(uniform int width, uniform int height,
|
||||
|
||||
export void ao_ispc_tasks(uniform int w, uniform int h, uniform int nsubsamples,
|
||||
uniform float image[]) {
|
||||
launch[h] < ao_task(w, h, nsubsamples, image) >;
|
||||
launch[h] ao_task(w, h, nsubsamples, image);
|
||||
}
|
||||
|
||||
@@ -14,13 +14,13 @@ dirs:
|
||||
clean:
|
||||
/bin/rm -rf objs *~ ao
|
||||
|
||||
ao: dirs objs/ao.o objs/instrument.o objs/ao_ispc.o
|
||||
$(CXX) $(CXXFLAGS) -o $@ objs/ao.o objs/ao_ispc.o objs/instrument.o -lm -lpthread
|
||||
ao: objs/ao.o objs/instrument.o objs/ao_ispc.o ../tasksys.cpp
|
||||
$(CXX) $(CXXFLAGS) -o $@ $^ -lm -lpthread
|
||||
|
||||
objs/%.o: %.cpp
|
||||
objs/%.o: %.cpp dirs
|
||||
$(CXX) $< $(CXXFLAGS) -c -o $@
|
||||
|
||||
objs/ao.o: objs/ao_ispc.h
|
||||
|
||||
objs/%_ispc.h objs/%_ispc.o: %.ispc
|
||||
$(ISPC) $(ISPCFLAGS) $< -o objs/$*_ispc.o -h objs/$*_ispc.h
|
||||
objs/%_ispc.h objs/%_ispc.o: %.ispc dirs
|
||||
$(ISPC) $(ISPCFLAGS) $< -o objs/$*_ispc.o -h objs/$*_instrumented_ispc.h
|
||||
|
||||
@@ -329,5 +329,5 @@ static void task ao_task(uniform int width, uniform int height,
|
||||
|
||||
export void ao_ispc_tasks(uniform int w, uniform int h, uniform int nsubsamples,
|
||||
uniform float image[]) {
|
||||
launch[h] < ao_task(w, h, nsubsamples, image) >;
|
||||
launch[h] ao_task(w, h, nsubsamples, image);
|
||||
}
|
||||
|
||||
65
examples/common.mk
Normal file
65
examples/common.mk
Normal file
@@ -0,0 +1,65 @@
|
||||
|
||||
TASK_CXX=../tasksys.cpp
|
||||
TASK_LIB=-lpthread
|
||||
TASK_OBJ=tasksys.o
|
||||
|
||||
CXX=g++
|
||||
CXXFLAGS=-Iobjs/ -O2 -m64
|
||||
LIBS=-lm $(TASK_LIB) -lstdc++
|
||||
ISPC=ispc -O2 --arch=x86-64 $(ISPC_FLAGS)
|
||||
ISPC_OBJS=$(addprefix objs/, $(ISPC_SRC:.ispc=)_ispc.o $(ISPC_SRC:.ispc=)_ispc_sse2.o \
|
||||
$(ISPC_SRC:.ispc=)_ispc_sse4.o $(ISPC_SRC:.ispc=)_ispc_avx.o)
|
||||
ISPC_HEADER=objs/$(ISPC_SRC:.ispc=_ispc.h)
|
||||
CPP_OBJS=$(addprefix objs/, $(CPP_SRC:.cpp=.o) $(TASK_OBJ))
|
||||
|
||||
default: $(EXAMPLE)
|
||||
|
||||
all: $(EXAMPLE) $(EXAMPLE)-sse4 $(EXAMPLE)-generic16 $(EXAMPLE)-scalar
|
||||
|
||||
.PHONY: dirs clean
|
||||
|
||||
dirs:
|
||||
/bin/mkdir -p objs/
|
||||
|
||||
objs/%.cpp objs/%.o objs/%.h: dirs
|
||||
|
||||
clean:
|
||||
/bin/rm -rf objs *~ $(EXAMPLE) $(EXAMPLE)-sse4 $(EXAMPLE)-generic16
|
||||
|
||||
$(EXAMPLE): $(CPP_OBJS) $(ISPC_OBJS)
|
||||
$(CXX) $(CXXFLAGS) -o $@ $^ $(LIBS)
|
||||
|
||||
objs/%.o: %.cpp dirs $(ISPC_HEADER)
|
||||
$(CXX) $< $(CXXFLAGS) -c -o $@
|
||||
|
||||
objs/%.o: ../%.cpp dirs
|
||||
$(CXX) $< $(CXXFLAGS) -c -o $@
|
||||
|
||||
objs/$(EXAMPLE).o: objs/$(EXAMPLE)_ispc.h
|
||||
|
||||
objs/%_ispc.h objs/%_ispc.o objs/%_ispc_sse2.o objs/%_ispc_sse4.o objs/%_ispc_avx.o: %.ispc
|
||||
$(ISPC) --target=$(ISPC_TARGETS) $< -o objs/$*_ispc.o -h objs/$*_ispc.h
|
||||
|
||||
objs/$(ISPC_SRC:.ispc=)_sse4.cpp: $(ISPC_SRC)
|
||||
$(ISPC) $< -o $@ --target=generic-4 --emit-c++ --c++-include-file=sse4.h
|
||||
|
||||
objs/$(ISPC_SRC:.ispc=)_sse4.o: objs/$(ISPC_SRC:.ispc=)_sse4.cpp
|
||||
$(CXX) -I../intrinsics -msse4.2 $< $(CXXFLAGS) -c -o $@
|
||||
|
||||
$(EXAMPLE)-sse4: $(CPP_OBJS) objs/$(ISPC_SRC:.ispc=)_sse4.o
|
||||
$(CXX) $(CXXFLAGS) -o $@ $^ $(LIBS)
|
||||
|
||||
objs/$(ISPC_SRC:.ispc=)_generic16.cpp: $(ISPC_SRC)
|
||||
$(ISPC) $< -o $@ --target=generic-16 --emit-c++ --c++-include-file=generic-16.h
|
||||
|
||||
objs/$(ISPC_SRC:.ispc=)_generic16.o: objs/$(ISPC_SRC:.ispc=)_generic16.cpp
|
||||
$(CXX) -I../intrinsics $< $(CXXFLAGS) -c -o $@
|
||||
|
||||
$(EXAMPLE)-generic16: $(CPP_OBJS) objs/$(ISPC_SRC:.ispc=)_generic16.o
|
||||
$(CXX) $(CXXFLAGS) -o $@ $^ $(LIBS)
|
||||
|
||||
objs/$(ISPC_SRC:.ispc=)_scalar.o: $(ISPC_SRC)
|
||||
$(ISPC) $< -o $@ --target=generic-1
|
||||
|
||||
$(EXAMPLE)-scalar: $(CPP_OBJS) objs/$(ISPC_SRC:.ispc=)_scalar.o
|
||||
$(CXX) $(CXXFLAGS) -o $@ $^ $(LIBS)
|
||||
@@ -1,38 +1,8 @@
|
||||
|
||||
ARCH = $(shell uname)
|
||||
EXAMPLE=deferred_shading
|
||||
CPP_SRC=common.cpp main.cpp dynamic_c.cpp dynamic_cilk.cpp
|
||||
ISPC_SRC=kernels.ispc
|
||||
ISPC_TARGETS=sse2,sse4-x2,avx-x2
|
||||
ISPC_FLAGS=--opt=fast-math
|
||||
|
||||
TASK_CXX=../tasksys.cpp
|
||||
TASK_LIB=-lpthread
|
||||
TASK_OBJ=$(addprefix objs/, $(subst ../,, $(TASK_CXX:.cpp=.o)))
|
||||
|
||||
CXX=g++
|
||||
CXXFLAGS=-Iobjs/ -O3 -Wall -m64
|
||||
ISPC=ispc
|
||||
ISPCFLAGS=-O2 --target=sse2,sse4-x2,avx-x2 --arch=x86-64 --math-lib=fast
|
||||
|
||||
OBJS=objs/main.o objs/common.o objs/kernels_ispc.o objs/kernels_ispc_sse2.o \
|
||||
objs/kernels_ispc_sse4.o objs/kernels_ispc_avx.o \
|
||||
objs/dynamic_c.o objs/dynamic_cilk.o
|
||||
|
||||
default: deferred_shading
|
||||
|
||||
.PHONY: dirs clean
|
||||
.PRECIOUS: objs/kernels_ispc.h
|
||||
|
||||
dirs:
|
||||
/bin/mkdir -p objs/
|
||||
|
||||
clean:
|
||||
/bin/rm -rf objs *~ deferred_shading
|
||||
|
||||
deferred_shading: dirs $(OBJS) $(TASK_OBJ)
|
||||
$(CXX) $(CXXFLAGS) -o $@ $(OBJS) $(TASK_OBJ) -lm $(TASK_LIB)
|
||||
|
||||
objs/%.o: %.cpp objs/kernels_ispc.h deferred.h
|
||||
$(CXX) $< $(CXXFLAGS) -c -o $@
|
||||
|
||||
objs/%.o: ../%.cpp
|
||||
$(CXX) $< $(CXXFLAGS) -c -o $@
|
||||
|
||||
objs/%_ispc.h objs/%_ispc.o objs/%_ispc_sse2.o objs/%_ispc_sse4.o objs/%_ispc_avx.o: %.ispc
|
||||
$(ISPC) $(ISPCFLAGS) $< -o objs/$*_ispc.o -h objs/$*_ispc.h
|
||||
include ../common.mk
|
||||
|
||||
@@ -204,6 +204,7 @@ void WriteFrame(const char *filename, const InputData *input,
|
||||
fprintf(out, "P6 %d %d 255\n", input->header.framebufferWidth,
|
||||
input->header.framebufferHeight);
|
||||
fwrite(framebufferAOS, imageBytes, 1, out);
|
||||
fclose(out);
|
||||
|
||||
lAlignedFree(framebufferAOS);
|
||||
}
|
||||
|
||||
@@ -35,35 +35,35 @@
|
||||
|
||||
struct InputDataArrays
|
||||
{
|
||||
uniform float * uniform zBuffer;
|
||||
uniform unsigned int16 * uniform normalEncoded_x; // half float
|
||||
uniform unsigned int16 * uniform normalEncoded_y; // half float
|
||||
uniform unsigned int16 * uniform specularAmount; // half float
|
||||
uniform unsigned int16 * uniform specularPower; // half float
|
||||
uniform unsigned int8 * uniform albedo_x; // unorm8
|
||||
uniform unsigned int8 * uniform albedo_y; // unorm8
|
||||
uniform unsigned int8 * uniform albedo_z; // unorm8
|
||||
uniform float * uniform lightPositionView_x;
|
||||
uniform float * uniform lightPositionView_y;
|
||||
uniform float * uniform lightPositionView_z;
|
||||
uniform float * uniform lightAttenuationBegin;
|
||||
uniform float * uniform lightColor_x;
|
||||
uniform float * uniform lightColor_y;
|
||||
uniform float * uniform lightColor_z;
|
||||
uniform float * uniform lightAttenuationEnd;
|
||||
float *zBuffer;
|
||||
unsigned int16 *normalEncoded_x; // half float
|
||||
unsigned int16 *normalEncoded_y; // half float
|
||||
unsigned int16 *specularAmount; // half float
|
||||
unsigned int16 *specularPower; // half float
|
||||
unsigned int8 *albedo_x; // unorm8
|
||||
unsigned int8 *albedo_y; // unorm8
|
||||
unsigned int8 *albedo_z; // unorm8
|
||||
float *lightPositionView_x;
|
||||
float *lightPositionView_y;
|
||||
float *lightPositionView_z;
|
||||
float *lightAttenuationBegin;
|
||||
float *lightColor_x;
|
||||
float *lightColor_y;
|
||||
float *lightColor_z;
|
||||
float *lightAttenuationEnd;
|
||||
};
|
||||
|
||||
struct InputHeader
|
||||
{
|
||||
uniform float cameraProj[4][4];
|
||||
uniform float cameraNear;
|
||||
uniform float cameraFar;
|
||||
float cameraProj[4][4];
|
||||
float cameraNear;
|
||||
float cameraFar;
|
||||
|
||||
uniform int32 framebufferWidth;
|
||||
uniform int32 framebufferHeight;
|
||||
uniform int32 numLights;
|
||||
uniform int32 inputDataChunkSize;
|
||||
uniform int32 inputDataArrayOffsets[idaNum];
|
||||
int32 framebufferWidth;
|
||||
int32 framebufferHeight;
|
||||
int32 numLights;
|
||||
int32 inputDataChunkSize;
|
||||
int32 inputDataArrayOffsets[idaNum];
|
||||
};
|
||||
|
||||
|
||||
@@ -158,38 +158,22 @@ IntersectLightsWithTileMinMax(
|
||||
uniform float gBufferScale_x = 0.5f * (float)gBufferWidth;
|
||||
uniform float gBufferScale_y = 0.5f * (float)gBufferHeight;
|
||||
|
||||
// Parallelize across frustum planes.
|
||||
// We really only have four side planes here, but write the code to
|
||||
// handle programCount > 4 robustly
|
||||
uniform float frustumPlanes_xy[programCount];
|
||||
uniform float frustumPlanes_z[programCount];
|
||||
uniform float frustumPlanes_xy[4] = {
|
||||
-(cameraProj_11 * gBufferScale_x),
|
||||
(cameraProj_11 * gBufferScale_x),
|
||||
(cameraProj_22 * gBufferScale_y),
|
||||
-(cameraProj_22 * gBufferScale_y) };
|
||||
uniform float frustumPlanes_z[4] = {
|
||||
tileEndX - gBufferScale_x,
|
||||
-tileStartX + gBufferScale_x,
|
||||
tileEndY - gBufferScale_y,
|
||||
-tileStartY + gBufferScale_y };
|
||||
|
||||
// TODO: If programIndex < 4 here? Don't care about masking off the
|
||||
// rest but if interleaving ("x2" modes) the other lanes should ideally
|
||||
// not be emitted...
|
||||
{
|
||||
// This one is totally constant over the whole screen... worth pulling it up at all?
|
||||
float frustumPlanes_xy_v;
|
||||
frustumPlanes_xy_v = insert(frustumPlanes_xy_v, 0, -(cameraProj_11 * gBufferScale_x));
|
||||
frustumPlanes_xy_v = insert(frustumPlanes_xy_v, 1, (cameraProj_11 * gBufferScale_x));
|
||||
frustumPlanes_xy_v = insert(frustumPlanes_xy_v, 2, (cameraProj_22 * gBufferScale_y));
|
||||
frustumPlanes_xy_v = insert(frustumPlanes_xy_v, 3, -(cameraProj_22 * gBufferScale_y));
|
||||
|
||||
float frustumPlanes_z_v;
|
||||
frustumPlanes_z_v = insert(frustumPlanes_z_v, 0, tileEndX - gBufferScale_x);
|
||||
frustumPlanes_z_v = insert(frustumPlanes_z_v, 1, -tileStartX + gBufferScale_x);
|
||||
frustumPlanes_z_v = insert(frustumPlanes_z_v, 2, tileEndY - gBufferScale_y);
|
||||
frustumPlanes_z_v = insert(frustumPlanes_z_v, 3, -tileStartY + gBufferScale_y);
|
||||
|
||||
// Normalize
|
||||
float norm = rsqrt(frustumPlanes_xy_v * frustumPlanes_xy_v +
|
||||
frustumPlanes_z_v * frustumPlanes_z_v);
|
||||
frustumPlanes_xy_v *= norm;
|
||||
frustumPlanes_z_v *= norm;
|
||||
|
||||
// Save out for uniform use later
|
||||
frustumPlanes_xy[programIndex] = frustumPlanes_xy_v;
|
||||
frustumPlanes_z[programIndex] = frustumPlanes_z_v;
|
||||
for (uniform int i = 0; i < 4; ++i) {
|
||||
uniform float norm = rsqrt(frustumPlanes_xy[i] * frustumPlanes_xy[i] +
|
||||
frustumPlanes_z[i] * frustumPlanes_z[i]);
|
||||
frustumPlanes_xy[i] *= norm;
|
||||
frustumPlanes_z[i] *= norm;
|
||||
}
|
||||
|
||||
uniform int32 tileNumLights = 0;
|
||||
@@ -343,8 +327,8 @@ ShadeTile(
|
||||
|
||||
// Reconstruct normal from G-buffer
|
||||
float surface_normal_x, surface_normal_y, surface_normal_z;
|
||||
float normal_x = half_to_float_fast(inputData.normalEncoded_x[gBufferOffset]);
|
||||
float normal_y = half_to_float_fast(inputData.normalEncoded_y[gBufferOffset]);
|
||||
float normal_x = half_to_float(inputData.normalEncoded_x[gBufferOffset]);
|
||||
float normal_y = half_to_float(inputData.normalEncoded_y[gBufferOffset]);
|
||||
|
||||
float f = (normal_x - normal_x * normal_x) + (normal_y - normal_y * normal_y);
|
||||
float m = sqrt(4.0f * f - 1.0f);
|
||||
@@ -355,9 +339,9 @@ ShadeTile(
|
||||
|
||||
// Load other G-buffer parameters
|
||||
float surface_specularAmount =
|
||||
half_to_float_fast(inputData.specularAmount[gBufferOffset]);
|
||||
half_to_float(inputData.specularAmount[gBufferOffset]);
|
||||
float surface_specularPower =
|
||||
half_to_float_fast(inputData.specularPower[gBufferOffset]);
|
||||
half_to_float(inputData.specularPower[gBufferOffset]);
|
||||
float surface_albedo_x = Unorm8ToFloat32(inputData.albedo_x[gBufferOffset]);
|
||||
float surface_albedo_y = Unorm8ToFloat32(inputData.albedo_y[gBufferOffset]);
|
||||
float surface_albedo_z = Unorm8ToFloat32(inputData.albedo_z[gBufferOffset]);
|
||||
@@ -530,9 +514,9 @@ RenderStatic(uniform InputHeader &inputHeader,
|
||||
|
||||
// Launch a task to render each tile, each of which is MIN_TILE_WIDTH
|
||||
// by MIN_TILE_HEIGHT pixels.
|
||||
launch[num_groups] < RenderTile(num_groups_x, num_groups_y,
|
||||
inputHeader, inputData, visualizeLightCount,
|
||||
framebuffer_r, framebuffer_g, framebuffer_b) >;
|
||||
launch[num_groups] RenderTile(num_groups_x, num_groups_y,
|
||||
inputHeader, inputData, visualizeLightCount,
|
||||
framebuffer_r, framebuffer_g, framebuffer_b);
|
||||
}
|
||||
|
||||
|
||||
@@ -591,8 +575,6 @@ SplitTileMinMax(
|
||||
uniform float light_positionView_z_array[],
|
||||
uniform float light_attenuationEnd_array[],
|
||||
// Outputs
|
||||
// TODO: ISPC doesn't currently like multidimensional arrays so we'll do the
|
||||
// indexing math ourselves
|
||||
uniform int32 subtileIndices[],
|
||||
uniform int32 subtileIndicesPitch,
|
||||
uniform int32 subtileNumLights[]
|
||||
@@ -601,30 +583,20 @@ SplitTileMinMax(
|
||||
uniform float gBufferScale_x = 0.5f * (float)gBufferWidth;
|
||||
uniform float gBufferScale_y = 0.5f * (float)gBufferHeight;
|
||||
|
||||
// Parallelize across frustum planes
|
||||
// Only have 2 frustum split planes here so may not be worth it, but
|
||||
// we'll do it for now for consistency
|
||||
uniform float frustumPlanes_xy[programCount];
|
||||
uniform float frustumPlanes_z[programCount];
|
||||
|
||||
// This one is totally constant over the whole screen... worth pulling it up at all?
|
||||
float frustumPlanes_xy_v;
|
||||
frustumPlanes_xy_v = insert(frustumPlanes_xy_v, 0, -(cameraProj_11 * gBufferScale_x));
|
||||
frustumPlanes_xy_v = insert(frustumPlanes_xy_v, 1, (cameraProj_22 * gBufferScale_y));
|
||||
|
||||
float frustumPlanes_z_v;
|
||||
frustumPlanes_z_v = insert(frustumPlanes_z_v, 0, tileMidX - gBufferScale_x);
|
||||
frustumPlanes_z_v = insert(frustumPlanes_z_v, 1, tileMidY - gBufferScale_y);
|
||||
uniform float frustumPlanes_xy[2] = { -(cameraProj_11 * gBufferScale_x),
|
||||
(cameraProj_22 * gBufferScale_y) };
|
||||
uniform float frustumPlanes_z[2] = { tileMidX - gBufferScale_x,
|
||||
tileMidY - gBufferScale_y };
|
||||
|
||||
// Normalize
|
||||
float norm = rsqrt(frustumPlanes_xy_v * frustumPlanes_xy_v +
|
||||
frustumPlanes_z_v * frustumPlanes_z_v);
|
||||
frustumPlanes_xy_v *= norm;
|
||||
frustumPlanes_z_v *= norm;
|
||||
|
||||
// Save out for uniform use later
|
||||
frustumPlanes_xy[programIndex] = frustumPlanes_xy_v;
|
||||
frustumPlanes_z[programIndex] = frustumPlanes_z_v;
|
||||
uniform float norm[2] = { rsqrt(frustumPlanes_xy[0] * frustumPlanes_xy[0] +
|
||||
frustumPlanes_z[0] * frustumPlanes_z[0]),
|
||||
rsqrt(frustumPlanes_xy[1] * frustumPlanes_xy[1] +
|
||||
frustumPlanes_z[1] * frustumPlanes_z[1]) };
|
||||
frustumPlanes_xy[0] *= norm[0];
|
||||
frustumPlanes_xy[1] *= norm[1];
|
||||
frustumPlanes_z[0] *= norm[0];
|
||||
frustumPlanes_z[1] *= norm[1];
|
||||
|
||||
// Initialize
|
||||
uniform int32 subtileLightOffset[4];
|
||||
|
||||
@@ -23,6 +23,8 @@ Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "stencil", "stencil\stencil.
|
||||
EndProject
|
||||
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "deferred_shading", "deferred\deferred_shading.vcxproj", "{87F53C53-957E-4E91-878A-BC27828FB9EB}"
|
||||
EndProject
|
||||
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "perfbench", "perfbench\perfbench.vcxproj", "{D923BB7E-A7C8-4850-8FCF-0EB9CE35B4E8}"
|
||||
EndProject
|
||||
Global
|
||||
GlobalSection(SolutionConfigurationPlatforms) = preSolution
|
||||
Debug|Win32 = Debug|Win32
|
||||
@@ -119,6 +121,14 @@ Global
|
||||
{87F53C53-957E-4E91-878A-BC27828FB9EB}.Release|Win32.Build.0 = Release|Win32
|
||||
{87F53C53-957E-4E91-878A-BC27828FB9EB}.Release|x64.ActiveCfg = Release|x64
|
||||
{87F53C53-957E-4E91-878A-BC27828FB9EB}.Release|x64.Build.0 = Release|x64
|
||||
{D923BB7E-A7C8-4850-8FCF-0EB9CE35B4E8}.Debug|Win32.ActiveCfg = Debug|Win32
|
||||
{D923BB7E-A7C8-4850-8FCF-0EB9CE35B4E8}.Debug|Win32.Build.0 = Debug|Win32
|
||||
{D923BB7E-A7C8-4850-8FCF-0EB9CE35B4E8}.Debug|x64.ActiveCfg = Debug|x64
|
||||
{D923BB7E-A7C8-4850-8FCF-0EB9CE35B4E8}.Debug|x64.Build.0 = Debug|x64
|
||||
{D923BB7E-A7C8-4850-8FCF-0EB9CE35B4E8}.Release|Win32.ActiveCfg = Release|Win32
|
||||
{D923BB7E-A7C8-4850-8FCF-0EB9CE35B4E8}.Release|Win32.Build.0 = Release|Win32
|
||||
{D923BB7E-A7C8-4850-8FCF-0EB9CE35B4E8}.Release|x64.ActiveCfg = Release|x64
|
||||
{D923BB7E-A7C8-4850-8FCF-0EB9CE35B4E8}.Release|x64.Build.0 = Release|x64
|
||||
EndGlobalSection
|
||||
GlobalSection(SolutionProperties) = preSolution
|
||||
HideSolutionNode = FALSE
|
||||
|
||||
1487
examples/intrinsics/generic-16.h
Normal file
1487
examples/intrinsics/generic-16.h
Normal file
File diff suppressed because it is too large
Load Diff
3686
examples/intrinsics/sse4.h
Normal file
3686
examples/intrinsics/sse4.h
Normal file
File diff suppressed because it is too large
Load Diff
@@ -1,30 +1,7 @@
|
||||
|
||||
CXX=g++ -m64
|
||||
CXXFLAGS=-Iobjs/ -O3 -Wall
|
||||
ISPC=ispc
|
||||
ISPCFLAGS=-O2 --target=sse2,sse4-x2,avx-x2 --arch=x86-64
|
||||
EXAMPLE=mandelbrot
|
||||
CPP_SRC=mandelbrot.cpp mandelbrot_serial.cpp
|
||||
ISPC_SRC=mandelbrot.ispc
|
||||
ISPC_TARGETS=sse2,sse4-x2,avx-x2
|
||||
|
||||
default: mandelbrot
|
||||
|
||||
.PHONY: dirs clean
|
||||
|
||||
dirs:
|
||||
/bin/mkdir -p objs/
|
||||
|
||||
clean:
|
||||
/bin/rm -rf objs *~ mandelbrot
|
||||
|
||||
OBJS=objs/mandelbrot.o objs/mandelbrot_serial.o objs/mandelbrot_ispc_sse2.o \
|
||||
objs/mandelbrot_ispc_sse4.o objs/mandelbrot_ispc_avx.o \
|
||||
objs/mandelbrot_ispc.o
|
||||
|
||||
mandelbrot: dirs $(OBJS)
|
||||
$(CXX) $(CXXFLAGS) -o $@ $(OBJS) -lm
|
||||
|
||||
objs/%.o: %.cpp
|
||||
$(CXX) $< $(CXXFLAGS) -c -o $@
|
||||
|
||||
objs/mandelbrot.o: objs/mandelbrot_ispc.h
|
||||
|
||||
objs/%_ispc.h objs/%_ispc.o objs/%_ispc_sse2.o objs/%_ispc_sse4.o objs/%_ispc_avx.o: %.ispc
|
||||
$(ISPC) $(ISPCFLAGS) $< -o objs/$*_ispc.o -h objs/$*_ispc.h
|
||||
include ../common.mk
|
||||
|
||||
@@ -1,39 +1,7 @@
|
||||
|
||||
ARCH = $(shell uname)
|
||||
EXAMPLE=mandelbrot
|
||||
CPP_SRC=mandelbrot.cpp mandelbrot_serial.cpp
|
||||
ISPC_SRC=mandelbrot.ispc
|
||||
ISPC_TARGETS=sse2,sse4-x2,avx-x2
|
||||
|
||||
TASK_CXX=../tasksys.cpp
|
||||
TASK_LIB=-lpthread
|
||||
TASK_OBJ=$(addprefix objs/, $(subst ../,, $(TASK_CXX:.cpp=.o)))
|
||||
|
||||
CXX=g++
|
||||
CXXFLAGS=-Iobjs/ -O3 -Wall -m64
|
||||
ISPC=ispc
|
||||
ISPCFLAGS=-O2 --target=sse2,sse4-x2,avx-x2 --arch=x86-64
|
||||
|
||||
OBJS=objs/mandelbrot.o objs/mandelbrot_serial.o $(TASK_OBJ) \
|
||||
objs/mandelbrot_ispc.o objs/mandelbrot_ispc_sse2.o \
|
||||
objs/mandelbrot_ispc_sse4.o objs/mandelbrot_ispc_avx.o
|
||||
|
||||
default: mandelbrot
|
||||
|
||||
.PHONY: dirs clean
|
||||
|
||||
dirs:
|
||||
/bin/mkdir -p objs/
|
||||
|
||||
clean:
|
||||
/bin/rm -rf objs *~ mandelbrot
|
||||
|
||||
mandelbrot: dirs $(OBJS)
|
||||
$(CXX) $(CXXFLAGS) -o $@ $(OBJS) -lm $(TASK_LIB)
|
||||
|
||||
objs/%.o: %.cpp
|
||||
$(CXX) $< $(CXXFLAGS) -c -o $@
|
||||
|
||||
objs/%.o: ../%.cpp
|
||||
$(CXX) $< $(CXXFLAGS) -c -o $@
|
||||
|
||||
objs/mandelbrot.o: objs/mandelbrot_ispc.h
|
||||
|
||||
objs/%_ispc.h objs/%_ispc.o objs/%_ispc_sse2.o objs/%_ispc_sse4.o objs/%_ispc_avx.o: %.ispc
|
||||
$(ISPC) $(ISPCFLAGS) $< -o objs/$*_ispc.o -h objs/$*_ispc.h
|
||||
include ../common.mk
|
||||
|
||||
@@ -49,17 +49,16 @@ mandel(float c_re, float c_im, int count) {
|
||||
}
|
||||
|
||||
|
||||
/* Task to compute the Mandelbrot iterations for a span of scanlines from
|
||||
[ystart,yend).
|
||||
/* Task to compute the Mandelbrot iterations for a single scanline.
|
||||
*/
|
||||
task void
|
||||
mandelbrot_scanlines(uniform int ybase, uniform int span,
|
||||
uniform float x0, uniform float dx,
|
||||
uniform float y0, uniform float dy,
|
||||
uniform int width, uniform int maxIterations,
|
||||
uniform int output[]) {
|
||||
uniform int ystart = ybase + taskIndex * span;
|
||||
uniform int yend = ystart + span;
|
||||
mandelbrot_scanline(uniform float x0, uniform float dx,
|
||||
uniform float y0, uniform float dy,
|
||||
uniform int width, uniform int height,
|
||||
uniform int span,
|
||||
uniform int maxIterations, uniform int output[]) {
|
||||
uniform int ystart = taskIndex * span;
|
||||
uniform int yend = min((taskIndex+1) * span, (unsigned int)height);
|
||||
|
||||
foreach (yi = ystart ... yend, xi = 0 ... width) {
|
||||
float x = x0 + xi * dx;
|
||||
@@ -71,20 +70,6 @@ mandelbrot_scanlines(uniform int ybase, uniform int span,
|
||||
}
|
||||
|
||||
|
||||
task void
|
||||
mandelbrot_chunk(uniform float x0, uniform float dx,
|
||||
uniform float y0, uniform float dy,
|
||||
uniform int width, uniform int height,
|
||||
uniform int maxIterations, uniform int output[]) {
|
||||
uniform int ystart = taskIndex * (height/taskCount);
|
||||
uniform int yend = (taskIndex+1) * (height/taskCount);
|
||||
uniform int span = 1;
|
||||
|
||||
launch[(yend-ystart)/span] < mandelbrot_scanlines(ystart, span, x0, dx, y0, dy,
|
||||
width, maxIterations, output) >;
|
||||
}
|
||||
|
||||
|
||||
export void
|
||||
mandelbrot_ispc(uniform float x0, uniform float y0,
|
||||
uniform float x1, uniform float y1,
|
||||
@@ -92,7 +77,8 @@ mandelbrot_ispc(uniform float x0, uniform float y0,
|
||||
uniform int maxIterations, uniform int output[]) {
|
||||
uniform float dx = (x1 - x0) / width;
|
||||
uniform float dy = (y1 - y0) / height;
|
||||
uniform int span = 4;
|
||||
|
||||
launch[32] < mandelbrot_chunk(x0, dx, y0, dy, width, height,
|
||||
maxIterations, output) >;
|
||||
launch[height/span] mandelbrot_scanline(x0, dx, y0, dy, width, height, span,
|
||||
maxIterations, output);
|
||||
}
|
||||
|
||||
@@ -1,29 +1,7 @@
|
||||
|
||||
CXX=g++ -m64
|
||||
CXXFLAGS=-Iobjs/ -O3 -Wall
|
||||
ISPC=ispc
|
||||
ISPCFLAGS=-O2 --target=sse2,sse4,avx-x2 --arch=x86-64
|
||||
EXAMPLE=noise
|
||||
CPP_SRC=$(EXAMPLE).cpp $(EXAMPLE)_serial.cpp
|
||||
ISPC_SRC=noise.ispc
|
||||
ISPC_TARGETS=sse2,sse4,avx-x2
|
||||
|
||||
OBJS=objs/noise.o objs/noise_serial.o objs/noise_ispc.o objs/noise_ispc_sse2.o \
|
||||
objs/noise_ispc_sse4.o objs/noise_ispc_avx.o
|
||||
|
||||
default: noise
|
||||
|
||||
.PHONY: dirs clean
|
||||
|
||||
dirs:
|
||||
/bin/mkdir -p objs/
|
||||
|
||||
clean:
|
||||
/bin/rm -rf objs *~ noise
|
||||
|
||||
noise: dirs $(OBJS)
|
||||
$(CXX) $(CXXFLAGS) -o $@ $(OBJS) -lm
|
||||
|
||||
objs/%.o: %.cpp
|
||||
$(CXX) $< $(CXXFLAGS) -c -o $@
|
||||
|
||||
objs/noise.o: objs/noise_ispc.h
|
||||
|
||||
objs/%_ispc.h objs/%_ispc.o objs/%_ispc_sse2.o objs/%_ispc_sse4.o objs/%_ispc_avx.o: %.ispc
|
||||
$(ISPC) $(ISPCFLAGS) $< -o objs/$*_ispc.o -h objs/$*_ispc.h
|
||||
include ../common.mk
|
||||
|
||||
@@ -1,38 +1,7 @@
|
||||
|
||||
TASK_CXX=../tasksys.cpp
|
||||
TASK_LIB=-lpthread
|
||||
TASK_OBJ=$(addprefix objs/, $(subst ../,, $(TASK_CXX:.cpp=.o)))
|
||||
EXAMPLE=options
|
||||
CPP_SRC=options.cpp options_serial.cpp
|
||||
ISPC_SRC=options.ispc
|
||||
ISPC_TARGETS=sse2,sse4-x2,avx-x2
|
||||
|
||||
|
||||
CXX=g++ -m64
|
||||
CXXFLAGS=-Iobjs/ -g -Wall
|
||||
ISPC=ispc
|
||||
ISPCFLAGS=-O2 --target=sse2,sse4-x2,avx-x2 --arch=x86-64
|
||||
|
||||
OBJS=objs/options.o objs/options_serial.o objs/options_ispc.o \
|
||||
objs/options_ispc_sse2.o objs/options_ispc_sse4.o \
|
||||
objs/options_ispc_avx.o $(TASK_OBJ)
|
||||
|
||||
default: options
|
||||
|
||||
.PHONY: dirs clean
|
||||
|
||||
dirs:
|
||||
/bin/mkdir -p objs/
|
||||
|
||||
clean:
|
||||
/bin/rm -rf objs *~ options
|
||||
|
||||
options: dirs $(OBJS)
|
||||
$(CXX) $(CXXFLAGS) -o $@ $(OBJS) -lm $(TASK_LIB)
|
||||
|
||||
objs/%.o: %.cpp
|
||||
$(CXX) $< $(CXXFLAGS) -c -o $@
|
||||
|
||||
objs/%.o: ../%.cpp
|
||||
$(CXX) $< $(CXXFLAGS) -c -o $@
|
||||
|
||||
objs/options.o: objs/options_ispc.h options_defs.h
|
||||
|
||||
objs/%_ispc.h objs/%_ispc.o objs/%_ispc_sse2.o objs/%_ispc_sse4.o objs/%_ispc_avx.o: %.ispc options_defs.h
|
||||
$(ISPC) $(ISPCFLAGS) $< -o objs/$*_ispc.o -h objs/$*_ispc.h
|
||||
include ../common.mk
|
||||
|
||||
@@ -77,7 +77,7 @@ black_scholes_ispc_tasks(uniform float Sa[], uniform float Xa[], uniform float T
|
||||
uniform float ra[], uniform float va[],
|
||||
uniform float result[], uniform int count) {
|
||||
uniform int nTasks = max((int)64, (int)count/16384);
|
||||
launch[nTasks] < bs_task(Sa, Xa, Ta, ra, va, result, count) >;
|
||||
launch[nTasks] bs_task(Sa, Xa, Ta, ra, va, result, count);
|
||||
}
|
||||
|
||||
|
||||
@@ -150,5 +150,5 @@ binomial_put_ispc_tasks(uniform float Sa[], uniform float Xa[],
|
||||
uniform float va[], uniform float result[],
|
||||
uniform int count) {
|
||||
uniform int nTasks = max((int)64, (int)count/16384);
|
||||
launch[nTasks] < binomial_task(Sa, Xa, Ta, ra, va, result, count) >;
|
||||
launch[nTasks] binomial_task(Sa, Xa, Ta, ra, va, result, count);
|
||||
}
|
||||
|
||||
7
examples/perfbench/Makefile
Normal file
7
examples/perfbench/Makefile
Normal file
@@ -0,0 +1,7 @@
|
||||
|
||||
# Per-example settings for the perfbench benchmark.  The actual build rules
# are pulled in from ../common.mk (not shown here), which presumably consumes
# the variables below.
#
# EXAMPLE must match the directory/source base name ("perfbench").
EXAMPLE=perfbench
CPP_SRC=perfbench.cpp perfbench_serial.cpp
ISPC_SRC=perfbench.ispc
ISPC_TARGETS=sse2,sse4,avx

include ../common.mk
|
||||
108
examples/perfbench/perfbench.cpp
Normal file
108
examples/perfbench/perfbench.cpp
Normal file
@@ -0,0 +1,108 @@
|
||||
/*
|
||||
Copyright (c) 2012, Intel Corporation
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of Intel Corporation nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
|
||||
IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
||||
TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
|
||||
PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
|
||||
OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifdef _MSC_VER
|
||||
#define _CRT_SECURE_NO_WARNINGS
|
||||
#define NOMINMAX
|
||||
#pragma warning (disable: 4244)
|
||||
#pragma warning (disable: 4305)
|
||||
#endif
|
||||
|
||||
#include <stdio.h>
|
||||
#include <algorithm>
|
||||
#include "../timing.h"
|
||||
|
||||
#include "perfbench_ispc.h"
|
||||
|
||||
typedef void (FuncType)(float *, int, float *, float *);
|
||||
|
||||
struct PerfTest {
|
||||
FuncType *aFunc;
|
||||
const char *aName;
|
||||
FuncType *bFunc;
|
||||
const char *bName;
|
||||
const char *testName;
|
||||
};
|
||||
|
||||
extern void xyzSumAOS(float *a, int count, float *zeros, float *result);
|
||||
extern void xyzSumSOA(float *a, int count, float *zeros, float *result);
|
||||
|
||||
|
||||
// Fills ptr[0 .. count) with a deterministic ramp: element i receives
// i / (1024 * 1024).  A non-positive count leaves the buffer untouched.
static void
lInitData(float *ptr, int count) {
    int idx = 0;
    while (idx < count) {
        ptr[idx] = float(idx) / (1024.f * 1024.f);
        ++idx;
    }
}
|
||||
|
||||
// Benchmark table consumed by main().  Each entry is
//   { aFunc, aName, bFunc, bName, testName }
// The first four rows time the serial C++ sum against an ISPC variant; the
// last two time one ISPC memory-access routine against another.
static PerfTest tests[] = {
    { xyzSumAOS, "serial",
      ispc::xyzSumAOS, "ispc",
      "AOS vector element sum (with coalescing)" },
    { xyzSumAOS, "serial",
      ispc::xyzSumAOSStdlib, "ispc",
      "AOS vector element sum (stdlib swizzle)" },
    { xyzSumAOS, "serial",
      ispc::xyzSumAOSNoCoalesce, "ispc",
      "AOS vector element sum (no coalescing)" },
    { xyzSumSOA, "serial",
      ispc::xyzSumSOA, "ispc",
      "SOA vector element sum" },
    { ispc::gathers, "gather",
      ispc::loads, "vector load",
      "Memory reads" },
    { ispc::scatters, "scatter",
      ispc::stores, "vector store",
      "Memory writes" },
};
|
||||
|
||||
int main() {
|
||||
int count = 3*64*1024;
|
||||
float *a = new float[count];
|
||||
float zeros[32] = { 0 };
|
||||
|
||||
int nTests = sizeof(tests) / sizeof(tests[0]);
|
||||
for (int i = 0; i < nTests; ++i) {
|
||||
lInitData(a, count);
|
||||
reset_and_start_timer();
|
||||
float resultA[3] = { 0, 0, 0 };
|
||||
for (int j = 0; j < 100; ++j)
|
||||
tests[i].aFunc(a, count, zeros, resultA);
|
||||
double aTime = get_elapsed_mcycles();
|
||||
|
||||
lInitData(a, count);
|
||||
reset_and_start_timer();
|
||||
float resultB[3] = { 0, 0, 0 };
|
||||
for (int j = 0; j < 100; ++j)
|
||||
tests[i].bFunc(a, count, zeros, resultB);
|
||||
double bTime = get_elapsed_mcycles();
|
||||
|
||||
printf("%-40s: [%.2f] M cycles %s, [%.2f] M cycles %s (%.2fx speedup).\n",
|
||||
tests[i].testName, aTime, tests[i].aName, bTime, tests[i].bName,
|
||||
aTime/bTime);
|
||||
#if 0
|
||||
printf("\t(%f %f %f) - (%f %f %f)\n", resultSerial[0], resultSerial[1],
|
||||
resultSerial[2], resultISPC[0], resultISPC[1], resultISPC[2]);
|
||||
#endif
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
170
examples/perfbench/perfbench.ispc
Normal file
170
examples/perfbench/perfbench.ispc
Normal file
@@ -0,0 +1,170 @@
|
||||
/*
|
||||
Copyright (c) 2012, Intel Corporation
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of Intel Corporation nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
|
||||
IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
||||
TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
|
||||
PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
|
||||
OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
// Sums the x, y and z components of count/3 interleaved (x, y, z) triples
// stored in array[] and writes the three totals to result[0..2].
// The zeros[] argument is not used by this variant.
export void xyzSumAOS(uniform float array[], uniform int count,
                      uniform float zeros[], uniform float result[]) {
    float sumX = 0, sumY = 0, sumZ = 0;
    foreach (i = 0 ... count/3) {
        // Triple i occupies array[3*i .. 3*i+2].
        sumX += array[3*i];
        sumY += array[3*i+1];
        sumZ += array[3*i+2];
    }
    // Collapse the per-lane partial sums into one uniform value each.
    result[0] = reduce_add(sumX);
    result[1] = reduce_add(sumY);
    result[2] = reduce_add(sumZ);
}
|
||||
|
||||
// Same reduction as xyzSumAOS, but each step de-interleaves programCount
// triples with the aos_to_soa3() library routine instead of indexing the
// array directly.
//
// NOTE: the trip count is hard-coded to 64*1024 triples; the count argument
// is not consulted (the original "count/3" bound is left in the comment
// below).  The zeros[] argument is not used either.
export void xyzSumAOSStdlib(uniform float array[], uniform int count,
                            uniform float zeros[], uniform float result[]) {
    float sumX = 0, sumY = 0, sumZ = 0;
    for (uniform int base = 0; base < 64*1024 /*count/3*/; base += programCount) {
        float vx, vy, vz;
        // Pull the triples starting at index 'base' apart into vx / vy / vz.
        aos_to_soa3(&array[3*base], &vx, &vy, &vz);

        sumX += vx;
        sumY += vy;
        sumZ += vz;
    }
    result[0] = reduce_add(sumX);
    result[1] = reduce_add(sumY);
    result[2] = reduce_add(sumZ);
}
|
||||
|
||||
// Same reduction as xyzSumAOS, except that every index has a per-lane offset
// added to it.  The offset is read from zerosArray[programIndex] and
// converted to int.
// NOTE(review): the caller passes an all-zero array, so the offset looks like
// a way to hide the access pattern from the compiler (the benchmark labels
// this variant "no coalescing") -- confirm.
export void xyzSumAOSNoCoalesce(uniform float array[], uniform int count,
                                uniform float zerosArray[], uniform float result[]) {
    int laneOffset = zerosArray[programIndex];
    float sumX = 0, sumY = 0, sumZ = 0;
    foreach (i = 0 ... count/3) {
        sumX += array[3*i+laneOffset];
        sumY += array[3*i+1+laneOffset];
        sumZ += array[3*i+2+laneOffset];
    }
    result[0] = reduce_add(sumX);
    result[1] = reduce_add(sumY);
    result[2] = reduce_add(sumZ);
}
|
||||
|
||||
// Sums x, y and z for data laid out in groups of 24 floats: within a group,
// floats 0..7 are read as x, 8..15 as y and 16..23 as z.  The outer loop
// advances 8 elements (24 floats) at a time until count/3 elements have been
// covered; the inner loop walks the 8 slots of a group programCount at a time.
// Totals are written to result[0..2].  The zeros[] argument is not used.
export void xyzSumSOA(uniform float array[], uniform int count,
                      uniform float zeros[], uniform float result[]) {
    float sumX = 0, sumY = 0, sumZ = 0;
    uniform float * uniform group = array;
    // The inner loop assumes a gang is no wider than one 8-slot group.
    assert(programCount <= 8);

    for (uniform int elem = 0; elem < count/3; elem += 8, group += 24) {
        for (uniform int slot = 0; slot < 8; slot += programCount) {
            sumX += group[slot + programIndex];
            sumY += group[8 + slot + programIndex];
            sumZ += group[16 + slot + programIndex];
        }
    }
    result[0] = reduce_add(sumX);
    result[1] = reduce_add(sumY);
    result[2] = reduce_add(sumZ);
}
|
||||
|
||||
// Sums array[0 .. count) into result[0], reading each element through an
// index that has a per-lane offset (taken from zeros[programIndex]) added.
// NOTE(review): with the all-zero array the caller supplies this reads the
// same elements as loads(); the offset presumably exists so the compiler
// emits a gather rather than a vector load -- confirm.
export void gathers(uniform float array[], uniform int count,
                    uniform float zeros[], uniform float result[]) {
    int laneOffset = zeros[programIndex];
    float total = 0;
    foreach (k = 0 ... count) {
        total += array[k + laneOffset];
    }
    result[0] = reduce_add(total);
}
|
||||
|
||||
|
||||
// Sums array[0 .. count) into result[0] using the plain foreach index.
// The zeros[] argument is not used; only result[0] is written.
export void loads(uniform float array[], uniform int count,
                  uniform float zeros[], uniform float result[]) {
    float total = 0;
    foreach (k = 0 ... count) {
        total += array[k];
    }
    result[0] = reduce_add(total);
}
|
||||
|
||||
|
||||
// Overwrites array[0 .. count): each lane stores its own value of
// zeros[programIndex] (converted to int) at an index that also has that value
// added to it.  result[] is not written.
// NOTE(review): the per-lane index offset presumably forces a scatter instead
// of a vector store -- confirm.
export void scatters(uniform float array[], uniform int count,
                     uniform float zeros[], uniform float result[]) {
    int laneValue = zeros[programIndex];
    foreach (k = 0 ... count) {
        array[k + laneValue] = laneValue;
    }
}
|
||||
|
||||
|
||||
// Overwrites array[0 .. count) using the plain foreach index: each lane
// stores its own value of zeros[programIndex] (converted to int).
// result[] is not written.
export void stores(uniform float array[], uniform int count,
                   uniform float zeros[], uniform float result[]) {
    int laneValue = zeros[programIndex];
    foreach (k = 0 ... count) {
        array[k] = laneValue;
    }
}
|
||||
|
||||
// Scales each of the count/3 interleaved (x, y, z) triples in array[] in
// place, dividing every component by x*x + y*y + z*z (the squared length; no
// square root is taken).
//
// The three reads have a per-lane offset from zeroArray[programIndex] added to
// their index; the three writes use the plain index.  The offset is held as an
// int, like in xyzSumAOSNoCoalesce, so that the index arithmetic stays in
// integers instead of being evaluated in float and converted back.
export void normalizeAOSNoCoalesce(uniform float array[], uniform int count,
                                   uniform float zeroArray[]) {
    int zeros = zeroArray[programIndex];
    foreach (i = 0 ... count/3) {
        float x = array[3*i+zeros];
        float y = array[3*i+1+zeros];
        float z = array[3*i+2+zeros];

        float l2 = x*x + y*y + z*z;

        array[3*i] /= l2;
        array[3*i+1] /= l2;
        array[3*i+2] /= l2;
    }
}
|
||||
|
||||
// Scales each of the count/3 triples in array[] in place, dividing every
// component by x*x + y*y + z*z (the squared length; no square root is taken).
// NOTE: despite the name, the data is indexed as interleaved triples
// (3*i, 3*i+1, 3*i+2).  The zeros[] argument is not used.
export void normalizeSOA(uniform float array[], uniform int count,
                         uniform float zeros[]) {
    foreach (i = 0 ... count/3) {
        float vx = array[3*i];
        float vy = array[3*i+1];
        float vz = array[3*i+2];

        float lenSq = vx*vx + vy*vy + vz*vz;

        array[3*i] /= lenSq;
        array[3*i+1] /= lenSq;
        array[3*i+2] /= lenSq;
    }
}
|
||||
175
examples/perfbench/perfbench.vcxproj
Normal file
175
examples/perfbench/perfbench.vcxproj
Normal file
@@ -0,0 +1,175 @@
|
||||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
|
||||
<ItemGroup Label="ProjectConfigurations">
|
||||
<ProjectConfiguration Include="Debug|Win32">
|
||||
<Configuration>Debug</Configuration>
|
||||
<Platform>Win32</Platform>
|
||||
</ProjectConfiguration>
|
||||
<ProjectConfiguration Include="Debug|x64">
|
||||
<Configuration>Debug</Configuration>
|
||||
<Platform>x64</Platform>
|
||||
</ProjectConfiguration>
|
||||
<ProjectConfiguration Include="Release|Win32">
|
||||
<Configuration>Release</Configuration>
|
||||
<Platform>Win32</Platform>
|
||||
</ProjectConfiguration>
|
||||
<ProjectConfiguration Include="Release|x64">
|
||||
<Configuration>Release</Configuration>
|
||||
<Platform>x64</Platform>
|
||||
</ProjectConfiguration>
|
||||
</ItemGroup>
|
||||
<PropertyGroup Label="Globals">
|
||||
<ProjectGuid>{d923bb7e-a7c8-4850-8fcf-0eb9ce35b4e8}</ProjectGuid>
|
||||
<Keyword>Win32Proj</Keyword>
|
||||
<RootNamespace>perfbench</RootNamespace>
|
||||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
|
||||
<ConfigurationType>Application</ConfigurationType>
|
||||
<UseDebugLibraries>true</UseDebugLibraries>
|
||||
<CharacterSet>Unicode</CharacterSet>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
|
||||
<ConfigurationType>Application</ConfigurationType>
|
||||
<UseDebugLibraries>true</UseDebugLibraries>
|
||||
<CharacterSet>Unicode</CharacterSet>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
|
||||
<ConfigurationType>Application</ConfigurationType>
|
||||
<UseDebugLibraries>false</UseDebugLibraries>
|
||||
<WholeProgramOptimization>true</WholeProgramOptimization>
|
||||
<CharacterSet>Unicode</CharacterSet>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
|
||||
<ConfigurationType>Application</ConfigurationType>
|
||||
<UseDebugLibraries>false</UseDebugLibraries>
|
||||
<WholeProgramOptimization>true</WholeProgramOptimization>
|
||||
<CharacterSet>Unicode</CharacterSet>
|
||||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
||||
<ImportGroup Label="ExtensionSettings">
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
|
||||
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="PropertySheets">
|
||||
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
|
||||
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="PropertySheets">
|
||||
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
|
||||
</ImportGroup>
|
||||
<PropertyGroup Label="UserMacros" />
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
|
||||
<LinkIncremental>true</LinkIncremental>
|
||||
<ExecutablePath>$(ProjectDir)..\..;$(ExecutablePath)</ExecutablePath>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
|
||||
<LinkIncremental>true</LinkIncremental>
|
||||
<ExecutablePath>$(ProjectDir)..\..;$(ExecutablePath)</ExecutablePath>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
|
||||
<LinkIncremental>false</LinkIncremental>
|
||||
<ExecutablePath>$(ProjectDir)..\..;$(ExecutablePath)</ExecutablePath>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
|
||||
<LinkIncremental>false</LinkIncremental>
|
||||
<ExecutablePath>$(ProjectDir)..\..;$(ExecutablePath)</ExecutablePath>
|
||||
</PropertyGroup>
|
||||
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
|
||||
<ClCompile>
|
||||
<PrecompiledHeader>
|
||||
</PrecompiledHeader>
|
||||
<WarningLevel>Level3</WarningLevel>
|
||||
<Optimization>Disabled</Optimization>
|
||||
<PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
<AdditionalIncludeDirectories>$(TargetDir)</AdditionalIncludeDirectories>
|
||||
<IntrinsicFunctions>true</IntrinsicFunctions>
|
||||
<FloatingPointModel>Fast</FloatingPointModel>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
<SubSystem>Console</SubSystem>
|
||||
<GenerateDebugInformation>true</GenerateDebugInformation>
|
||||
</Link>
|
||||
</ItemDefinitionGroup>
|
||||
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
|
||||
<ClCompile>
|
||||
<PrecompiledHeader>
|
||||
</PrecompiledHeader>
|
||||
<WarningLevel>Level3</WarningLevel>
|
||||
<Optimization>Disabled</Optimization>
|
||||
<PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
<AdditionalIncludeDirectories>$(TargetDir)</AdditionalIncludeDirectories>
|
||||
<IntrinsicFunctions>true</IntrinsicFunctions>
|
||||
<FloatingPointModel>Fast</FloatingPointModel>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
<SubSystem>Console</SubSystem>
|
||||
<GenerateDebugInformation>true</GenerateDebugInformation>
|
||||
</Link>
|
||||
</ItemDefinitionGroup>
|
||||
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
|
||||
<ClCompile>
|
||||
<WarningLevel>Level3</WarningLevel>
|
||||
<PrecompiledHeader>
|
||||
</PrecompiledHeader>
|
||||
<Optimization>MaxSpeed</Optimization>
|
||||
<FunctionLevelLinking>true</FunctionLevelLinking>
|
||||
<IntrinsicFunctions>true</IntrinsicFunctions>
|
||||
<PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
<AdditionalIncludeDirectories>$(TargetDir)</AdditionalIncludeDirectories>
|
||||
<FloatingPointModel>Fast</FloatingPointModel>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
<SubSystem>Console</SubSystem>
|
||||
<GenerateDebugInformation>true</GenerateDebugInformation>
|
||||
<EnableCOMDATFolding>true</EnableCOMDATFolding>
|
||||
<OptimizeReferences>true</OptimizeReferences>
|
||||
</Link>
|
||||
</ItemDefinitionGroup>
|
||||
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
|
||||
<ClCompile>
|
||||
<WarningLevel>Level3</WarningLevel>
|
||||
<PrecompiledHeader>
|
||||
</PrecompiledHeader>
|
||||
<Optimization>MaxSpeed</Optimization>
|
||||
<FunctionLevelLinking>true</FunctionLevelLinking>
|
||||
<IntrinsicFunctions>true</IntrinsicFunctions>
|
||||
<PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
<AdditionalIncludeDirectories>$(TargetDir)</AdditionalIncludeDirectories>
|
||||
<FloatingPointModel>Fast</FloatingPointModel>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
<SubSystem>Console</SubSystem>
|
||||
<GenerateDebugInformation>true</GenerateDebugInformation>
|
||||
<EnableCOMDATFolding>true</EnableCOMDATFolding>
|
||||
<OptimizeReferences>true</OptimizeReferences>
|
||||
</Link>
|
||||
</ItemDefinitionGroup>
|
||||
<ItemGroup>
|
||||
<ClCompile Include="perfbench.cpp" />
|
||||
<ClCompile Include="perfbench_serial.cpp" />
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<CustomBuild Include="perfbench.ispc">
|
||||
<FileType>Document</FileType>
|
||||
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">ispc -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --arch=x86 --target=sse2,sse4,avx
|
||||
</Command>
|
||||
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">ispc -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --target=sse2,sse4,avx
|
||||
</Command>
|
||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_avx.obj;$(TargetDir)%(Filename)_ispc.h</Outputs>
|
||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">$(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_avx.obj;$(TargetDir)%(Filename)_ispc.h</Outputs>
|
||||
<Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">ispc -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --arch=x86 --target=sse2,sse4,avx
|
||||
</Command>
|
||||
<Command Condition="'$(Configuration)|$(Platform)'=='Release|x64'">ispc -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --target=sse2,sse4,avx
|
||||
</Command>
|
||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_avx.obj;$(TargetDir)%(Filename)_ispc.h</Outputs>
|
||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_avx.obj;$(TargetDir)%(Filename)_ispc.h</Outputs>
|
||||
</CustomBuild>
|
||||
</ItemGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||
<ImportGroup Label="ExtensionTargets">
|
||||
</ImportGroup>
|
||||
</Project>
|
||||
61
examples/perfbench/perfbench_serial.cpp
Normal file
61
examples/perfbench/perfbench_serial.cpp
Normal file
@@ -0,0 +1,61 @@
|
||||
/*
|
||||
Copyright (c) 2012, Intel Corporation
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of Intel Corporation nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
|
||||
IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
||||
TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
|
||||
PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
|
||||
OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include <math.h>
|
||||
|
||||
// Serial reference: sums the x, y and z components of the interleaved
// (x, y, z) triples in a[0 .. count) and stores the totals in result[0..2].
//
// Only whole triples are consumed (count/3 of them, matching the ISPC
// versions); up to two trailing floats are ignored instead of being read past
// the end of the buffer.  The zeros argument is unused and exists only so the
// function matches the common benchmark signature.
void
xyzSumAOS(float *a, int count, float *zeros, float *result) {
    float xsum = 0, ysum = 0, zsum = 0;
    // i + 2 < count guarantees a[i], a[i+1] and a[i+2] are all in range.
    for (int i = 0; i + 2 < count; i += 3) {
        xsum += a[i];
        ysum += a[i+1];
        zsum += a[i+2];
    }
    result[0] = xsum;
    result[1] = ysum;
    result[2] = zsum;
}
|
||||
|
||||
// Serial reference for data stored in groups of 24 floats, where floats 0..7
// of a group are read as x, 8..15 as y and 16..23 as z.  Visits count/3
// elements in order and stores the three totals in result[0..2].
// The zeros argument is unused.
// NOTE(review): the last group is indexed with the same 8/16 strides even if
// it is only partly used, so the buffer presumably has to hold whole
// 24-float groups -- confirm against callers.
void
xyzSumSOA(float *a, int count, float *zeros, float *result) {
    float xsum = 0, ysum = 0, zsum = 0;
    const int nElems = count / 3;
    for (int first = 0; first < nElems; first += 8) {
        // Start of the 24-float group holding elements first .. first+7.
        const float *group = a + (first / 8) * 24;
        const int remaining = nElems - first;
        const int used = remaining < 8 ? remaining : 8;
        for (int slot = 0; slot < used; ++slot) {
            xsum += group[slot];
            ysum += group[slot + 8];
            zsum += group[slot + 16];
        }
    }
    result[0] = xsum;
    result[1] = ysum;
    result[2] = zsum;
}
|
||||
@@ -1,38 +1,7 @@
|
||||
|
||||
ARCH = $(shell uname)
|
||||
EXAMPLE=rt
|
||||
CPP_SRC=rt.cpp rt_serial.cpp
|
||||
ISPC_SRC=rt.ispc
|
||||
ISPC_TARGETS=sse2,sse4-x2,avx
|
||||
|
||||
TASK_CXX=../tasksys.cpp
|
||||
TASK_LIB=-lpthread
|
||||
TASK_OBJ=$(addprefix objs/, $(subst ../,, $(TASK_CXX:.cpp=.o)))
|
||||
|
||||
CXX=g++
|
||||
CXXFLAGS=-Iobjs/ -O3 -Wall -m64
|
||||
ISPC=ispc
|
||||
ISPCFLAGS=-O2 --target=sse2,sse4-x2,avx --arch=x86-64
|
||||
|
||||
OBJS=objs/rt.o objs/rt_serial.o $(TASK_OBJ) objs/rt_ispc.o objs/rt_ispc_sse2.o \
|
||||
objs/rt_ispc_sse4.o objs/rt_ispc_avx.o
|
||||
|
||||
default: rt
|
||||
|
||||
.PHONY: dirs clean
|
||||
|
||||
dirs:
|
||||
/bin/mkdir -p objs/
|
||||
|
||||
clean:
|
||||
/bin/rm -rf objs *~ rt
|
||||
|
||||
rt: dirs $(OBJS)
|
||||
$(CXX) $(CXXFLAGS) -o $@ $(OBJS) -lm $(TASK_LIB)
|
||||
|
||||
objs/%.o: %.cpp
|
||||
$(CXX) $< $(CXXFLAGS) -c -o $@
|
||||
|
||||
objs/%.o: ../%.cpp
|
||||
$(CXX) $< $(CXXFLAGS) -c -o $@
|
||||
|
||||
objs/rt.o: objs/rt_ispc.h
|
||||
|
||||
objs/%_ispc.h objs/%_ispc.o objs/%_ispc_sse2.o objs/%_ispc_sse4.o objs/%_ispc_avx.o: %.ispc
|
||||
$(ISPC) $(ISPCFLAGS) $< -o objs/$*_ispc.o -h objs/$*_ispc.h
|
||||
include ../common.mk
|
||||
|
||||
@@ -43,17 +43,17 @@ struct Ray {
|
||||
};
|
||||
|
||||
struct Triangle {
|
||||
uniform float p[3][4];
|
||||
uniform int id;
|
||||
uniform int pad[3];
|
||||
float p[3][4];
|
||||
int id;
|
||||
int pad[3];
|
||||
};
|
||||
|
||||
struct LinearBVHNode {
|
||||
uniform float bounds[2][3];
|
||||
uniform unsigned int offset; // num primitives for leaf, second child for interior
|
||||
uniform unsigned int8 nPrimitives;
|
||||
uniform unsigned int8 splitAxis;
|
||||
uniform unsigned int16 pad;
|
||||
float bounds[2][3];
|
||||
unsigned int offset; // num primitives for leaf, second child for interior
|
||||
unsigned int8 nPrimitives;
|
||||
unsigned int8 splitAxis;
|
||||
unsigned int16 pad;
|
||||
};
|
||||
|
||||
static inline float3 Cross(const float3 v1, const float3 v2) {
|
||||
@@ -88,9 +88,12 @@ static void generateRay(uniform const float raster2camera[4][4],
|
||||
camy /= camw;
|
||||
camz /= camw;
|
||||
|
||||
ray.dir.x = camera2world[0][0] * camx + camera2world[0][1] * camy + camera2world[0][2] * camz;
|
||||
ray.dir.y = camera2world[1][0] * camx + camera2world[1][1] * camy + camera2world[1][2] * camz;
|
||||
ray.dir.z = camera2world[2][0] * camx + camera2world[2][1] * camy + camera2world[2][2] * camz;
|
||||
ray.dir.x = camera2world[0][0] * camx + camera2world[0][1] * camy +
|
||||
camera2world[0][2] * camz;
|
||||
ray.dir.y = camera2world[1][0] * camx + camera2world[1][1] * camy +
|
||||
camera2world[1][2] * camz;
|
||||
ray.dir.z = camera2world[2][0] * camx + camera2world[2][1] * camy +
|
||||
camera2world[2][2] * camz;
|
||||
|
||||
ray.origin.x = camera2world[0][3] / camera2world[3][3];
|
||||
ray.origin.y = camera2world[1][3] / camera2world[3][3];
|
||||
@@ -104,8 +107,8 @@ static void generateRay(uniform const float raster2camera[4][4],
|
||||
}
|
||||
|
||||
|
||||
static inline bool BBoxIntersect(const uniform float bounds[2][3],
|
||||
const Ray &ray) {
|
||||
static bool BBoxIntersect(const uniform float bounds[2][3],
|
||||
const Ray &ray) {
|
||||
uniform float3 bounds0 = { bounds[0][0], bounds[0][1], bounds[0][2] };
|
||||
uniform float3 bounds1 = { bounds[1][0], bounds[1][1], bounds[1][2] };
|
||||
float t0 = ray.mint, t1 = ray.maxt;
|
||||
@@ -143,7 +146,7 @@ static inline bool BBoxIntersect(const uniform float bounds[2][3],
|
||||
|
||||
|
||||
|
||||
static inline bool TriIntersect(const Triangle &tri, Ray &ray) {
|
||||
static bool TriIntersect(const uniform Triangle &tri, Ray &ray) {
|
||||
uniform float3 p0 = { tri.p[0][0], tri.p[0][1], tri.p[0][2] };
|
||||
uniform float3 p1 = { tri.p[1][0], tri.p[1][1], tri.p[1][2] };
|
||||
uniform float3 p2 = { tri.p[2][0], tri.p[2][1], tri.p[2][2] };
|
||||
@@ -183,8 +186,8 @@ static inline bool TriIntersect(const Triangle &tri, Ray &ray) {
|
||||
}
|
||||
|
||||
|
||||
bool BVHIntersect(const LinearBVHNode nodes[], const Triangle tris[],
|
||||
Ray &r) {
|
||||
bool BVHIntersect(const uniform LinearBVHNode nodes[],
|
||||
const uniform Triangle tris[], Ray &r) {
|
||||
Ray ray = r;
|
||||
bool hit = false;
|
||||
// Follow ray through BVH nodes to find primitive intersections
|
||||
@@ -193,7 +196,7 @@ bool BVHIntersect(const LinearBVHNode nodes[], const Triangle tris[],
|
||||
|
||||
while (true) {
|
||||
// Check ray against BVH node
|
||||
LinearBVHNode node = nodes[nodeNum];
|
||||
uniform LinearBVHNode node = nodes[nodeNum];
|
||||
if (any(BBoxIntersect(node.bounds, ray))) {
|
||||
uniform unsigned int nPrimitives = node.nPrimitives;
|
||||
if (nPrimitives > 0) {
|
||||
@@ -239,8 +242,8 @@ static void raytrace_tile(uniform int x0, uniform int x1,
|
||||
const uniform float raster2camera[4][4],
|
||||
const uniform float camera2world[4][4],
|
||||
uniform float image[], uniform int id[],
|
||||
const LinearBVHNode nodes[],
|
||||
const Triangle triangles[]) {
|
||||
const uniform LinearBVHNode nodes[],
|
||||
const uniform Triangle triangles[]) {
|
||||
uniform float widthScale = (float)(baseWidth) / (float)(width);
|
||||
uniform float heightScale = (float)(baseHeight) / (float)(height);
|
||||
|
||||
@@ -262,8 +265,8 @@ export void raytrace_ispc(uniform int width, uniform int height,
|
||||
const uniform float raster2camera[4][4],
|
||||
const uniform float camera2world[4][4],
|
||||
uniform float image[], uniform int id[],
|
||||
const LinearBVHNode nodes[],
|
||||
const Triangle triangles[]) {
|
||||
const uniform LinearBVHNode nodes[],
|
||||
const uniform Triangle triangles[]) {
|
||||
raytrace_tile(0, width, 0, height, width, height, baseWidth, baseHeight,
|
||||
raster2camera, camera2world, image,
|
||||
id, nodes, triangles);
|
||||
@@ -275,8 +278,8 @@ task void raytrace_tile_task(uniform int width, uniform int height,
|
||||
const uniform float raster2camera[4][4],
|
||||
const uniform float camera2world[4][4],
|
||||
uniform float image[], uniform int id[],
|
||||
const LinearBVHNode nodes[],
|
||||
const Triangle triangles[]) {
|
||||
const uniform LinearBVHNode nodes[],
|
||||
const uniform Triangle triangles[]) {
|
||||
uniform int dx = 16, dy = 16; // must match dx, dy below
|
||||
uniform int xBuckets = (width + (dx-1)) / dx;
|
||||
uniform int x0 = (taskIndex % xBuckets) * dx;
|
||||
@@ -295,14 +298,14 @@ export void raytrace_ispc_tasks(uniform int width, uniform int height,
|
||||
const uniform float raster2camera[4][4],
|
||||
const uniform float camera2world[4][4],
|
||||
uniform float image[], uniform int id[],
|
||||
const LinearBVHNode nodes[],
|
||||
const Triangle triangles[]) {
|
||||
const uniform LinearBVHNode nodes[],
|
||||
const uniform Triangle triangles[]) {
|
||||
uniform int dx = 16, dy = 16;
|
||||
uniform int xBuckets = (width + (dx-1)) / dx;
|
||||
uniform int yBuckets = (height + (dy-1)) / dy;
|
||||
uniform int nTasks = xBuckets * yBuckets;
|
||||
launch[nTasks] < raytrace_tile_task(width, height, baseWidth, baseHeight,
|
||||
raster2camera, camera2world,
|
||||
image, id, nodes, triangles) >;
|
||||
launch[nTasks] raytrace_tile_task(width, height, baseWidth, baseHeight,
|
||||
raster2camera, camera2world,
|
||||
image, id, nodes, triangles);
|
||||
}
|
||||
|
||||
|
||||
@@ -123,9 +123,12 @@ static void generateRay(const float raster2camera[4][4],
|
||||
camy /= camw;
|
||||
camz /= camw;
|
||||
|
||||
ray.dir.x = camera2world[0][0] * camx + camera2world[0][1] * camy + camera2world[0][2] * camz;
|
||||
ray.dir.y = camera2world[1][0] * camx + camera2world[1][1] * camy + camera2world[1][2] * camz;
|
||||
ray.dir.z = camera2world[2][0] * camx + camera2world[2][1] * camy + camera2world[2][2] * camz;
|
||||
ray.dir.x = camera2world[0][0] * camx + camera2world[0][1] * camy +
|
||||
camera2world[0][2] * camz;
|
||||
ray.dir.y = camera2world[1][0] * camx + camera2world[1][1] * camy +
|
||||
camera2world[1][2] * camz;
|
||||
ray.dir.z = camera2world[2][0] * camx + camera2world[2][1] * camy +
|
||||
camera2world[2][2] * camz;
|
||||
|
||||
ray.origin.x = camera2world[0][3] / camera2world[3][3];
|
||||
ray.origin.y = camera2world[1][3] / camera2world[3][3];
|
||||
|
||||
@@ -1,39 +1,7 @@
|
||||
|
||||
ARCH = $(shell uname)
|
||||
EXAMPLE=stencil
|
||||
CPP_SRC=stencil.cpp stencil_serial.cpp
|
||||
ISPC_SRC=stencil.ispc
|
||||
ISPC_TARGETS=sse2,sse4-x2,avx-x2
|
||||
|
||||
TASK_CXX=../tasksys.cpp
|
||||
TASK_LIB=-lpthread
|
||||
TASK_OBJ=$(addprefix objs/, $(subst ../,, $(TASK_CXX:.cpp=.o)))
|
||||
|
||||
CXX=g++
|
||||
CXXFLAGS=-Iobjs/ -O3 -Wall -m64
|
||||
ISPC=ispc
|
||||
ISPCFLAGS=-O2 --target=sse2,sse4-x2,avx --arch=x86-64
|
||||
|
||||
OBJS=objs/stencil.o objs/stencil_serial.o $(TASK_OBJ) objs/stencil_ispc.o \
|
||||
objs/stencil_ispc_sse2.o objs/stencil_ispc_sse4.o \
|
||||
objs/stencil_ispc_avx.o
|
||||
|
||||
default: stencil
|
||||
|
||||
.PHONY: dirs clean
|
||||
|
||||
dirs:
|
||||
/bin/mkdir -p objs/
|
||||
|
||||
clean:
|
||||
/bin/rm -rf objs *~ stencil
|
||||
|
||||
stencil: dirs $(OBJS)
|
||||
$(CXX) $(CXXFLAGS) -o $@ $(OBJS) -lm $(TASK_LIB)
|
||||
|
||||
objs/%.o: %.cpp
|
||||
$(CXX) $< $(CXXFLAGS) -c -o $@
|
||||
|
||||
objs/%.o: ../%.cpp
|
||||
$(CXX) $< $(CXXFLAGS) -c -o $@
|
||||
|
||||
objs/stencil.o: objs/stencil_ispc.h
|
||||
|
||||
objs/%_ispc.h objs/%_ispc.o objs/%_ispc_sse2.o objs/%_ispc_sse4.o objs/%_ispc_avx.o: %.ispc
|
||||
$(ISPC) $(ISPCFLAGS) $< -o objs/$*_ispc.o -h objs/$*_ispc.h
|
||||
include ../common.mk
|
||||
|
||||
@@ -41,27 +41,23 @@ stencil_step(uniform int x0, uniform int x1,
|
||||
uniform const float Ain[], uniform float Aout[]) {
|
||||
const uniform int Nxy = Nx * Ny;
|
||||
|
||||
for (uniform int z = z0; z < z1; ++z) {
|
||||
for (uniform int y = y0; y < y1; ++y) {
|
||||
foreach (x = x0 ... x1) {
|
||||
int index = (z * Nxy) + (y * Nx) + x;
|
||||
foreach (z = z0 ... z1, y = y0 ... y1, x = x0 ... x1) {
|
||||
int index = (z * Nxy) + (y * Nx) + x;
|
||||
#define A_cur(x, y, z) Ain[index + (x) + ((y) * Nx) + ((z) * Nxy)]
|
||||
#define A_next(x, y, z) Aout[index + (x) + ((y) * Nx) + ((z) * Nxy)]
|
||||
float div = coef[0] * A_cur(0, 0, 0) +
|
||||
coef[1] * (A_cur(+1, 0, 0) + A_cur(-1, 0, 0) +
|
||||
A_cur(0, +1, 0) + A_cur(0, -1, 0) +
|
||||
A_cur(0, 0, +1) + A_cur(0, 0, -1)) +
|
||||
coef[2] * (A_cur(+2, 0, 0) + A_cur(-2, 0, 0) +
|
||||
A_cur(0, +2, 0) + A_cur(0, -2, 0) +
|
||||
A_cur(0, 0, +2) + A_cur(0, 0, -2)) +
|
||||
coef[3] * (A_cur(+3, 0, 0) + A_cur(-3, 0, 0) +
|
||||
A_cur(0, +3, 0) + A_cur(0, -3, 0) +
|
||||
A_cur(0, 0, +3) + A_cur(0, 0, -3));
|
||||
float div = coef[0] * A_cur(0, 0, 0) +
|
||||
coef[1] * (A_cur(+1, 0, 0) + A_cur(-1, 0, 0) +
|
||||
A_cur(0, +1, 0) + A_cur(0, -1, 0) +
|
||||
A_cur(0, 0, +1) + A_cur(0, 0, -1)) +
|
||||
coef[2] * (A_cur(+2, 0, 0) + A_cur(-2, 0, 0) +
|
||||
A_cur(0, +2, 0) + A_cur(0, -2, 0) +
|
||||
A_cur(0, 0, +2) + A_cur(0, 0, -2)) +
|
||||
coef[3] * (A_cur(+3, 0, 0) + A_cur(-3, 0, 0) +
|
||||
A_cur(0, +3, 0) + A_cur(0, -3, 0) +
|
||||
A_cur(0, 0, +3) + A_cur(0, 0, -3));
|
||||
|
||||
A_next(0, 0, 0) = 2 * A_cur(0, 0, 0) - A_next(0, 0, 0) +
|
||||
vsq[index] * div;
|
||||
}
|
||||
}
|
||||
A_next(0, 0, 0) = 2 * A_cur(0, 0, 0) - A_next(0, 0, 0) +
|
||||
vsq[index] * div;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -69,11 +65,12 @@ stencil_step(uniform int x0, uniform int x1,
|
||||
static task void
|
||||
stencil_step_task(uniform int x0, uniform int x1,
|
||||
uniform int y0, uniform int y1,
|
||||
uniform int z0, uniform int z1,
|
||||
uniform int z0,
|
||||
uniform int Nx, uniform int Ny, uniform int Nz,
|
||||
uniform const float coef[4], uniform const float vsq[],
|
||||
uniform const float Ain[], uniform float Aout[]) {
|
||||
stencil_step(x0, x1, y0, y1, z0, z1, Nx, Ny, Nz, coef, vsq, Ain, Aout);
|
||||
stencil_step(x0, x1, y0, y1, z0+taskIndex, z0+taskIndex+1,
|
||||
Nx, Ny, Nz, coef, vsq, Ain, Aout);
|
||||
}
|
||||
|
||||
|
||||
@@ -89,17 +86,14 @@ loop_stencil_ispc_tasks(uniform int t0, uniform int t1,
|
||||
{
|
||||
for (uniform int t = t0; t < t1; ++t) {
|
||||
// Parallelize across cores as well: each task will work on a slice
|
||||
// of "dz" in the z extent of the volume. (dz=1 seems to work
|
||||
// better than any larger values.)
|
||||
uniform int dz = 1;
|
||||
for (uniform int z = z0; z < z1; z += dz) {
|
||||
if ((t & 1) == 0)
|
||||
launch < stencil_step_task(x0, x1, y0, y1, z, z+dz, Nx, Ny, Nz,
|
||||
coef, vsq, Aeven, Aodd) >;
|
||||
else
|
||||
launch < stencil_step_task(x0, x1, y0, y1, z, z+dz, Nx, Ny, Nz,
|
||||
coef, vsq, Aodd, Aeven) >;
|
||||
}
|
||||
// of 1 in the z extent of the volume.
|
||||
if ((t & 1) == 0)
|
||||
launch[z1-z0] stencil_step_task(x0, x1, y0, y1, z0, Nx, Ny, Nz,
|
||||
coef, vsq, Aeven, Aodd);
|
||||
else
|
||||
launch[z1-z0] stencil_step_task(x0, x1, y0, y1, z0, Nx, Ny, Nz,
|
||||
coef, vsq, Aodd, Aeven);
|
||||
|
||||
// We need to wait for all of the launched tasks to finish before
|
||||
// starting the next iteration.
|
||||
sync;
|
||||
|
||||
@@ -273,7 +273,7 @@ lAtomicCompareAndSwapPointer(void **v, void *newValue, void *oldValue) {
|
||||
#else
|
||||
void *result;
|
||||
#if (ISPC_POINTER_BYTES == 4)
|
||||
__asm__ __volatile__("lock\ncmpxchgd %2,%1"
|
||||
__asm__ __volatile__("lock\ncmpxchgl %2,%1"
|
||||
: "=a"(result), "=m"(*v)
|
||||
: "q"(newValue), "0"(oldValue)
|
||||
: "memory");
|
||||
|
||||
@@ -1,38 +1,7 @@
|
||||
|
||||
ARCH = $(shell uname)
|
||||
EXAMPLE=volume
|
||||
CPP_SRC=volume.cpp volume_serial.cpp
|
||||
ISPC_SRC=volume.ispc
|
||||
ISPC_TARGETS=sse2,sse4-x2,avx
|
||||
|
||||
TASK_CXX=../tasksys.cpp
|
||||
TASK_LIB=-lpthread
|
||||
TASK_OBJ=$(addprefix objs/, $(subst ../,, $(TASK_CXX:.cpp=.o)))
|
||||
|
||||
CXX=g++
|
||||
CXXFLAGS=-Iobjs/ -O3 -Wall -m64
|
||||
ISPC=ispc
|
||||
ISPCFLAGS=-O2 --target=sse2,sse4-x2 --arch=x86-64
|
||||
|
||||
OBJS=objs/volume.o objs/volume_serial.o $(TASK_OBJ) objs/volume_ispc.o \
|
||||
objs/volume_ispc_sse2.o objs/volume_ispc_sse4.o
|
||||
|
||||
default: volume
|
||||
|
||||
.PHONY: dirs clean
|
||||
|
||||
dirs:
|
||||
/bin/mkdir -p objs/
|
||||
|
||||
clean:
|
||||
/bin/rm -rf objs *~ volume
|
||||
|
||||
volume: dirs $(OBJS)
|
||||
$(CXX) $(CXXFLAGS) -o $@ $(OBJS) -lm $(TASK_LIB)
|
||||
|
||||
objs/%.o: %.cpp
|
||||
$(CXX) $< $(CXXFLAGS) -c -o $@
|
||||
|
||||
objs/%.o: ../%.cpp
|
||||
$(CXX) $< $(CXXFLAGS) -c -o $@
|
||||
|
||||
objs/volume.o: objs/volume_ispc.h
|
||||
|
||||
objs/%_ispc.h objs/%_ispc.o objs/%_ispc_sse2.o objs/%_ispc_sse4.o: %.ispc
|
||||
$(ISPC) $(ISPCFLAGS) $< -o objs/$*_ispc.o -h objs/$*_ispc.h
|
||||
include ../common.mk
|
||||
|
||||
@@ -124,24 +124,13 @@ static inline float D(int x, int y, int z, uniform int nVoxels[3],
|
||||
}
|
||||
|
||||
|
||||
static inline float Du(uniform int x, uniform int y, uniform int z,
|
||||
uniform int nVoxels[3], uniform float density[]) {
|
||||
x = clamp(x, 0, nVoxels[0]-1);
|
||||
y = clamp(y, 0, nVoxels[1]-1);
|
||||
z = clamp(z, 0, nVoxels[2]-1);
|
||||
|
||||
return density[z*nVoxels[0]*nVoxels[1] + y*nVoxels[0] + x];
|
||||
}
|
||||
|
||||
|
||||
static inline float3 Offset(float3 p, float3 pMin, float3 pMax) {
|
||||
return (p - pMin) / (pMax - pMin);
|
||||
}
|
||||
|
||||
|
||||
static inline float Density(float3 Pobj, float3 pMin, float3 pMax,
|
||||
uniform float density[], uniform int nVoxels[3],
|
||||
uniform bool &checkForSameVoxel) {
|
||||
static float Density(float3 Pobj, float3 pMin, float3 pMax,
|
||||
uniform float density[], uniform int nVoxels[3]) {
|
||||
if (!Inside(Pobj, pMin, pMax))
|
||||
return 0;
|
||||
// Compute voxel coordinates and offsets for _Pobj_
|
||||
@@ -153,39 +142,14 @@ static inline float Density(float3 Pobj, float3 pMin, float3 pMax,
|
||||
float dx = vox.x - vx, dy = vox.y - vy, dz = vox.z - vz;
|
||||
|
||||
// Trilinearly interpolate density values to compute local density
|
||||
float d00, d10, d01, d11;
|
||||
uniform int uvx, uvy, uvz;
|
||||
if (checkForSameVoxel && reduce_equal(vx, &uvx) && reduce_equal(vy, &uvy) &&
|
||||
reduce_equal(vz, &uvz)) {
|
||||
// If all of the program instances are inside the same voxel, then
|
||||
// we'll call the 'uniform' variant of the voxel density lookup
|
||||
// function, thus doing a single load for each value rather than a
|
||||
// gather.
|
||||
d00 = Lerp(dx, Du(uvx, uvy, uvz, nVoxels, density),
|
||||
Du(uvx+1, uvy, uvz, nVoxels, density));
|
||||
d10 = Lerp(dx, Du(uvx, uvy+1, uvz, nVoxels, density),
|
||||
Du(uvx+1, uvy+1, uvz, nVoxels, density));
|
||||
d01 = Lerp(dx, Du(uvx, uvy, uvz+1, nVoxels, density),
|
||||
Du(uvx+1, uvy, uvz+1, nVoxels, density));
|
||||
d11 = Lerp(dx, Du(uvx, uvy+1, uvz+1, nVoxels, density),
|
||||
Du(uvx+1, uvy+1, uvz+1, nVoxels, density));
|
||||
}
|
||||
else {
|
||||
// Otherwise, we have to do an actual gather in the more general
|
||||
// D() function. Once the reduce_equal tests above fail, we stop
|
||||
// checking in subsequent steps, since it's unlikely that this will
|
||||
// be true in the future once they've diverged into different
|
||||
// voxels.
|
||||
checkForSameVoxel = false;
|
||||
d00 = Lerp(dx, D(vx, vy, vz, nVoxels, density),
|
||||
D(vx+1, vy, vz, nVoxels, density));
|
||||
d10 = Lerp(dx, D(vx, vy+1, vz, nVoxels, density),
|
||||
D(vx+1, vy+1, vz, nVoxels, density));
|
||||
d01 = Lerp(dx, D(vx, vy, vz+1, nVoxels, density),
|
||||
D(vx+1, vy, vz+1, nVoxels, density));
|
||||
d11 = Lerp(dx, D(vx, vy+1, vz+1, nVoxels, density),
|
||||
D(vx+1, vy+1, vz+1, nVoxels, density));
|
||||
}
|
||||
float d00 = Lerp(dx, D(vx, vy, vz, nVoxels, density),
|
||||
D(vx+1, vy, vz, nVoxels, density));
|
||||
float d10 = Lerp(dx, D(vx, vy+1, vz, nVoxels, density),
|
||||
D(vx+1, vy+1, vz, nVoxels, density));
|
||||
float d01 = Lerp(dx, D(vx, vy, vz+1, nVoxels, density),
|
||||
D(vx+1, vy, vz+1, nVoxels, density));
|
||||
float d11 = Lerp(dx, D(vx, vy+1, vz+1, nVoxels, density),
|
||||
D(vx+1, vy+1, vz+1, nVoxels, density));
|
||||
float d0 = Lerp(dy, d00, d10);
|
||||
float d1 = Lerp(dy, d01, d11);
|
||||
return Lerp(dz, d0, d1);
|
||||
@@ -221,10 +185,8 @@ transmittance(uniform float3 p0, float3 p1, uniform float3 pMin,
|
||||
float t = rayT0;
|
||||
float3 pos = ray.origin + ray.dir * rayT0;
|
||||
float3 dirStep = ray.dir * stepT;
|
||||
uniform bool checkForSameVoxel = true;
|
||||
while (t < rayT1) {
|
||||
tau += stepDist * sigma_t * Density(pos, pMin, pMax, density, nVoxels,
|
||||
checkForSameVoxel);
|
||||
tau += stepDist * sigma_t * Density(pos, pMin, pMax, density, nVoxels);
|
||||
pos = pos + dirStep;
|
||||
t += stepT;
|
||||
}
|
||||
@@ -268,9 +230,8 @@ raymarch(uniform float density[], uniform int nVoxels[3], Ray ray) {
|
||||
float t = rayT0;
|
||||
float3 pos = ray.origin + ray.dir * rayT0;
|
||||
float3 dirStep = ray.dir * stepT;
|
||||
uniform bool checkForSameVoxel = true;
|
||||
cwhile (t < rayT1) {
|
||||
float d = Density(pos, pMin, pMax, density, nVoxels, checkForSameVoxel);
|
||||
float d = Density(pos, pMin, pMax, density, nVoxels);
|
||||
|
||||
// terminate once attenuation is high
|
||||
float atten = exp(-tau);
|
||||
@@ -375,6 +336,6 @@ volume_ispc_tasks(uniform float density[], uniform int nVoxels[3],
|
||||
// Launch tasks to work on (dx,dy)-sized tiles of the image
|
||||
uniform int dx = 8, dy = 8;
|
||||
uniform int nTasks = ((width+(dx-1))/dx) * ((height+(dy-1))/dy);
|
||||
launch[nTasks] < volume_task(density, nVoxels, raster2camera, camera2world,
|
||||
width, height, image) >;
|
||||
launch[nTasks] volume_task(density, nVoxels, raster2camera, camera2world,
|
||||
width, height, image);
|
||||
}
|
||||
|
||||
@@ -156,18 +156,18 @@
|
||||
<ItemGroup>
|
||||
<CustomBuild Include="volume.ispc">
|
||||
<FileType>Document</FileType>
|
||||
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">ispc -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --arch=x86 --target=sse2,sse4-x2
|
||||
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">ispc -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --arch=x86 --target=sse2,sse4-x2,avx
|
||||
</Command>
|
||||
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">ispc -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --target=sse2,sse4-x2
|
||||
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">ispc -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --target=sse2,sse4-x2,avx
|
||||
</Command>
|
||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_ispc.h</Outputs>
|
||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">$(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_ispc.h</Outputs>
|
||||
<Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">ispc -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --arch=x86 --target=sse2,sse4-x2
|
||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_avx.obj;$(TargetDir)%(Filename)_ispc.h</Outputs>
|
||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">$(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_avx.obj;$(TargetDir)%(Filename)_ispc.h</Outputs>
|
||||
<Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">ispc -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --arch=x86 --target=sse2,sse4-x2,avx
|
||||
</Command>
|
||||
<Command Condition="'$(Configuration)|$(Platform)'=='Release|x64'">ispc -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --target=sse2,sse4-x2
|
||||
<Command Condition="'$(Configuration)|$(Platform)'=='Release|x64'">ispc -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --target=sse2,sse4-x2,avx
|
||||
</Command>
|
||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_ispc.h</Outputs>
|
||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_ispc.h</Outputs>
|
||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_avx.obj;$(TargetDir)%(Filename)_ispc.h</Outputs>
|
||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_avx.obj;$(TargetDir)%(Filename)_ispc.h</Outputs>
|
||||
</CustomBuild>
|
||||
</ItemGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||
|
||||
125
expr.h
125
expr.h
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
Copyright (c) 2010-2011, Intel Corporation
|
||||
Copyright (c) 2010-2012, Intel Corporation
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
@@ -299,7 +299,6 @@ public:
|
||||
llvm::Value *GetValue(FunctionEmitContext *ctx) const;
|
||||
llvm::Value *GetLValue(FunctionEmitContext *ctx) const;
|
||||
const Type *GetType() const;
|
||||
const Type *GetLValueType() const;
|
||||
Symbol *GetBaseSymbol() const;
|
||||
void Print() const;
|
||||
Expr *Optimize();
|
||||
@@ -314,7 +313,6 @@ public:
|
||||
std::string identifier;
|
||||
const SourcePos identifierPos;
|
||||
|
||||
protected:
|
||||
MemberExpr(Expr *expr, const char *identifier, SourcePos pos,
|
||||
SourcePos identifierPos, bool derefLValue);
|
||||
|
||||
@@ -389,6 +387,10 @@ public:
|
||||
with values given by the "values" parameter. */
|
||||
ConstExpr(ConstExpr *old, double *values);
|
||||
|
||||
/** Create ConstExpr with the same type and values as the given one,
|
||||
but at the given position. */
|
||||
ConstExpr(ConstExpr *old, SourcePos pos);
|
||||
|
||||
llvm::Value *GetValue(FunctionEmitContext *ctx) const;
|
||||
const Type *GetType() const;
|
||||
void Print() const;
|
||||
@@ -493,8 +495,7 @@ private:
|
||||
probably-different type. */
|
||||
class TypeCastExpr : public Expr {
|
||||
public:
|
||||
TypeCastExpr(const Type *t, Expr *e, bool preserveUniformity,
|
||||
SourcePos p);
|
||||
TypeCastExpr(const Type *t, Expr *e, SourcePos p);
|
||||
|
||||
llvm::Value *GetValue(FunctionEmitContext *ctx) const;
|
||||
const Type *GetType() const;
|
||||
@@ -507,7 +508,6 @@ public:
|
||||
|
||||
const Type *type;
|
||||
Expr *expr;
|
||||
bool preserveUniformity;
|
||||
};
|
||||
|
||||
|
||||
@@ -530,26 +530,48 @@ public:
|
||||
};
|
||||
|
||||
|
||||
/** @brief Expression that represents dereferencing a reference to get its
|
||||
value. */
|
||||
class DereferenceExpr : public Expr {
|
||||
/** @brief Common base class that provides shared functionality for
|
||||
PtrDerefExpr and RefDerefExpr. */
|
||||
class DerefExpr : public Expr {
|
||||
public:
|
||||
DereferenceExpr(Expr *e, SourcePos p);
|
||||
DerefExpr(Expr *e, SourcePos p);
|
||||
|
||||
llvm::Value *GetValue(FunctionEmitContext *ctx) const;
|
||||
llvm::Value *GetLValue(FunctionEmitContext *ctx) const;
|
||||
const Type *GetType() const;
|
||||
const Type *GetLValueType() const;
|
||||
Symbol *GetBaseSymbol() const;
|
||||
void Print() const;
|
||||
Expr *TypeCheck();
|
||||
Expr *Optimize();
|
||||
int EstimateCost() const;
|
||||
|
||||
Expr *expr;
|
||||
};
|
||||
|
||||
|
||||
/** @brief Expression that represents dereferencing a pointer to get its
|
||||
value. */
|
||||
class PtrDerefExpr : public DerefExpr {
|
||||
public:
|
||||
PtrDerefExpr(Expr *e, SourcePos p);
|
||||
|
||||
const Type *GetType() const;
|
||||
void Print() const;
|
||||
Expr *TypeCheck();
|
||||
int EstimateCost() const;
|
||||
};
|
||||
|
||||
|
||||
/** @brief Expression that represents dereferencing a reference to get its
|
||||
value. */
|
||||
class RefDerefExpr : public DerefExpr {
|
||||
public:
|
||||
RefDerefExpr(Expr *e, SourcePos p);
|
||||
|
||||
const Type *GetType() const;
|
||||
void Print() const;
|
||||
Expr *TypeCheck();
|
||||
int EstimateCost() const;
|
||||
};
|
||||
|
||||
|
||||
/** Expression that represents taking the address of an expression. */
|
||||
class AddressOfExpr : public Expr {
|
||||
public:
|
||||
@@ -562,6 +584,7 @@ public:
|
||||
Expr *TypeCheck();
|
||||
Expr *Optimize();
|
||||
int EstimateCost() const;
|
||||
llvm::Constant *GetConstant(const Type *type) const;
|
||||
|
||||
Expr *expr;
|
||||
};
|
||||
@@ -629,19 +652,26 @@ public:
|
||||
function overloading, this method resolves which actual function
|
||||
the arguments match best. If the argCouldBeNULL parameter is
|
||||
non-NULL, each element indicates whether the corresponding argument
|
||||
is the number zero, indicating that it could be a NULL pointer.
|
||||
This parameter may be NULL (for cases where overload resolution is
|
||||
being done just given type information without the parameter
|
||||
argument expressions being available. It returns true on success.
|
||||
is the number zero, indicating that it could be a NULL pointer, and
|
||||
if argIsConstant is non-NULL, each element indicates whether the
|
||||
corresponding argument is a compile-time constant value. Both of
|
||||
these parameters may be NULL (for cases where overload resolution
|
||||
is being done just given type information without the parameter
|
||||
argument expressions being available). This function returns true
|
||||
on success.
|
||||
*/
|
||||
bool ResolveOverloads(const std::vector<const Type *> &argTypes,
|
||||
const std::vector<bool> *argCouldBeNULL = NULL);
|
||||
bool ResolveOverloads(SourcePos argPos,
|
||||
const std::vector<const Type *> &argTypes,
|
||||
const std::vector<bool> *argCouldBeNULL = NULL,
|
||||
const std::vector<bool> *argIsConstant = NULL);
|
||||
Symbol *GetMatchingFunction();
|
||||
|
||||
private:
|
||||
bool tryResolve(int (*matchFunc)(const Type *, const Type *),
|
||||
const std::vector<const Type *> &argTypes,
|
||||
const std::vector<bool> *argCouldBeNULL);
|
||||
std::vector<Symbol *> getCandidateFunctions(int argCount) const;
|
||||
static int computeOverloadCost(const FunctionType *ftype,
|
||||
const std::vector<const Type *> &argTypes,
|
||||
const std::vector<bool> *argCouldBeNULL,
|
||||
const std::vector<bool> *argIsConstant);
|
||||
|
||||
/** Name of the function that is being called. */
|
||||
std::string name;
|
||||
@@ -682,11 +712,44 @@ public:
|
||||
const Type *GetType() const;
|
||||
Expr *TypeCheck();
|
||||
Expr *Optimize();
|
||||
llvm::Constant *GetConstant(const Type *type) const;
|
||||
void Print() const;
|
||||
int EstimateCost() const;
|
||||
};
|
||||
|
||||
|
||||
/** An expression representing a "new" expression, used for dynamically
|
||||
allocating memory.
|
||||
*/
|
||||
class NewExpr : public Expr {
|
||||
public:
|
||||
NewExpr(int typeQual, const Type *type, Expr *initializer, Expr *count,
|
||||
SourcePos tqPos, SourcePos p);
|
||||
|
||||
llvm::Value *GetValue(FunctionEmitContext *ctx) const;
|
||||
const Type *GetType() const;
|
||||
Expr *TypeCheck();
|
||||
Expr *Optimize();
|
||||
void Print() const;
|
||||
int EstimateCost() const;
|
||||
|
||||
/** Type of object to allocate storage for. */
|
||||
const Type *allocType;
|
||||
/** Expression giving the number of elements to allocate, when the
|
||||
"new Foo[expr]" form is used. This may be NULL, in which case a
|
||||
single element of the given type will be allocated. */
|
||||
Expr *countExpr;
|
||||
/** Optional initializer expression used to initialize the allocated
|
||||
memory. */
|
||||
Expr *initExpr;
|
||||
/** Indicates whether this is a "varying new" or "uniform new"
|
||||
(i.e. whether a separate allocation is performed per program
|
||||
instance, or whether a single allocation is performed for the
|
||||
entire gang of program instances.) */
|
||||
bool isVarying;
|
||||
};
|
||||
|
||||
|
||||
/** This function indicates whether it's legal to convert from fromType to
|
||||
toType. If the optional errorMsgBase and source position parameters
|
||||
are provided, then an error message is issued if the type conversion
|
||||
@@ -705,4 +768,20 @@ bool CanConvertTypes(const Type *fromType, const Type *toType,
|
||||
*/
|
||||
Expr *TypeConvertExpr(Expr *expr, const Type *toType, const char *errorMsgBase);
|
||||
|
||||
/** Utility routine that emits code to initialize a symbol given an
|
||||
initializer expression.
|
||||
|
||||
@param lvalue Memory location of storage for the symbol's data
|
||||
@param symName Name of symbol (used in error messages)
|
||||
@param symType Type of variable being initialized
|
||||
@param initExpr Expression for the initializer
|
||||
@param ctx FunctionEmitContext to use for generating instructions
|
||||
@param pos Source file position of the variable being initialized
|
||||
*/
|
||||
void
|
||||
InitSymbol(llvm::Value *lvalue, const Type *symType, Expr *initExpr,
|
||||
FunctionEmitContext *ctx, SourcePos pos);
|
||||
|
||||
bool PossiblyResolveFunctionOverloads(Expr *expr, const Type *type);
|
||||
|
||||
#endif // ISPC_EXPR_H
|
||||
|
||||
178
func.cpp
178
func.cpp
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
Copyright (c) 2011, Intel Corporation
|
||||
Copyright (c) 2011-2012, Intel Corporation
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
@@ -66,23 +66,15 @@
|
||||
#include <llvm/Support/ToolOutputFile.h>
|
||||
#include <llvm/Assembly/PrintModulePass.h>
|
||||
|
||||
Function::Function(Symbol *s, const std::vector<Symbol *> &a, Stmt *c) {
|
||||
Function::Function(Symbol *s, Stmt *c) {
|
||||
sym = s;
|
||||
args = a;
|
||||
code = c;
|
||||
|
||||
maskSymbol = m->symbolTable->LookupVariable("__mask");
|
||||
assert(maskSymbol != NULL);
|
||||
Assert(maskSymbol != NULL);
|
||||
|
||||
if (code != NULL) {
|
||||
if (g->debugPrint) {
|
||||
fprintf(stderr, "Creating function \"%s\". Initial code:\n",
|
||||
sym->name.c_str());
|
||||
code->Print(0);
|
||||
fprintf(stderr, "---------------------\n");
|
||||
}
|
||||
|
||||
code = code->TypeCheck();
|
||||
code = TypeCheck(code);
|
||||
|
||||
if (code != NULL && g->debugPrint) {
|
||||
fprintf(stderr, "After typechecking function \"%s\":\n",
|
||||
@@ -92,7 +84,7 @@ Function::Function(Symbol *s, const std::vector<Symbol *> &a, Stmt *c) {
|
||||
}
|
||||
|
||||
if (code != NULL) {
|
||||
code = code->Optimize();
|
||||
code = Optimize(code);
|
||||
if (g->debugPrint) {
|
||||
fprintf(stderr, "After optimizing function \"%s\":\n",
|
||||
sym->name.c_str());
|
||||
@@ -109,21 +101,29 @@ Function::Function(Symbol *s, const std::vector<Symbol *> &a, Stmt *c) {
|
||||
}
|
||||
|
||||
const FunctionType *type = dynamic_cast<const FunctionType *>(sym->type);
|
||||
assert(type != NULL);
|
||||
Assert(type != NULL);
|
||||
|
||||
for (unsigned int i = 0; i < args.size(); ++i)
|
||||
if (dynamic_cast<const ReferenceType *>(args[i]->type) == NULL)
|
||||
args[i]->parentFunction = this;
|
||||
for (int i = 0; i < type->GetNumParameters(); ++i) {
|
||||
const char *paramName = type->GetParameterName(i).c_str();
|
||||
Symbol *sym = m->symbolTable->LookupVariable(paramName);
|
||||
if (sym == NULL)
|
||||
Assert(strncmp(paramName, "__anon_parameter_", 17) == 0);
|
||||
args.push_back(sym);
|
||||
|
||||
const Type *t = type->GetParameterType(i);
|
||||
if (sym != NULL && dynamic_cast<const ReferenceType *>(t) == NULL)
|
||||
sym->parentFunction = this;
|
||||
}
|
||||
|
||||
if (type->isTask) {
|
||||
threadIndexSym = m->symbolTable->LookupVariable("threadIndex");
|
||||
assert(threadIndexSym);
|
||||
Assert(threadIndexSym);
|
||||
threadCountSym = m->symbolTable->LookupVariable("threadCount");
|
||||
assert(threadCountSym);
|
||||
Assert(threadCountSym);
|
||||
taskIndexSym = m->symbolTable->LookupVariable("taskIndex");
|
||||
assert(taskIndexSym);
|
||||
Assert(taskIndexSym);
|
||||
taskCountSym = m->symbolTable->LookupVariable("taskCount");
|
||||
assert(taskCountSym);
|
||||
Assert(taskCountSym);
|
||||
}
|
||||
else
|
||||
threadIndexSym = threadCountSym = taskIndexSym = taskCountSym = NULL;
|
||||
@@ -133,7 +133,7 @@ Function::Function(Symbol *s, const std::vector<Symbol *> &a, Stmt *c) {
|
||||
const Type *
|
||||
Function::GetReturnType() const {
|
||||
const FunctionType *type = dynamic_cast<const FunctionType *>(sym->type);
|
||||
assert(type != NULL);
|
||||
Assert(type != NULL);
|
||||
return type->GetReturnType();
|
||||
}
|
||||
|
||||
@@ -141,7 +141,7 @@ Function::GetReturnType() const {
|
||||
const FunctionType *
|
||||
Function::GetType() const {
|
||||
const FunctionType *type = dynamic_cast<const FunctionType *>(sym->type);
|
||||
assert(type != NULL);
|
||||
Assert(type != NULL);
|
||||
return type;
|
||||
}
|
||||
|
||||
@@ -152,21 +152,26 @@ Function::GetType() const {
|
||||
'mem2reg' pass will in turn promote to SSA registers.
|
||||
*/
|
||||
static void
|
||||
lCopyInTaskParameter(int i, llvm::Value *structArgPtr, const std::vector<Symbol *> &args,
|
||||
lCopyInTaskParameter(int i, llvm::Value *structArgPtr, const
|
||||
std::vector<Symbol *> &args,
|
||||
FunctionEmitContext *ctx) {
|
||||
// We expect the argument structure to come in as a pointer to a
|
||||
// structure. Confirm and figure out its type here.
|
||||
const llvm::Type *structArgType = structArgPtr->getType();
|
||||
assert(llvm::isa<llvm::PointerType>(structArgType));
|
||||
Assert(llvm::isa<llvm::PointerType>(structArgType));
|
||||
const llvm::PointerType *pt = llvm::dyn_cast<const llvm::PointerType>(structArgType);
|
||||
assert(llvm::isa<llvm::StructType>(pt->getElementType()));
|
||||
Assert(llvm::isa<llvm::StructType>(pt->getElementType()));
|
||||
const llvm::StructType *argStructType =
|
||||
llvm::dyn_cast<const llvm::StructType>(pt->getElementType());
|
||||
|
||||
// Get the type of the argument we're copying in and its Symbol pointer
|
||||
LLVM_TYPE_CONST llvm::Type *argType = argStructType->getElementType(i);
|
||||
llvm::Type *argType = argStructType->getElementType(i);
|
||||
Symbol *sym = args[i];
|
||||
|
||||
if (sym == NULL)
|
||||
// anonymous parameter, so don't worry about it
|
||||
return;
|
||||
|
||||
// allocate space to copy the parameter in to
|
||||
sym->storagePtr = ctx->AllocaInst(argType, sym->name.c_str());
|
||||
|
||||
@@ -189,10 +194,9 @@ lCopyInTaskParameter(int i, llvm::Value *structArgPtr, const std::vector<Symbol
|
||||
void
|
||||
Function::emitCode(FunctionEmitContext *ctx, llvm::Function *function,
|
||||
SourcePos firstStmtPos) {
|
||||
llvm::Value *maskPtr = ctx->AllocaInst(LLVMTypes::MaskType, "mask_memory");
|
||||
ctx->StoreInst(LLVMMaskAllOn, maskPtr);
|
||||
maskSymbol->storagePtr = maskPtr;
|
||||
ctx->SetMaskPointer(maskPtr);
|
||||
// Connect the __mask builtin to the location in memory that stores its
|
||||
// value
|
||||
maskSymbol->storagePtr = ctx->GetFullMaskPointer();
|
||||
|
||||
// add debugging info for __mask, programIndex, ...
|
||||
maskSymbol->pos = firstStmtPos;
|
||||
@@ -202,7 +206,7 @@ Function::emitCode(FunctionEmitContext *ctx, llvm::Function *function,
|
||||
llvm::BasicBlock *entryBBlock = ctx->GetCurrentBasicBlock();
|
||||
#endif
|
||||
const FunctionType *type = dynamic_cast<const FunctionType *>(sym->type);
|
||||
assert(type != NULL);
|
||||
Assert(type != NULL);
|
||||
if (type->isTask == true) {
|
||||
// For tasks, there should always be three parameters: the
|
||||
// pointer to the structure that holds all of the arguments, the
|
||||
@@ -248,6 +252,10 @@ Function::emitCode(FunctionEmitContext *ctx, llvm::Function *function,
|
||||
llvm::Function::arg_iterator argIter = function->arg_begin();
|
||||
for (unsigned int i = 0; i < args.size(); ++i, ++argIter) {
|
||||
Symbol *sym = args[i];
|
||||
if (sym == NULL)
|
||||
// anonymous function parameter
|
||||
continue;
|
||||
|
||||
argIter->setName(sym->name.c_str());
|
||||
|
||||
// Allocate stack storage for the parameter and emit code
|
||||
@@ -267,38 +275,87 @@ Function::emitCode(FunctionEmitContext *ctx, llvm::Function *function,
|
||||
else {
|
||||
// Otherwise use the mask to set the entry mask value
|
||||
argIter->setName("__mask");
|
||||
assert(argIter->getType() == LLVMTypes::MaskType);
|
||||
Assert(argIter->getType() == LLVMTypes::MaskType);
|
||||
ctx->SetFunctionMask(argIter);
|
||||
assert(++argIter == function->arg_end());
|
||||
Assert(++argIter == function->arg_end());
|
||||
}
|
||||
}
|
||||
|
||||
// Finally, we can generate code for the function
|
||||
if (code != NULL) {
|
||||
int costEstimate = code->EstimateCost();
|
||||
ctx->SetDebugPos(code->pos);
|
||||
ctx->AddInstrumentationPoint("function entry");
|
||||
|
||||
int costEstimate = EstimateCost(code);
|
||||
Debug(code->pos, "Estimated cost for function \"%s\" = %d\n",
|
||||
sym->name.c_str(), costEstimate);
|
||||
|
||||
// If the body of the function is non-trivial, then we wrap the
|
||||
// entire thing inside code that tests to see if the mask is all
|
||||
// on, all off, or mixed. If this is a simple function, then this
|
||||
// isn't worth the code bloat / overhead.
|
||||
bool checkMask = (type->isTask == true) ||
|
||||
((function->hasFnAttr(llvm::Attribute::AlwaysInline) == false) &&
|
||||
costEstimate > CHECK_MASK_AT_FUNCTION_START_COST);
|
||||
Debug(code->pos, "Estimated cost for function \"%s\" = %d\n",
|
||||
sym->name.c_str(), costEstimate);
|
||||
// If the body of the function is non-trivial, then we wrap the
|
||||
// entire thing around a varying "cif (true)" test in order to reap
|
||||
// the side-effect benefit of checking to see if the execution mask
|
||||
// is all on and thence having a specialized code path for that
|
||||
// case. If this is a simple function, then this isn't worth the
|
||||
// code bloat / overhead.
|
||||
if (checkMask) {
|
||||
bool allTrue[ISPC_MAX_NVEC];
|
||||
for (int i = 0; i < g->target.vectorWidth; ++i)
|
||||
allTrue[i] = true;
|
||||
Expr *trueExpr = new ConstExpr(AtomicType::VaryingBool, allTrue,
|
||||
code->pos);
|
||||
code = new IfStmt(trueExpr, code, NULL, true, code->pos);
|
||||
}
|
||||
checkMask &= (g->target.maskingIsFree == false);
|
||||
checkMask &= (g->opt.disableCoherentControlFlow == false);
|
||||
|
||||
ctx->SetDebugPos(code->pos);
|
||||
ctx->AddInstrumentationPoint("function entry");
|
||||
code->EmitCode(ctx);
|
||||
if (checkMask) {
|
||||
llvm::Value *mask = ctx->GetFunctionMask();
|
||||
llvm::Value *allOn = ctx->All(mask);
|
||||
llvm::BasicBlock *bbAllOn = ctx->CreateBasicBlock("all_on");
|
||||
llvm::BasicBlock *bbNotAll = ctx->CreateBasicBlock("not_all_on");
|
||||
|
||||
// Set up basic blocks for goto targets
|
||||
ctx->InitializeLabelMap(code);
|
||||
|
||||
ctx->BranchInst(bbAllOn, bbNotAll, allOn);
|
||||
// all on: we've determined dynamically that the mask is all
|
||||
// on. Set the current mask to "all on" explicitly so that
|
||||
// codegen for this path can be improved with this knowledge in
|
||||
// hand...
|
||||
ctx->SetCurrentBasicBlock(bbAllOn);
|
||||
if (!g->opt.disableMaskAllOnOptimizations)
|
||||
ctx->SetFunctionMask(LLVMMaskAllOn);
|
||||
code->EmitCode(ctx);
|
||||
if (ctx->GetCurrentBasicBlock())
|
||||
ctx->ReturnInst();
|
||||
|
||||
// not all on: figure out if no instances are running, or if
|
||||
// some of them are
|
||||
ctx->SetCurrentBasicBlock(bbNotAll);
|
||||
ctx->SetFunctionMask(mask);
|
||||
llvm::BasicBlock *bbNoneOn = ctx->CreateBasicBlock("none_on");
|
||||
llvm::BasicBlock *bbSomeOn = ctx->CreateBasicBlock("some_on");
|
||||
llvm::Value *anyOn = ctx->Any(mask);
|
||||
ctx->BranchInst(bbSomeOn, bbNoneOn, anyOn);
|
||||
|
||||
// Everyone is off; get out of here.
|
||||
ctx->SetCurrentBasicBlock(bbNoneOn);
|
||||
ctx->ReturnInst();
|
||||
|
||||
// some on: reset the mask to the value it had at function
|
||||
// entry and emit the code. Resetting the mask here is
|
||||
// important, due to the "all on" setting of it for the path
|
||||
// above
|
||||
ctx->SetCurrentBasicBlock(bbSomeOn);
|
||||
ctx->SetFunctionMask(mask);
|
||||
|
||||
// Set up basic blocks for goto targets again; we want to have
|
||||
// one set of them for gotos in the 'all on' case, and a
|
||||
// distinct set for the 'mixed mask' case.
|
||||
ctx->InitializeLabelMap(code);
|
||||
|
||||
code->EmitCode(ctx);
|
||||
if (ctx->GetCurrentBasicBlock())
|
||||
ctx->ReturnInst();
|
||||
}
|
||||
else {
|
||||
// Set up basic blocks for goto targets
|
||||
ctx->InitializeLabelMap(code);
|
||||
// No check, just emit the code
|
||||
code->EmitCode(ctx);
|
||||
}
|
||||
}
|
||||
|
||||
if (ctx->GetCurrentBasicBlock()) {
|
||||
@@ -314,7 +371,7 @@ Function::emitCode(FunctionEmitContext *ctx, llvm::Function *function,
|
||||
// issue a warning. Also need to warn if it's the entry block for
|
||||
// the function (in which case it will not have predecessors but is
|
||||
// still reachable.)
|
||||
if (type->GetReturnType() != AtomicType::Void &&
|
||||
if (Type::Equal(type->GetReturnType(), AtomicType::Void) == false &&
|
||||
(pred_begin(ec.bblock) != pred_end(ec.bblock) || (ec.bblock == entryBBlock)))
|
||||
Warning(sym->pos, "Missing return statement in function returning \"%s\".",
|
||||
type->rType->GetString().c_str());
|
||||
@@ -337,7 +394,7 @@ Function::GenerateIR() {
|
||||
return;
|
||||
|
||||
llvm::Function *function = sym->function;
|
||||
assert(function != NULL);
|
||||
Assert(function != NULL);
|
||||
|
||||
// But if that function has a definition, we don't want to redefine it.
|
||||
if (function->empty() == false) {
|
||||
@@ -352,9 +409,8 @@ Function::GenerateIR() {
|
||||
SourcePos firstStmtPos = sym->pos;
|
||||
if (code) {
|
||||
StmtList *sl = dynamic_cast<StmtList *>(code);
|
||||
if (sl && sl->GetStatements().size() > 0 &&
|
||||
sl->GetStatements()[0] != NULL)
|
||||
firstStmtPos = sl->GetStatements()[0]->pos;
|
||||
if (sl && sl->stmts.size() > 0 && sl->stmts[0] != NULL)
|
||||
firstStmtPos = sl->stmts[0]->pos;
|
||||
else
|
||||
firstStmtPos = code->pos;
|
||||
}
|
||||
@@ -376,10 +432,10 @@ Function::GenerateIR() {
|
||||
// it without a mask parameter and without name mangling so that
|
||||
// the application can call it
|
||||
const FunctionType *type = dynamic_cast<const FunctionType *>(sym->type);
|
||||
assert(type != NULL);
|
||||
Assert(type != NULL);
|
||||
if (type->isExported) {
|
||||
if (!type->isTask) {
|
||||
LLVM_TYPE_CONST llvm::FunctionType *ftype =
|
||||
llvm::FunctionType *ftype =
|
||||
type->LLVMFunctionType(g->ctx);
|
||||
llvm::GlobalValue::LinkageTypes linkage = llvm::GlobalValue::ExternalLinkage;
|
||||
std::string functionName = sym->name;
|
||||
|
||||
4
func.h
4
func.h
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
Copyright (c) 2011, Intel Corporation
|
||||
Copyright (c) 2011-2012, Intel Corporation
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
@@ -43,7 +43,7 @@
|
||||
|
||||
class Function {
|
||||
public:
|
||||
Function(Symbol *sym, const std::vector<Symbol *> &args, Stmt *code);
|
||||
Function(Symbol *sym, Stmt *code);
|
||||
|
||||
const Type *GetReturnType() const;
|
||||
const FunctionType *GetType() const;
|
||||
|
||||
360
ispc.cpp
360
ispc.cpp
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
Copyright (c) 2010-2011, Intel Corporation
|
||||
Copyright (c) 2010-2012, Intel Corporation
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
@@ -50,6 +50,7 @@
|
||||
#include <llvm/Analysis/DIBuilder.h>
|
||||
#include <llvm/Analysis/DebugInfo.h>
|
||||
#include <llvm/Support/Dwarf.h>
|
||||
#include <llvm/Instructions.h>
|
||||
#include <llvm/Target/TargetMachine.h>
|
||||
#include <llvm/Target/TargetOptions.h>
|
||||
#include <llvm/Target/TargetData.h>
|
||||
@@ -69,9 +70,88 @@ Module *m;
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
// Target
|
||||
|
||||
#ifndef ISPC_IS_WINDOWS
|
||||
/* Executes the x86 CPUID instruction for the leaf given by infoType (loaded
   into EAX through the "0" matching constraint) and stores the returned
   EAX, EBX, ECX and EDX values in info[0], info[1], info[2] and info[3].
   ECX is not set on entry, so no sub-leaf can be selected through this
   helper.
   NOTE(review): EBX is named directly as an output here, whereas
   __cpuidex() swaps it out in case it is the PIC register -- confirm this
   is acceptable for PIC builds. */
static void __cpuid(int info[4], int infoType) {
|
||||
__asm__ __volatile__ ("cpuid"
|
||||
: "=a" (info[0]), "=b" (info[1]), "=c" (info[2]), "=d" (info[3])
|
||||
: "0" (infoType));
|
||||
}
|
||||
|
||||
/* Executes CPUID with EAX = level and ECX = count (the "0" and "2" matching
   constraints tie the two inputs to the EAX and ECX operands) and stores
   the returned EAX, EBX, ECX and EDX values in info[0..3].  The xchg before
   and after the cpuid swaps EBX with the register chosen for operand 1, so
   the EBX result ends up in info[1] while EBX itself holds its original
   value again on exit. */
/* Save %ebx in case it's the PIC register */
|
||||
static void __cpuidex(int info[4], int level, int count) {
|
||||
__asm__ __volatile__ ("xchg{l}\t{%%}ebx, %1\n\t"
|
||||
"cpuid\n\t"
|
||||
"xchg{l}\t{%%}ebx, %1\n\t"
|
||||
: "=a" (info[0]), "=r" (info[1]), "=c" (info[2]), "=d" (info[3])
|
||||
: "0" (level), "2" (count));
|
||||
}
|
||||
#endif // ISPC_IS_WINDOWS
|
||||
|
||||
|
||||
/* Probes the host CPU with CPUID and returns the name of the most capable
   ISA found, as one of the string literals "avx2", "avx", "sse4" or "sse2".
   If none of the tested feature bits is set, an error is printed to stderr
   and the process exits with status 1.
   NOTE(review): only CPU feature bits are examined; there is no check that
   the OS has enabled AVX state (OSXSAVE/XGETBV), nor that CPUID leaf 7 is
   supported before it is queried -- confirm whether that matters for the
   hosts this runs on. */
static const char *
|
||||
lGetSystemISA() {
|
||||
int info[4];
|
||||
// Leaf 1: feature flags are returned in ECX (info[2]) and EDX (info[3]).
__cpuid(info, 1);
|
||||
|
||||
// Leaf 1, ECX bit 28: AVX feature flag.
if ((info[2] & (1 << 28)) != 0) {
|
||||
// AVX1 for sure. Do we have AVX2?
|
||||
// Call cpuid with eax=7, ecx=0
|
||||
__cpuidex(info, 7, 0);
|
||||
// Leaf 7 / sub-leaf 0, EBX bit 5: AVX2 feature flag.
if ((info[1] & (1 << 5)) != 0)
|
||||
return "avx2";
|
||||
else
|
||||
return "avx";
|
||||
}
|
||||
// The remaining tests still see the leaf-1 results, since info is only
// overwritten inside the AVX branch above.
// Leaf 1, ECX bit 19: SSE4.1 feature flag.
else if ((info[2] & (1 << 19)) != 0)
|
||||
return "sse4";
|
||||
// Leaf 1, EDX bit 26: SSE2 feature flag.
else if ((info[3] & (1 << 26)) != 0)
|
||||
return "sse2";
|
||||
else {
|
||||
fprintf(stderr, "Unable to detect supported SSE/AVX ISA. Exiting.\n");
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/* Names of the CPUs that may be given explicitly as the target CPU.
   Target::GetTarget() compares a user-supplied cpu string against these
   entries with strcmp(), and Target::SupportedTargetCPUs() joins them into
   the comma-separated list shown in its error message.  "corei7-avx" is
   only listed when building against LLVM 3.0 or the 3.1 development branch.
   NOTE(review): Target::GetTarget() also tests for "sandybridge" when
   choosing an ISA, but that name is not listed here, so it looks like it
   would be rejected by the CPU validation -- confirm. */
static const char *supportedCPUs[] = {
|
||||
"atom", "penryn", "core2", "corei7",
|
||||
#if defined(LLVM_3_0) || defined(LLVM_3_0svn) || defined(LLVM_3_1svn)
|
||||
"corei7-avx"
|
||||
#endif
|
||||
};
|
||||
|
||||
|
||||
bool
|
||||
Target::GetTarget(const char *arch, const char *cpu, const char *isa,
|
||||
bool pic, Target *t) {
|
||||
if (isa == NULL) {
|
||||
if (cpu != NULL) {
|
||||
// If a CPU was specified explicitly, try to pick the best
|
||||
// possible ISA based on that.
|
||||
#if defined(LLVM_3_0) || defined(LLVM_3_0svn) || defined(LLVM_3_1svn)
|
||||
if (!strcmp(cpu, "sandybridge") ||
|
||||
!strcmp(cpu, "corei7-avx"))
|
||||
isa = "avx";
|
||||
else
|
||||
#endif
|
||||
if (!strcmp(cpu, "corei7") ||
|
||||
!strcmp(cpu, "penryn"))
|
||||
isa = "sse4";
|
||||
else
|
||||
isa = "sse2";
|
||||
fprintf(stderr, "Notice: no --target specified on command-line. "
|
||||
"Using ISA \"%s\" based on specified CPU \"%s\".\n", isa,
|
||||
cpu);
|
||||
}
|
||||
else {
|
||||
// No CPU and no ISA, so use CPUID to figure out what this CPU
|
||||
// supports.
|
||||
isa = lGetSystemISA();
|
||||
fprintf(stderr, "Notice: no --target specified on command-line. "
|
||||
"Using system ISA \"%s\".\n", isa);
|
||||
}
|
||||
}
|
||||
|
||||
if (cpu == NULL) {
|
||||
std::string hostCPU = llvm::sys::getHostCPUName();
|
||||
if (hostCPU.size() > 0)
|
||||
@@ -81,19 +161,24 @@ Target::GetTarget(const char *arch, const char *cpu, const char *isa,
|
||||
cpu = "generic";
|
||||
}
|
||||
}
|
||||
else {
|
||||
bool foundCPU = false;
|
||||
for (int i = 0; i < int(sizeof(supportedCPUs) / sizeof(supportedCPUs[0]));
|
||||
++i) {
|
||||
if (!strcmp(cpu, supportedCPUs[i])) {
|
||||
foundCPU = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (foundCPU == false) {
|
||||
fprintf(stderr, "Error: CPU type \"%s\" unknown. Supported CPUs: "
|
||||
"%s.\n", cpu, SupportedTargetCPUs().c_str());
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
t->cpu = cpu;
|
||||
|
||||
if (isa == NULL) {
|
||||
if (!strcasecmp(cpu, "atom"))
|
||||
isa = "sse2";
|
||||
#if defined(LLVM_3_0) || defined(LLVM_3_0svn) || defined(LLVM_3_1svn)
|
||||
else if (!strcasecmp(cpu, "sandybridge") ||
|
||||
!strcasecmp(cpu, "corei7-avx"))
|
||||
isa = "avx";
|
||||
#endif // LLVM_3_0
|
||||
else
|
||||
isa = "sse4";
|
||||
}
|
||||
if (arch == NULL)
|
||||
arch = "x86-64";
|
||||
|
||||
@@ -129,24 +214,68 @@ Target::GetTarget(const char *arch, const char *cpu, const char *isa,
|
||||
t->nativeVectorWidth = 4;
|
||||
t->vectorWidth = 4;
|
||||
t->attributes = "+sse,+sse2,-sse3,-sse41,-sse42,-sse4a,-ssse3,-popcnt";
|
||||
t->maskingIsFree = false;
|
||||
t->allOffMaskIsSafe = false;
|
||||
t->maskBitCount = 32;
|
||||
}
|
||||
else if (!strcasecmp(isa, "sse2-x2")) {
|
||||
t->isa = Target::SSE2;
|
||||
t->nativeVectorWidth = 4;
|
||||
t->vectorWidth = 8;
|
||||
t->attributes = "+sse,+sse2,-sse3,-sse41,-sse42,-sse4a,-ssse3,-popcnt";
|
||||
t->maskingIsFree = false;
|
||||
t->allOffMaskIsSafe = false;
|
||||
t->maskBitCount = 32;
|
||||
}
|
||||
else if (!strcasecmp(isa, "sse4")) {
|
||||
t->isa = Target::SSE4;
|
||||
t->nativeVectorWidth = 4;
|
||||
t->vectorWidth = 4;
|
||||
t->attributes = "+sse,+sse2,+sse3,+sse41,-sse42,-sse4a,+ssse3,-popcnt,+cmov";
|
||||
t->maskingIsFree = false;
|
||||
t->allOffMaskIsSafe = false;
|
||||
t->maskBitCount = 32;
|
||||
}
|
||||
else if (!strcasecmp(isa, "sse4x2") || !strcasecmp(isa, "sse4-x2")) {
|
||||
t->isa = Target::SSE4;
|
||||
t->nativeVectorWidth = 4;
|
||||
t->vectorWidth = 8;
|
||||
t->attributes = "+sse,+sse2,+sse3,+sse41,-sse42,-sse4a,+ssse3,-popcnt,+cmov";
|
||||
t->maskingIsFree = false;
|
||||
t->allOffMaskIsSafe = false;
|
||||
t->maskBitCount = 32;
|
||||
}
|
||||
else if (!strcasecmp(isa, "generic-4")) {
|
||||
t->isa = Target::GENERIC;
|
||||
t->nativeVectorWidth = 4;
|
||||
t->vectorWidth = 4;
|
||||
t->maskingIsFree = true;
|
||||
t->allOffMaskIsSafe = true;
|
||||
t->maskBitCount = 1;
|
||||
}
|
||||
else if (!strcasecmp(isa, "generic-8")) {
|
||||
t->isa = Target::GENERIC;
|
||||
t->nativeVectorWidth = 8;
|
||||
t->vectorWidth = 8;
|
||||
t->maskingIsFree = true;
|
||||
t->allOffMaskIsSafe = true;
|
||||
t->maskBitCount = 1;
|
||||
}
|
||||
else if (!strcasecmp(isa, "generic-16")) {
|
||||
t->isa = Target::GENERIC;
|
||||
t->nativeVectorWidth = 16;
|
||||
t->vectorWidth = 16;
|
||||
t->maskingIsFree = true;
|
||||
t->allOffMaskIsSafe = true;
|
||||
t->maskBitCount = 1;
|
||||
}
|
||||
else if (!strcasecmp(isa, "generic-1")) {
|
||||
t->isa = Target::GENERIC;
|
||||
t->nativeVectorWidth = 1;
|
||||
t->vectorWidth = 1;
|
||||
t->maskingIsFree = false;
|
||||
t->allOffMaskIsSafe = false;
|
||||
t->maskBitCount = 32;
|
||||
}
|
||||
#if defined(LLVM_3_0) || defined(LLVM_3_0svn) || defined(LLVM_3_1svn)
|
||||
else if (!strcasecmp(isa, "avx")) {
|
||||
@@ -154,14 +283,40 @@ Target::GetTarget(const char *arch, const char *cpu, const char *isa,
|
||||
t->nativeVectorWidth = 8;
|
||||
t->vectorWidth = 8;
|
||||
t->attributes = "+avx,+popcnt,+cmov";
|
||||
t->maskingIsFree = false;
|
||||
t->allOffMaskIsSafe = false;
|
||||
t->maskBitCount = 32;
|
||||
}
|
||||
else if (!strcasecmp(isa, "avx-x2")) {
|
||||
t->isa = Target::AVX;
|
||||
t->nativeVectorWidth = 8;
|
||||
t->vectorWidth = 16;
|
||||
t->attributes = "+avx,+popcnt,+cmov";
|
||||
t->maskingIsFree = false;
|
||||
t->allOffMaskIsSafe = false;
|
||||
t->maskBitCount = 32;
|
||||
}
|
||||
#endif // LLVM 3.0
|
||||
#endif // LLVM 3.0+
|
||||
#if defined(LLVM_3_1svn)
|
||||
else if (!strcasecmp(isa, "avx2")) {
|
||||
t->isa = Target::AVX2;
|
||||
t->nativeVectorWidth = 8;
|
||||
t->vectorWidth = 8;
|
||||
t->attributes = "+avx2,+popcnt,+cmov,+f16c";
|
||||
t->maskingIsFree = false;
|
||||
t->allOffMaskIsSafe = false;
|
||||
t->maskBitCount = 32;
|
||||
}
|
||||
else if (!strcasecmp(isa, "avx2-x2")) {
|
||||
t->isa = Target::AVX2;
|
||||
t->nativeVectorWidth = 16;
|
||||
t->vectorWidth = 16;
|
||||
t->attributes = "+avx2,+popcnt,+cmov,+f16c";
|
||||
t->maskingIsFree = false;
|
||||
t->allOffMaskIsSafe = false;
|
||||
t->maskBitCount = 32;
|
||||
}
|
||||
#endif // LLVM 3.1
|
||||
else {
|
||||
fprintf(stderr, "Target ISA \"%s\" is unknown. Choices are: %s\n",
|
||||
isa, SupportedTargetISAs());
|
||||
@@ -178,17 +333,16 @@ Target::GetTarget(const char *arch, const char *cpu, const char *isa,
|
||||
}
|
||||
|
||||
|
||||
const char *
|
||||
std::string
|
||||
Target::SupportedTargetCPUs() {
|
||||
return "atom, barcelona, core2, corei7, "
|
||||
#if defined(LLVM_3_0) || defined(LLVM_3_0svn) || defined(LLVM_3_1svn)
|
||||
"corei7-avx, "
|
||||
#endif
|
||||
"istanbul, nocona, penryn, "
|
||||
#ifdef LLVM_2_9
|
||||
"sandybridge, "
|
||||
#endif
|
||||
"westmere";
|
||||
std::string ret;
|
||||
int count = sizeof(supportedCPUs) / sizeof(supportedCPUs[0]);
|
||||
for (int i = 0; i < count; ++i) {
|
||||
ret += supportedCPUs[i];
|
||||
if (i != count - 1)
|
||||
ret += ", ";
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
||||
@@ -201,10 +355,13 @@ Target::SupportedTargetArchs() {
|
||||
const char *
|
||||
Target::SupportedTargetISAs() {
|
||||
return "sse2, sse2-x2, sse4, sse4-x2"
|
||||
#if defined(LLVM_3_0) || defined(LLVM_3_0svn) || defined(LLVM_3_1svn)
|
||||
#ifndef LLVM_2_9
|
||||
", avx, avx-x2"
|
||||
#endif
|
||||
;
|
||||
#endif // !LLVM_2_9
|
||||
#ifdef LLVM_3_1svn
|
||||
", avx2, avx2-x2"
|
||||
#endif // LLVM_3_1svn
|
||||
", generic-4, generic-8, generic-16, generic-1";
|
||||
}
|
||||
|
||||
|
||||
@@ -241,11 +398,26 @@ Target::GetTargetMachine() const {
|
||||
|
||||
llvm::Reloc::Model relocModel = generatePIC ? llvm::Reloc::PIC_ :
|
||||
llvm::Reloc::Default;
|
||||
#if defined(LLVM_3_0svn) || defined(LLVM_3_1svn) || defined(LLVM_3_0)
|
||||
#if defined(LLVM_3_1svn)
|
||||
std::string featuresString = attributes;
|
||||
llvm::TargetOptions options;
|
||||
#if 0
|
||||
// This was breaking e.g. round() on SSE2, where the code we want to
|
||||
// run wants to do:
|
||||
// x += 0x1.0p23f;
|
||||
// x -= 0x1.0p23f;
|
||||
// But then LLVM was optimizing this away...
|
||||
if (g->opt.fastMath == true)
|
||||
options.UnsafeFPMath = 1;
|
||||
#endif
|
||||
llvm::TargetMachine *targetMachine =
|
||||
target->createTargetMachine(triple, cpu, featuresString, options,
|
||||
relocModel);
|
||||
#elif defined(LLVM_3_0)
|
||||
std::string featuresString = attributes;
|
||||
llvm::TargetMachine *targetMachine =
|
||||
target->createTargetMachine(triple, cpu, featuresString, relocModel);
|
||||
#else
|
||||
#else // LLVM 2.9
|
||||
#ifdef ISPC_IS_APPLE
|
||||
relocModel = llvm::Reloc::PIC_;
|
||||
#endif // ISPC_IS_APPLE
|
||||
@@ -255,8 +427,9 @@ Target::GetTargetMachine() const {
|
||||
#ifndef ISPC_IS_WINDOWS
|
||||
targetMachine->setRelocationModel(relocModel);
|
||||
#endif // !ISPC_IS_WINDOWS
|
||||
#endif
|
||||
assert(targetMachine != NULL);
|
||||
#endif // LLVM_2_9
|
||||
|
||||
Assert(targetMachine != NULL);
|
||||
|
||||
targetMachine->setAsmVerbosityDefault(true);
|
||||
return targetMachine;
|
||||
@@ -272,7 +445,10 @@ Target::GetISAString() const {
|
||||
return "sse4";
|
||||
case Target::AVX:
|
||||
return "avx";
|
||||
break;
|
||||
case Target::AVX2:
|
||||
return "avx2";
|
||||
case Target::GENERIC:
|
||||
return "generic";
|
||||
default:
|
||||
FATAL("Unhandled target in GetISAString()");
|
||||
}
|
||||
@@ -280,31 +456,115 @@ Target::GetISAString() const {
|
||||
}
|
||||
|
||||
|
||||
/* Returns true if the given LLVM type is one of the bool/mask vector types
   listed below, any other vector type, or an array or struct that
   (recursively) contains such a type; returns false for primitive,
   integer and pointer types and for aggregates made up only of those.
   Target::SizeOf() and Target::StructOffset() call this when the ISA is
   Target::GENERIC: on a true result they emit a GEP-from-null plus
   ptrtoint sequence instead of asking TargetData for a constant. */
static bool
|
||||
lGenericTypeLayoutIndeterminate(llvm::Type *type) {
|
||||
// Scalars: layout is known.
if (type->isPrimitiveType() || type->isIntegerTy())
|
||||
return false;
|
||||
|
||||
if (type == LLVMTypes::BoolVectorType ||
|
||||
type == LLVMTypes::MaskType ||
|
||||
type == LLVMTypes::Int1VectorType)
|
||||
return true;
|
||||
|
||||
// Arrays: the answer is that of the element type.
llvm::ArrayType *at =
|
||||
llvm::dyn_cast<llvm::ArrayType>(type);
|
||||
if (at != NULL)
|
||||
return lGenericTypeLayoutIndeterminate(at->getElementType());
|
||||
|
||||
// Pointers: always treated as determinate; the pointee is not examined.
llvm::PointerType *pt =
|
||||
llvm::dyn_cast<llvm::PointerType>(type);
|
||||
if (pt != NULL)
|
||||
return false;
|
||||
|
||||
// Structs: indeterminate as soon as any member is.
llvm::StructType *st =
|
||||
llvm::dyn_cast<llvm::StructType>(type);
|
||||
if (st != NULL) {
|
||||
for (int i = 0; i < (int)st->getNumElements(); ++i)
|
||||
if (lGenericTypeLayoutIndeterminate(st->getElementType(i)))
|
||||
return true;
|
||||
return false;
|
||||
}
|
||||
|
||||
// Anything that reaches this point is expected to be a vector type.
Assert(llvm::isa<llvm::VectorType>(type));
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
llvm::Value *
|
||||
Target::SizeOf(LLVM_TYPE_CONST llvm::Type *type) {
|
||||
Target::SizeOf(llvm::Type *type,
|
||||
llvm::BasicBlock *insertAtEnd) {
|
||||
if (isa == Target::GENERIC &&
|
||||
lGenericTypeLayoutIndeterminate(type)) {
|
||||
llvm::Value *index[1] = { LLVMInt32(1) };
|
||||
llvm::PointerType *ptrType = llvm::PointerType::get(type, 0);
|
||||
llvm::Value *voidPtr = llvm::ConstantPointerNull::get(ptrType);
|
||||
#if defined(LLVM_3_0) || defined(LLVM_3_0svn) || defined(LLVM_3_1svn)
|
||||
llvm::ArrayRef<llvm::Value *> arrayRef(&index[0], &index[1]);
|
||||
llvm::Instruction *gep =
|
||||
llvm::GetElementPtrInst::Create(voidPtr, arrayRef, "sizeof_gep",
|
||||
insertAtEnd);
|
||||
#else
|
||||
llvm::Instruction *gep =
|
||||
llvm::GetElementPtrInst::Create(voidPtr, &index[0], &index[1],
|
||||
"sizeof_gep", insertAtEnd);
|
||||
#endif
|
||||
if (is32Bit || g->opt.force32BitAddressing)
|
||||
return new llvm::PtrToIntInst(gep, LLVMTypes::Int32Type,
|
||||
"sizeof_int", insertAtEnd);
|
||||
else
|
||||
return new llvm::PtrToIntInst(gep, LLVMTypes::Int64Type,
|
||||
"sizeof_int", insertAtEnd);
|
||||
}
|
||||
|
||||
const llvm::TargetData *td = GetTargetMachine()->getTargetData();
|
||||
assert(td != NULL);
|
||||
uint64_t byteSize = td->getTypeSizeInBits(type) / 8;
|
||||
Assert(td != NULL);
|
||||
uint64_t bitSize = td->getTypeSizeInBits(type);
|
||||
Assert((bitSize % 8) == 0);
|
||||
uint64_t byteSize = bitSize / 8;
|
||||
if (is32Bit || g->opt.force32BitAddressing)
|
||||
return LLVMInt32(byteSize);
|
||||
return LLVMInt32((int32_t)byteSize);
|
||||
else
|
||||
return LLVMInt64(byteSize);
|
||||
}
|
||||
|
||||
|
||||
llvm::Value *
|
||||
Target::StructOffset(LLVM_TYPE_CONST llvm::Type *type, int element) {
|
||||
Target::StructOffset(llvm::Type *type, int element,
|
||||
llvm::BasicBlock *insertAtEnd) {
|
||||
if (isa == Target::GENERIC &&
|
||||
lGenericTypeLayoutIndeterminate(type) == true) {
|
||||
llvm::Value *indices[2] = { LLVMInt32(0), LLVMInt32(element) };
|
||||
llvm::PointerType *ptrType = llvm::PointerType::get(type, 0);
|
||||
llvm::Value *voidPtr = llvm::ConstantPointerNull::get(ptrType);
|
||||
#if defined(LLVM_3_0) || defined(LLVM_3_0svn) || defined(LLVM_3_1svn)
|
||||
llvm::ArrayRef<llvm::Value *> arrayRef(&indices[0], &indices[2]);
|
||||
llvm::Instruction *gep =
|
||||
llvm::GetElementPtrInst::Create(voidPtr, arrayRef, "offset_gep",
|
||||
insertAtEnd);
|
||||
#else
|
||||
llvm::Instruction *gep =
|
||||
llvm::GetElementPtrInst::Create(voidPtr, &indices[0], &indices[2],
|
||||
"offset_gep", insertAtEnd);
|
||||
#endif
|
||||
if (is32Bit || g->opt.force32BitAddressing)
|
||||
return new llvm::PtrToIntInst(gep, LLVMTypes::Int32Type,
|
||||
"offset_int", insertAtEnd);
|
||||
else
|
||||
return new llvm::PtrToIntInst(gep, LLVMTypes::Int64Type,
|
||||
"offset_int", insertAtEnd);
|
||||
}
|
||||
|
||||
const llvm::TargetData *td = GetTargetMachine()->getTargetData();
|
||||
assert(td != NULL);
|
||||
LLVM_TYPE_CONST llvm::StructType *structType =
|
||||
llvm::dyn_cast<LLVM_TYPE_CONST llvm::StructType>(type);
|
||||
assert(structType != NULL);
|
||||
Assert(td != NULL);
|
||||
llvm::StructType *structType =
|
||||
llvm::dyn_cast<llvm::StructType>(type);
|
||||
Assert(structType != NULL);
|
||||
const llvm::StructLayout *sl = td->getStructLayout(structType);
|
||||
assert(sl != NULL);
|
||||
Assert(sl != NULL);
|
||||
|
||||
uint64_t offset = sl->getElementOffset(element);
|
||||
if (is32Bit || g->opt.force32BitAddressing)
|
||||
return LLVMInt32(offset);
|
||||
return LLVMInt32((int32_t)offset);
|
||||
else
|
||||
return LLVMInt64(offset);
|
||||
}
|
||||
@@ -320,6 +580,7 @@ Opt::Opt() {
|
||||
force32BitAddressing = true;
|
||||
unrollLoops = true;
|
||||
disableAsserts = false;
|
||||
disableMaskAllOnOptimizations = false;
|
||||
disableHandlePseudoMemoryOps = false;
|
||||
disableBlendedMaskedStores = false;
|
||||
disableCoherentControlFlow = false;
|
||||
@@ -328,7 +589,7 @@ Opt::Opt() {
|
||||
disableMaskedStoreToStore = false;
|
||||
disableGatherScatterFlattening = false;
|
||||
disableUniformMemoryOptimizations = false;
|
||||
disableMaskedStoreOptimizations = false;
|
||||
disableCoalescing = false;
|
||||
}
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
@@ -342,12 +603,15 @@ Globals::Globals() {
|
||||
debugPrint = false;
|
||||
disableWarnings = false;
|
||||
warningsAsErrors = false;
|
||||
quiet = false;
|
||||
disableLineWrap = false;
|
||||
emitPerfWarnings = true;
|
||||
emitInstrumentation = false;
|
||||
generateDebuggingSymbols = false;
|
||||
enableFuzzTest = false;
|
||||
fuzzTestSeed = -1;
|
||||
mangleFunctionsWithTarget = false;
|
||||
|
||||
|
||||
ctx = new llvm::LLVMContext;
|
||||
|
||||
#ifdef ISPC_IS_WINDOWS
|
||||
@@ -362,7 +626,13 @@ Globals::Globals() {
|
||||
// SourcePos
|
||||
|
||||
SourcePos::SourcePos(const char *n, int fl, int fc, int ll, int lc) {
|
||||
name = n ? n : m->module->getModuleIdentifier().c_str();
|
||||
name = n;
|
||||
if (name == NULL) {
|
||||
if (m != NULL)
|
||||
name = m->module->getModuleIdentifier().c_str();
|
||||
else
|
||||
name = "(unknown)";
|
||||
}
|
||||
first_line = fl;
|
||||
first_column = fc;
|
||||
last_line = ll != 0 ? ll : fl;
|
||||
|
||||
107
ispc.h
107
ispc.h
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
Copyright (c) 2010-2011, Intel Corporation
|
||||
Copyright (c) 2010-2012, Intel Corporation
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
@@ -38,8 +38,10 @@
|
||||
#ifndef ISPC_H
|
||||
#define ISPC_H
|
||||
|
||||
#if !defined(LLVM_2_9) && !defined(LLVM_3_0) && !defined(LLVM_3_0svn) && !defined(LLVM_3_1svn)
|
||||
#error "Only LLVM 2.9, 3.0, and the 3.1 development branch are supported"
|
||||
#define ISPC_VERSION "1.2.2"
|
||||
|
||||
#if !defined(LLVM_3_0) && !defined(LLVM_3_0svn) && !defined(LLVM_3_1svn)
|
||||
#error "Only LLVM 3.0, and the 3.1 development branch are supported"
|
||||
#endif
|
||||
|
||||
#if defined(_WIN32) || defined(_WIN64)
|
||||
@@ -50,11 +52,22 @@
|
||||
#define ISPC_IS_APPLE
|
||||
#endif
|
||||
|
||||
#include <assert.h>
|
||||
#include <stdint.h>
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
#include <vector>
|
||||
#include <string>
|
||||
|
||||
/* Assert(expr) evaluates expr; when it is false, __Assert() prints the
   source file, line number and the stringified expression to stderr,
   followed by a request to file a bug report, and then calls abort().
   Both macros are void-cast expressions, so Assert() can be used wherever
   an expression statement is allowed.  No NDEBUG-style guard is visible
   around these definitions, so expr appears to be evaluated in every build
   -- confirm against the full header.
   NOTE(review): the line number is printed with "%u" while __LINE__ is
   passed as-is. */
#define Assert(expr) \
|
||||
((void)((expr) ? 0 : __Assert (#expr, __FILE__, __LINE__)))
|
||||
#define __Assert(expr, file, line) \
|
||||
((void)fprintf(stderr, "%s:%u: Assertion failed: \"%s\"\n" \
|
||||
"***\n*** Please file a bug report at " \
|
||||
"https://github.com/ispc/ispc/issues\n*** (Including as much " \
|
||||
"information as you can about how to reproduce this error).\n" \
|
||||
"*** You have apparently encountered a bug in the compiler that " \
|
||||
"we'd like to fix!\n***\n", file, line, expr), abort(), 0)
|
||||
|
||||
/** @def ISPC_MAX_NVEC maximum vector size of any of the compilation
|
||||
targets.
|
||||
*/
|
||||
@@ -79,14 +92,10 @@ namespace llvm {
|
||||
class Value;
|
||||
}
|
||||
|
||||
// llvm::Type *s are no longer const in llvm 3.0
|
||||
#if defined(LLVM_3_0) || defined(LLVM_3_0svn) || defined(LLVM_3_1svn)
|
||||
#define LLVM_TYPE_CONST
|
||||
#else
|
||||
#define LLVM_TYPE_CONST const
|
||||
#endif
|
||||
|
||||
class ArrayType;
|
||||
class AST;
|
||||
class ASTNode;
|
||||
class AtomicType;
|
||||
class FunctionEmitContext;
|
||||
class Expr;
|
||||
@@ -94,12 +103,22 @@ class ExprList;
|
||||
class Function;
|
||||
class FunctionType;
|
||||
class Module;
|
||||
class PointerType;
|
||||
class Stmt;
|
||||
class Symbol;
|
||||
class SymbolTable;
|
||||
class Type;
|
||||
struct VariableDeclaration;
|
||||
|
||||
/** Enumerant for the storage class attached to a declaration.
    Presumably SC_NONE means no storage-class specifier was given and the
    other values correspond to "extern", "static", "typedef" and
    extern "C" respectively -- TODO confirm against the parser. */
enum StorageClass {
|
||||
SC_NONE,
|
||||
SC_EXTERN,
|
||||
SC_STATIC,
|
||||
SC_TYPEDEF,
|
||||
SC_EXTERN_C
|
||||
};
|
||||
|
||||
|
||||
/** @brief Representation of a range of positions in a source file.
|
||||
|
||||
This class represents a range of characters in a source file
|
||||
@@ -148,7 +167,7 @@ struct Target {
|
||||
|
||||
/** Returns a comma-delimited string giving the names of the currently
|
||||
supported target CPUs. */
|
||||
static const char *SupportedTargetCPUs();
|
||||
static std::string SupportedTargetCPUs();
|
||||
|
||||
/** Returns a comma-delimited string giving the names of the currently
|
||||
supported target architectures. */
|
||||
@@ -166,12 +185,14 @@ struct Target {
|
||||
const char *GetISAString() const;
|
||||
|
||||
/** Returns the size of the given type */
|
||||
llvm::Value *SizeOf(LLVM_TYPE_CONST llvm::Type *type);
|
||||
llvm::Value *SizeOf(llvm::Type *type,
|
||||
llvm::BasicBlock *insertAtEnd);
|
||||
|
||||
/** Given a structure type and an element number in the structure,
|
||||
returns a value corresponding to the number of bytes from the start
|
||||
of the structure where the element is located. */
|
||||
llvm::Value *StructOffset(LLVM_TYPE_CONST llvm::Type *type,
|
||||
int element);
|
||||
llvm::Value *StructOffset(llvm::Type *type,
|
||||
int element, llvm::BasicBlock *insertAtEnd);
|
||||
|
||||
/** llvm Target object representing this target. */
|
||||
const llvm::Target *target;
|
||||
@@ -182,7 +203,7 @@ struct Target {
|
||||
flexible/performant of them will appear last in the enumerant. Note
|
||||
also that __best_available_isa() needs to be updated if ISAs are
|
||||
added or the enumerant values are reordered. */
|
||||
enum ISA { SSE2, SSE4, AVX, NUM_ISAS };
|
||||
enum ISA { SSE2, SSE4, AVX, AVX2, GENERIC, NUM_ISAS };
|
||||
|
||||
/** Instruction set being compiled to. */
|
||||
ISA isa;
|
||||
@@ -211,6 +232,23 @@ struct Target {
|
||||
|
||||
/** Indicates whether position independent code should be generated. */
|
||||
bool generatePIC;
|
||||
|
||||
/** Is there overhead associated with masking on the target
|
||||
architecture; e.g. there is on SSE, due to extra blends and the
|
||||
like, but there isn't with an ISA that supports masking
|
||||
natively. */
|
||||
bool maskingIsFree;
|
||||
|
||||
/** Is it safe to run code with the mask all off: e.g. on SSE, the fast
|
||||
gather trick assumes that at least one program instance is running
|
||||
(so that it can safely assume that the array base pointer is
|
||||
valid). */
|
||||
bool allOffMaskIsSafe;
|
||||
|
||||
/** How many bits are used to store each element of the mask: e.g. this
|
||||
is 32 on SSE/AVX, since that matches the HW better, but it's 1 for
|
||||
the generic target. */
|
||||
int maskBitCount;
|
||||
};
|
||||
|
||||
|
||||
@@ -247,10 +285,15 @@ struct Opt {
|
||||
*/
|
||||
bool force32BitAddressing;
|
||||
|
||||
/** Indicates whether assert() statements should be ignored (for
|
||||
/** Indicates whether Assert() statements should be ignored (for
|
||||
performance in the generated code). */
|
||||
bool disableAsserts;
|
||||
|
||||
|
||||
/** If enabled, disables the various optimizations that kick in when
|
||||
the execution mask can be determined to be "all on" at compile
|
||||
time. */
|
||||
bool disableMaskAllOnOptimizations;
|
||||
|
||||
/** If enabled, the various __pseudo* memory ops (gather/scatter,
|
||||
masked load/store) are left in their __pseudo* form, for better
|
||||
understanding of the structure of generated code when reading
|
||||
@@ -303,13 +346,9 @@ struct Opt {
|
||||
the impact of this optimization. */
|
||||
bool disableUniformMemoryOptimizations;
|
||||
|
||||
/** Disables optimizations for masked stores: masked stores with the
|
||||
mask all on are transformed to regular stores, and masked stores
|
||||
with the mask are all off are removed (which in turn can allow
|
||||
eliminating additional dead code related to computing the value
|
||||
stored). This is likely only useful for measuring the impact of
|
||||
this optimization. */
|
||||
bool disableMaskedStoreOptimizations;
|
||||
/** Disables optimizations that coalesce incoherent scalar memory
|
||||
access from gathers into wider vector operations, when possible. */
|
||||
bool disableCoalescing;
|
||||
};
|
||||
|
||||
/** @brief This structure collects together a number of global variables.
|
||||
@@ -359,6 +398,9 @@ struct Globals {
|
||||
possible performance pitfalls. */
|
||||
bool emitPerfWarnings;
|
||||
|
||||
/** Indicates whether all printed output should be suppressed. */
|
||||
bool quiet;
|
||||
|
||||
/** Indicates whether calls should be emitted in the program to an
|
||||
externally-defined program instrumentation function. (See the
|
||||
"Instrumenting your ispc programs" section in the user's
|
||||
@@ -373,6 +415,14 @@ struct Globals {
|
||||
vector width to them. */
|
||||
bool mangleFunctionsWithTarget;
|
||||
|
||||
/** If enabled, the lexer will randomly replace some tokens returned
|
||||
with other tokens, in order to test error condition handling in the
|
||||
compiler. */
|
||||
bool enableFuzzTest;
|
||||
|
||||
/** Seed for random number generator used for fuzz testing. */
|
||||
int fuzzTestSeed;
|
||||
|
||||
/** Global LLVMContext object */
|
||||
llvm::LLVMContext *ctx;
|
||||
|
||||
@@ -383,18 +433,25 @@ struct Globals {
|
||||
/** Arguments to pass along to the C pre-processor, if it is run on the
|
||||
program before compilation. */
|
||||
std::vector<std::string> cppArgs;
|
||||
|
||||
/** Additional user-provided directories to search when processing
|
||||
#include directives in the preprocessor. */
|
||||
std::vector<std::string> includePath;
|
||||
};
|
||||
|
||||
enum {
|
||||
COST_ASSIGN = 1,
|
||||
COST_COHERENT_BREAK_CONTINE = 4,
|
||||
COST_COMPLEX_ARITH_OP = 4,
|
||||
COST_DELETE = 32,
|
||||
COST_DEREF = 4,
|
||||
COST_FUNCALL = 4,
|
||||
COST_FUNPTR_UNIFORM = 12,
|
||||
COST_FUNPTR_VARYING = 24,
|
||||
COST_GATHER = 8,
|
||||
COST_GOTO = 4,
|
||||
COST_LOAD = 2,
|
||||
COST_NEW = 32,
|
||||
COST_REGULAR_BREAK_CONTINUE = 2,
|
||||
COST_RETURN = 4,
|
||||
COST_SELECT = 4,
|
||||
@@ -407,6 +464,8 @@ enum {
|
||||
COST_VARYING_IF = 3,
|
||||
COST_UNIFORM_LOOP = 4,
|
||||
COST_VARYING_LOOP = 6,
|
||||
COST_UNIFORM_SWITCH = 4,
|
||||
COST_VARYING_SWITCH = 12,
|
||||
COST_ASSERT = 8,
|
||||
|
||||
CHECK_MASK_AT_FUNCTION_START_COST = 16,
|
||||
|
||||
5
ispc.sln
5
ispc.sln
@@ -3,8 +3,6 @@ Microsoft Visual Studio Solution File, Format Version 11.00
|
||||
# Visual Studio 2010
|
||||
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "ispc", "ispc.vcxproj", "{9861F490-F516-480C-B63C-D62A77AFA9D5}"
|
||||
EndProject
|
||||
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "ispc_test", "ispc_test.vcxproj", "{92547BA8-BE86-4E78-8799-1D72A70E5831}"
|
||||
EndProject
|
||||
Global
|
||||
GlobalSection(SolutionConfigurationPlatforms) = preSolution
|
||||
Debug|Win32 = Debug|Win32
|
||||
@@ -15,9 +13,6 @@ Global
|
||||
{9861F490-F516-480C-B63C-D62A77AFA9D5}.Debug|Win32.Build.0 = Debug|Win32
|
||||
{9861F490-F516-480C-B63C-D62A77AFA9D5}.Release|Win32.ActiveCfg = Release|Win32
|
||||
{9861F490-F516-480C-B63C-D62A77AFA9D5}.Release|Win32.Build.0 = Release|Win32
|
||||
{92547BA8-BE86-4E78-8799-1D72A70E5831}.Debug|Win32.ActiveCfg = Debug|Win32
|
||||
{92547BA8-BE86-4E78-8799-1D72A70E5831}.Debug|Win32.Build.0 = Debug|Win32
|
||||
{92547BA8-BE86-4E78-8799-1D72A70E5831}.Release|Win32.ActiveCfg = Release|Win32
|
||||
EndGlobalSection
|
||||
GlobalSection(SolutionProperties) = preSolution
|
||||
HideSolutionNode = FALSE
|
||||
|
||||
228
ispc.vcxproj
228
ispc.vcxproj
@@ -13,20 +13,28 @@
|
||||
<ItemGroup>
|
||||
<ClCompile Include="ast.cpp" />
|
||||
<ClCompile Include="builtins.cpp" />
|
||||
<ClCompile Include="cbackend.cpp" />
|
||||
<ClCompile Include="ctx.cpp" />
|
||||
<ClCompile Include="decl.cpp" />
|
||||
<ClCompile Include="expr.cpp" />
|
||||
<ClCompile Include="func.cpp" />
|
||||
<ClCompile Include="gen-bitcode-avx.cpp" />
|
||||
<ClCompile Include="gen-bitcode-avx-x2.cpp" />
|
||||
<ClCompile Include="gen-bitcode-avx1.cpp" />
|
||||
<ClCompile Include="gen-bitcode-avx1-x2.cpp" />
|
||||
<ClCompile Include="gen-bitcode-avx2.cpp" />
|
||||
<ClCompile Include="gen-bitcode-avx2-x2.cpp" />
|
||||
<ClCompile Include="gen-bitcode-c-32.cpp" />
|
||||
<ClCompile Include="gen-bitcode-c-64.cpp" />
|
||||
<ClCompile Include="gen-bitcode-dispatch.cpp" />
|
||||
<ClCompile Include="gen-bitcode-generic-1.cpp" />
|
||||
<ClCompile Include="gen-bitcode-generic-4.cpp" />
|
||||
<ClCompile Include="gen-bitcode-generic-8.cpp" />
|
||||
<ClCompile Include="gen-bitcode-generic-16.cpp" />
|
||||
<ClCompile Include="gen-bitcode-sse2.cpp" />
|
||||
<ClCompile Include="gen-bitcode-sse2-x2.cpp" />
|
||||
<ClCompile Include="gen-bitcode-sse4.cpp" />
|
||||
<ClCompile Include="gen-bitcode-sse4-x2.cpp" />
|
||||
<ClCompile Include="gen-stdlib.cpp" />
|
||||
<ClCompile Include="gen-stdlib-generic.cpp" />
|
||||
<ClCompile Include="gen-stdlib-x86.cpp" />
|
||||
<ClCompile Include="ispc.cpp" />
|
||||
<ClCompile Include="lex.cc">
|
||||
<DisableSpecificWarnings Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">4146;4800;4996;4355;4624;4005;4003;4018</DisableSpecificWarnings>
|
||||
@@ -40,15 +48,15 @@
|
||||
<DisableSpecificWarnings Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">4146;4800;4996;4355;4624;4005;4065</DisableSpecificWarnings>
|
||||
<DisableSpecificWarnings Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">4146;4800;4996;4355;4624;4005;4065</DisableSpecificWarnings>
|
||||
</ClCompile>
|
||||
<CustomBuild Include="builtins-c.c">
|
||||
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">%LLVM_INSTALL_DIR%\bin\clang -m32 -emit-llvm builtins-c.c -c -o - | %LLVM_INSTALL_DIR%\bin\llvm-dis - | python bitcode2cpp.py builtins-c-32.c > gen-bitcode-c-32.cpp;
|
||||
%LLVM_INSTALL_DIR%\bin\clang -m64 -emit-llvm builtins-c.c -c -o - | %LLVM_INSTALL_DIR%\bin\llvm-dis - | python bitcode2cpp.py builtins-c-64.c > gen-bitcode-c-64.cpp</Command>
|
||||
<Message Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">clang builtins-c.c</Message>
|
||||
<Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">%LLVM_INSTALL_DIR%\bin\clang -m32 -emit-llvm builtins-c.c -c -o - | %LLVM_INSTALL_DIR%\bin\llvm-dis - | python bitcode2cpp.py builtins-c-32.c > gen-bitcode-c-32.cpp;
|
||||
%LLVM_INSTALL_DIR%\bin\clang -m64 -emit-llvm builtins-c.c -c -o - | %LLVM_INSTALL_DIR%\bin\llvm-dis - | python bitcode2cpp.py builtins-c-64.c > gen-bitcode-c-64.cpp</Command>
|
||||
<Message Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">clang builtins-c.c</Message>
|
||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">gen-bitcode-c-32.cpp;gen-bitcore-c-64.cpp</Outputs>
|
||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">gen-bitcode-c-32.cpp;gen-bitcore-c-64.cpp</Outputs>
|
||||
<CustomBuild Include="builtins\builtins.c">
|
||||
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">%LLVM_INSTALL_DIR%\bin\clang -m32 -emit-llvm builtins\builtins.c -c -o - | %LLVM_INSTALL_DIR%\bin\llvm-dis - | python bitcode2cpp.py c-32 > gen-bitcode-c-32.cpp;
|
||||
%LLVM_INSTALL_DIR%\bin\clang -m64 -emit-llvm builtins\builtins.c -c -o - | %LLVM_INSTALL_DIR%\bin\llvm-dis - | python bitcode2cpp.py c-64 > gen-bitcode-c-64.cpp</Command>
|
||||
<Message Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Building builtins.c</Message>
|
||||
<Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">%LLVM_INSTALL_DIR%\bin\clang -m32 -emit-llvm builtins\builtins.c -c -o - | %LLVM_INSTALL_DIR%\bin\llvm-dis - | python bitcode2cpp.py c-32 > gen-bitcode-c-32.cpp;
|
||||
%LLVM_INSTALL_DIR%\bin\clang -m64 -emit-llvm builtins\builtins.c -c -o - | %LLVM_INSTALL_DIR%\bin\llvm-dis - | python bitcode2cpp.py c-64 > gen-bitcode-c-64.cpp</Command>
|
||||
<Message Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">Building builtins.c</Message>
|
||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">gen-bitcode-c-32.cpp;gen-bitcode-c-64.cpp</Outputs>
|
||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">gen-bitcode-c-32.cpp;gen-bitcode-c-64.cpp</Outputs>
|
||||
</CustomBuild>
|
||||
<ClCompile Include="stmt.cpp" />
|
||||
<ClCompile Include="sym.cpp" />
|
||||
@@ -75,103 +83,185 @@
|
||||
<ItemGroup>
|
||||
<CustomBuild Include="stdlib.ispc">
|
||||
<FileType>Document</FileType>
|
||||
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">%LLVM_INSTALL_DIR%\bin\clang -E -x c %(Filename).ispc -DISPC=1 -DPI=3.1415926535 | python stdlib2cpp.py > gen-stdlib.cpp</Command>
|
||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">gen-stdlib.cpp</Outputs>
|
||||
<Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">%LLVM_INSTALL_DIR%\bin\clang -E -x c %(Filename).ispc -DISPC=1 -DPI=3.1415926535 | python stdlib2cpp.py > gen-stdlib.cpp</Command>
|
||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">gen-stdlib.cpp</Outputs>
|
||||
<Message Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Building gen-stdlib.cpp</Message>
|
||||
<Message Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">Building gen-stdlib.cpp</Message>
|
||||
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">%LLVM_INSTALL_DIR%\bin\clang -E -x c %(Filename).ispc -DISPC=1 -DPI=3.1415926535 | python stdlib2cpp.py x86 > gen-stdlib-x86.cpp;
|
||||
%LLVM_INSTALL_DIR%\bin\clang -E -x c %(Filename).ispc -DISPC=1 -DISPC_TARGET_GENERIC=1 -DPI=3.1415926535 | python stdlib2cpp.py generic > gen-stdlib-generic.cpp;
|
||||
</Command>
|
||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">gen-stdlib-generic.cpp;gen-stdlib-x86.cpp</Outputs>
|
||||
<Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">%LLVM_INSTALL_DIR%\bin\clang -E -x c %(Filename).ispc -DISPC=1 -DPI=3.1415926535 | python stdlib2cpp.py x86 > gen-stdlib-x86.cpp;
|
||||
%LLVM_INSTALL_DIR%\bin\clang -E -x c %(Filename).ispc -DISPC=1 -DISPC_TARGET_GENERIC=1 -DPI=3.1415926535 | python stdlib2cpp.py generic > gen-stdlib-generic.cpp;
|
||||
</Command>
|
||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">gen-stdlib-generic.cpp;gen-stdlib-x86.cpp</Outputs>
|
||||
<Message Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Building gen-stdlib-{generic,x86}.cpp</Message>
|
||||
<Message Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">Building gen-stdlib-{generic,x86}.cpp</Message>
|
||||
</CustomBuild>
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<CustomBuild Include="builtins-sse4.ll">
|
||||
<CustomBuild Include="builtins\dispatch.ll">
|
||||
<FileType>Document</FileType>
|
||||
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">m4 builtins.m4 builtins-sse4.ll | python bitcode2cpp.py builtins-sse4.ll > gen-bitcode-sse4.cpp</Command>
|
||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">gen-bitcode-sse4.cpp</Outputs>
|
||||
<AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">builtins.m4;builtins-sse4-common.ll</AdditionalInputs>
|
||||
<Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">m4 builtins.m4 builtins-sse4.ll | python bitcode2cpp.py builtins-sse4.ll > gen-bitcode-sse4.cpp</Command>
|
||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">gen-bitcode-sse4.cpp</Outputs>
|
||||
<AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">builtins.m4;builtins-sse4-common.ll</AdditionalInputs>
|
||||
<Message Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Building gen-bitcode-sse4.cpp</Message>
|
||||
<Message Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">Building gen-bitcode-sse4.cpp</Message>
|
||||
</CustomBuild>
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<CustomBuild Include="builtins-dispatch.ll">
|
||||
<FileType>Document</FileType>
|
||||
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">m4 builtins.m4 builtins-dispatch.ll | python bitcode2cpp.py builtins-dispatch.ll > gen-bitcode-dispatch.cpp</Command>
|
||||
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% builtins\dispatch.ll | python bitcode2cpp.py dispatch.ll > gen-bitcode-dispatch.cpp</Command>
|
||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">gen-bitcode-dispatch.cpp</Outputs>
|
||||
<AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">builtins.m4</AdditionalInputs>
|
||||
<Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">m4 builtins.m4 builtins-dispatch.ll | python bitcode2cpp.py builtins-dispatch.ll > gen-bitcode-dispatch.cpp</Command>
|
||||
<AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">builtins\util.m4</AdditionalInputs>
|
||||
<Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% builtins\dispatch.ll | python bitcode2cpp.py dispatch.ll > gen-bitcode-dispatch.cpp</Command>
|
||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">gen-bitcode-dispatch.cpp</Outputs>
|
||||
<AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">builtins.m4</AdditionalInputs>
|
||||
<AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">builtins\util.m4</AdditionalInputs>
|
||||
<Message Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Building gen-bitcode-dispatch.cpp</Message>
|
||||
<Message Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">Building gen-bitcode-dispatch.cpp</Message>
|
||||
</CustomBuild>
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<CustomBuild Include="builtins-sse4-x2.ll">
|
||||
<CustomBuild Include="builtins\target-sse4.ll">
|
||||
<FileType>Document</FileType>
|
||||
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">m4 builtins.m4 builtins-sse4-x2.ll | python bitcode2cpp.py builtins-sse4-x2.ll > gen-bitcode-sse4-x2.cpp</Command>
|
||||
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% builtins\target-sse4.ll | python bitcode2cpp.py builtins\target-sse4.ll > gen-bitcode-sse4.cpp</Command>
|
||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">gen-bitcode-sse4.cpp</Outputs>
|
||||
<AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">builtins\util.m4;builtins\target-sse4-common.ll</AdditionalInputs>
|
||||
<Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% builtins\target-sse4.ll | python bitcode2cpp.py builtins\target-sse4.ll > gen-bitcode-sse4.cpp</Command>
|
||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">gen-bitcode-sse4.cpp</Outputs>
|
||||
<AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">builtins\util.m4;builtins\target-sse4-common.ll</AdditionalInputs>
|
||||
<Message Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Building gen-bitcode-sse4.cpp</Message>
|
||||
<Message Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">Building gen-bitcode-sse4.cpp</Message>
|
||||
</CustomBuild>
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<CustomBuild Include="builtins\target-sse4-x2.ll">
|
||||
<FileType>Document</FileType>
|
||||
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% builtins\target-sse4-x2.ll | python bitcode2cpp.py builtins\target-sse4-x2.ll > gen-bitcode-sse4-x2.cpp</Command>
|
||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">gen-bitcode-sse4-x2.cpp</Outputs>
|
||||
<AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">builtins.m4;builtins-sse4-common.ll</AdditionalInputs>
|
||||
<Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">m4 builtins.m4 builtins-sse4-x2.ll | python bitcode2cpp.py builtins-sse4-x2.ll > gen-bitcode-sse4-x2.cpp</Command>
|
||||
<AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">builtins\util.m4;builtins\target-sse4-common.ll</AdditionalInputs>
|
||||
<Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% builtins\target-sse4-x2.ll | python bitcode2cpp.py builtins\target-sse4-x2.ll > gen-bitcode-sse4-x2.cpp</Command>
|
||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">gen-bitcode-sse4-x2.cpp</Outputs>
|
||||
<AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">builtins.m4;builtins-sse4-common.ll</AdditionalInputs>
|
||||
<AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">builtins\util.m4;builtins\target-sse4-common.ll</AdditionalInputs>
|
||||
<Message Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Building gen-bitcode-sse4-x2.cpp</Message>
|
||||
<Message Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">Building gen-bitcode-sse4-x2.cpp</Message>
|
||||
</CustomBuild>
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<CustomBuild Include="builtins-sse2.ll">
|
||||
<CustomBuild Include="builtins\target-sse2.ll">
|
||||
<FileType>Document</FileType>
|
||||
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">m4 builtins.m4 builtins-sse2.ll | python bitcode2cpp.py builtins-sse2.ll > gen-bitcode-sse2.cpp</Command>
|
||||
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% builtins\target-sse2.ll | python bitcode2cpp.py builtins\target-sse2.ll > gen-bitcode-sse2.cpp</Command>
|
||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">gen-bitcode-sse2.cpp</Outputs>
|
||||
<AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">builtins.m4;builtins-sse2-common.ll</AdditionalInputs>
|
||||
<Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">m4 builtins.m4 builtins-sse2.ll | python bitcode2cpp.py builtins-sse2.ll > gen-bitcode-sse2.cpp</Command>
|
||||
<AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">builtins\util.m4;builtins\target-sse2-common.ll</AdditionalInputs>
|
||||
<Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% builtins\target-sse2.ll | python bitcode2cpp.py builtins\target-sse2.ll > gen-bitcode-sse2.cpp</Command>
|
||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">gen-bitcode-sse2.cpp</Outputs>
|
||||
<AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">builtins.m4;builtins-sse2-common.ll</AdditionalInputs>
|
||||
<AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">builtins\util.m4;builtins\target-sse2-common.ll</AdditionalInputs>
|
||||
<Message Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Building gen-bitcode-sse2.cpp</Message>
|
||||
<Message Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">Building gen-bitcode-sse2.cpp</Message>
|
||||
</CustomBuild>
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<CustomBuild Include="builtins-sse2-x2.ll">
|
||||
<CustomBuild Include="builtins\target-sse2-x2.ll">
|
||||
<FileType>Document</FileType>
|
||||
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">m4 builtins.m4 builtins-sse2-x2.ll | python bitcode2cpp.py builtins-sse2-x2.ll > gen-bitcode-sse2-x2.cpp</Command>
|
||||
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% builtins\target-sse2-x2.ll | python bitcode2cpp.py builtins\target-sse2-x2.ll > gen-bitcode-sse2-x2.cpp</Command>
|
||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">gen-bitcode-sse2-x2.cpp</Outputs>
|
||||
<AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">builtins.m4;builtins-sse2-common.ll</AdditionalInputs>
|
||||
<Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">m4 builtins.m4 builtins-sse2-x2.ll | python bitcode2cpp.py builtins-sse2-x2.ll > gen-bitcode-sse2-x2.cpp</Command>
|
||||
<AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">builtins\util.m4;builtins\target-sse2-common.ll</AdditionalInputs>
|
||||
<Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% builtins\target-sse2-x2.ll | python bitcode2cpp.py builtins\target-sse2-x2.ll > gen-bitcode-sse2-x2.cpp</Command>
|
||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">gen-bitcode-sse2-x2.cpp</Outputs>
|
||||
<AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">builtins.m4;builtins-sse2-common.ll</AdditionalInputs>
|
||||
<AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">builtins\util.m4;builtins\target-sse2-common.ll</AdditionalInputs>
|
||||
<Message Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Building gen-bitcode-sse2-x2.cpp</Message>
|
||||
<Message Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">Building gen-bitcode-sse2-x2.cpp</Message>
|
||||
</CustomBuild>
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<CustomBuild Include="builtins-avx.ll">
|
||||
<CustomBuild Include="builtins\target-avx1.ll">
|
||||
<FileType>Document</FileType>
|
||||
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">m4 builtins.m4 builtins-avx.ll | python bitcode2cpp.py builtins-avx.ll > gen-bitcode-avx.cpp</Command>
|
||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">gen-bitcode-avx.cpp</Outputs>
|
||||
<AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">builtins.m4;builtins-avx-common.ll</AdditionalInputs>
|
||||
<Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">m4 builtins.m4 builtins-avx.ll | python bitcode2cpp.py builtins-avx.ll > gen-bitcode-avx.cpp</Command>
|
||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">gen-bitcode-avx.cpp</Outputs>
|
||||
<AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">builtins.m4;builtins-avx-common.ll</AdditionalInputs>
|
||||
<Message Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Building gen-bitcode-avx.cpp</Message>
|
||||
<Message Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">Building gen-bitcode-avx.cpp</Message>
|
||||
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% builtins\target-avx1.ll | python bitcode2cpp.py builtins\target-avx1.ll > gen-bitcode-avx1.cpp</Command>
|
||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">gen-bitcode-avx1.cpp</Outputs>
|
||||
<AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">builtins\util.m4;builtins\target-avx-common.ll;builtins\target-avx.ll</AdditionalInputs>
|
||||
<Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% builtins\target-avx1.ll | python bitcode2cpp.py builtins\target-avx1.ll > gen-bitcode-avx1.cpp</Command>
|
||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">gen-bitcode-avx1.cpp</Outputs>
|
||||
<AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">builtins\util.m4;builtins\target-avx-common.ll;builtins\target-avx.ll</AdditionalInputs>
|
||||
<Message Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Building gen-bitcode-avx1.cpp</Message>
|
||||
<Message Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">Building gen-bitcode-avx1.cpp</Message>
|
||||
</CustomBuild>
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<CustomBuild Include="builtins-avx-x2.ll">
|
||||
<CustomBuild Include="builtins\target-avx1-x2.ll">
|
||||
<FileType>Document</FileType>
|
||||
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">m4 builtins.m4 builtins-avx-x2.ll | python bitcode2cpp.py builtins-avx-x2.ll > gen-bitcode-avx-x2.cpp</Command>
|
||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">gen-bitcode-avx-x2.cpp</Outputs>
|
||||
<AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">builtins.m4;builtins-sse.ll</AdditionalInputs>
|
||||
<Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">m4 builtins.m4 builtins-avx-x2.ll | python bitcode2cpp.py builtins-avx-x2.ll > gen-bitcode-avx-x2.cpp</Command>
|
||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">gen-bitcode-avx-x2.cpp</Outputs>
|
||||
<AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">builtins.m4;builtins-sse.ll</AdditionalInputs>
|
||||
<Message Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Building gen-bitcode-avx-x2.cpp</Message>
|
||||
<Message Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">Building gen-bitcode-avx-x2.cpp</Message>
|
||||
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% builtins\target-avx1-x2.ll | python bitcode2cpp.py builtins\target-avx1-x2.ll > gen-bitcode-avx1-x2.cpp</Command>
|
||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">gen-bitcode-avx1-x2.cpp</Outputs>
|
||||
<AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">builtins\util.m4;builtins\target-avx-common.ll;builtins\target-avx-x2.ll</AdditionalInputs>
|
||||
<Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% builtins\target-avx1-x2.ll | python bitcode2cpp.py builtins\target-avx1-x2.ll > gen-bitcode-avx1-x2.cpp</Command>
|
||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">gen-bitcode-avx1-x2.cpp</Outputs>
|
||||
<AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">builtins\util.m4;builtins\target-avx-common.ll;builtins\target-avx-x2.ll</AdditionalInputs>
|
||||
<Message Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Building gen-bitcode-avx1-x2.cpp</Message>
|
||||
<Message Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">Building gen-bitcode-avx1-x2.cpp</Message>
|
||||
</CustomBuild>
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<CustomBuild Include="builtins\target-avx2.ll">
|
||||
<FileType>Document</FileType>
|
||||
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% builtins\target-avx2.ll | python bitcode2cpp.py builtins\target-avx2.ll > gen-bitcode-avx2.cpp</Command>
|
||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">gen-bitcode-avx2.cpp</Outputs>
|
||||
<AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">builtins\util.m4;builtins\target-avx-common.ll;builtins\target-avx.ll</AdditionalInputs>
|
||||
<Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% builtins\target-avx2.ll | python bitcode2cpp.py builtins\target-avx2.ll > gen-bitcode-avx2.cpp</Command>
|
||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">gen-bitcode-avx2.cpp</Outputs>
|
||||
<AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">builtins\util.m4;builtins\target-avx-common.ll;builtins\target-avx.ll</AdditionalInputs>
|
||||
<Message Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Building gen-bitcode-avx2.cpp</Message>
|
||||
<Message Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">Building gen-bitcode-avx2.cpp</Message>
|
||||
</CustomBuild>
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<CustomBuild Include="builtins\target-avx2-x2.ll">
|
||||
<FileType>Document</FileType>
|
||||
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% builtins\target-avx2-x2.ll | python bitcode2cpp.py builtins\target-avx2-x2.ll > gen-bitcode-avx2-x2.cpp</Command>
|
||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">gen-bitcode-avx2-x2.cpp</Outputs>
|
||||
<AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">builtins\util.m4;builtins\target-avx-common.ll;builtins\target-avx-x2.ll</AdditionalInputs>
|
||||
<Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% builtins\target-avx2-x2.ll | python bitcode2cpp.py builtins\target-avx2-x2.ll > gen-bitcode-avx2-x2.cpp</Command>
|
||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">gen-bitcode-avx2-x2.cpp</Outputs>
|
||||
<AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">builtins\util.m4;builtins\target-avx-common.ll;builtins\target-avx-x2.ll</AdditionalInputs>
|
||||
<Message Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Building gen-bitcode-avx2-x2.cpp</Message>
|
||||
<Message Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">Building gen-bitcode-avx2-x2.cpp</Message>
|
||||
</CustomBuild>
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<CustomBuild Include="builtins\target-generic-1.ll">
|
||||
<FileType>Document</FileType>
|
||||
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% builtins\target-generic-1.ll | python bitcode2cpp.py builtins\target-generic-1.ll > gen-bitcode-generic-1.cpp</Command>
|
||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">gen-bitcode-generic-1.cpp</Outputs>
|
||||
<AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">builtins\util.m4;builtins\target-generic-common.ll</AdditionalInputs>
|
||||
<Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% builtins\target-generic-1.ll | python bitcode2cpp.py builtins\target-generic-1.ll > gen-bitcode-generic-1.cpp</Command>
|
||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">gen-bitcode-generic-1.cpp</Outputs>
|
||||
<AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">builtins\util.m4;builtins\target-generic-common.ll</AdditionalInputs>
|
||||
<Message Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Building gen-bitcode-generic-1.cpp</Message>
|
||||
<Message Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">Building gen-bitcode-generic-1.cpp</Message>
|
||||
</CustomBuild>
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<CustomBuild Include="builtins\target-generic-4.ll">
|
||||
<FileType>Document</FileType>
|
||||
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% builtins\target-generic-4.ll | python bitcode2cpp.py builtins\target-generic-4.ll > gen-bitcode-generic-4.cpp</Command>
|
||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">gen-bitcode-generic-4.cpp</Outputs>
|
||||
<AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">builtins\util.m4;builtins\target-generic-common.ll</AdditionalInputs>
|
||||
<Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% builtins\target-generic-4.ll | python bitcode2cpp.py builtins\target-generic-4.ll > gen-bitcode-generic-4.cpp</Command>
|
||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">gen-bitcode-generic-4.cpp</Outputs>
|
||||
<AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">builtins\util.m4;builtins\target-generic-common.ll</AdditionalInputs>
|
||||
<Message Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Building gen-bitcode-generic-4.cpp</Message>
|
||||
<Message Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">Building gen-bitcode-generic-4.cpp</Message>
|
||||
</CustomBuild>
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<CustomBuild Include="builtins\target-generic-8.ll">
|
||||
<FileType>Document</FileType>
|
||||
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% builtins\target-generic-8.ll | python bitcode2cpp.py builtins\target-generic-8.ll > gen-bitcode-generic-8.cpp</Command>
|
||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">gen-bitcode-generic-8.cpp</Outputs>
|
||||
<AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">builtins\util.m4;builtins\target-generic-common.ll</AdditionalInputs>
|
||||
<Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% builtins\target-generic-8.ll | python bitcode2cpp.py builtins\target-generic-8.ll > gen-bitcode-generic-8.cpp</Command>
|
||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">gen-bitcode-generic-8.cpp</Outputs>
|
||||
<AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">builtins\util.m4;builtins\target-generic-common.ll</AdditionalInputs>
|
||||
<Message Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Building gen-bitcode-generic-8.cpp</Message>
|
||||
<Message Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">Building gen-bitcode-generic-8.cpp</Message>
|
||||
</CustomBuild>
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<CustomBuild Include="builtins\target-generic-16.ll">
|
||||
<FileType>Document</FileType>
|
||||
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% builtins\target-generic-16.ll | python bitcode2cpp.py builtins\target-generic-16.ll > gen-bitcode-generic-16.cpp</Command>
|
||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">gen-bitcode-generic-16.cpp</Outputs>
|
||||
<AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">builtins\util.m4;builtins\target-generic-common.ll</AdditionalInputs>
|
||||
<Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% builtins\target-generic-16.ll | python bitcode2cpp.py builtins\target-generic-16.ll > gen-bitcode-generic-16.cpp</Command>
|
||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">gen-bitcode-generic-16.cpp</Outputs>
|
||||
<AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">builtins\util.m4;builtins\target-generic-common.ll</AdditionalInputs>
|
||||
<Message Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Building gen-bitcode-generic-16.cpp</Message>
|
||||
<Message Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">Building gen-bitcode-generic-16.cpp</Message>
|
||||
</CustomBuild>
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
|
||||
379
ispc_test.cpp
379
ispc_test.cpp
@@ -1,379 +0,0 @@
|
||||
/*
|
||||
Copyright (c) 2010-2011, Intel Corporation
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of Intel Corporation nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
|
||||
IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
||||
TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
|
||||
PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
|
||||
OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#define _CRT_SECURE_NO_WARNINGS
|
||||
|
||||
#if defined(_WIN32) || defined(_WIN64)
|
||||
#define ISPC_IS_WINDOWS
|
||||
#elif defined(__linux__)
|
||||
#define ISPC_IS_LINUX
|
||||
#elif defined(__APPLE__)
|
||||
#define ISPC_IS_APPLE
|
||||
#endif
|
||||
|
||||
#ifdef ISPC_IS_WINDOWS
|
||||
#define NOMINMAX
|
||||
#include <windows.h>
|
||||
#endif
|
||||
#include <stdio.h>
|
||||
#include <stdint.h>
|
||||
#include <stdlib.h>
|
||||
#include <memory.h>
|
||||
#ifdef ISPC_IS_LINUX
|
||||
#include <malloc.h>
|
||||
#endif
|
||||
|
||||
#ifdef ISPC_HAVE_SVML
|
||||
#include <xmmintrin.h>
|
||||
extern "C" {
|
||||
extern __m128 __svml_sinf4(__m128);
|
||||
extern __m128 __svml_cosf4(__m128);
|
||||
extern __m128 __svml_sincosf4(__m128 *,__m128);
|
||||
extern __m128 __svml_tanf4(__m128);
|
||||
extern __m128 __svml_atanf4(__m128);
|
||||
extern __m128 __svml_atan2f4(__m128, __m128);
|
||||
extern __m128 __svml_expf4(__m128);
|
||||
extern __m128 __svml_logf4(__m128);
|
||||
extern __m128 __svml_powf4(__m128, __m128);
|
||||
}
|
||||
#endif
|
||||
|
||||
#include <llvm/LLVMContext.h>
|
||||
#include <llvm/Module.h>
|
||||
#include <llvm/Type.h>
|
||||
#include <llvm/DerivedTypes.h>
|
||||
#include <llvm/Instructions.h>
|
||||
#include <llvm/ExecutionEngine/ExecutionEngine.h>
|
||||
#if defined(LLVM_3_0) || defined(LLVM_3_0svn) || defined(LLVM_3_1svn)
|
||||
#include <llvm/Support/TargetRegistry.h>
|
||||
#include <llvm/Support/TargetSelect.h>
|
||||
#else
|
||||
#include <llvm/Target/TargetRegistry.h>
|
||||
#include <llvm/Target/TargetSelect.h>
|
||||
#endif
|
||||
#include <llvm/ExecutionEngine/JIT.h>
|
||||
#include <llvm/Target/TargetOptions.h>
|
||||
#include <llvm/Target/TargetData.h>
|
||||
#include <llvm/Transforms/Scalar.h>
|
||||
#include <llvm/Transforms/IPO.h>
|
||||
#include <llvm/PassManager.h>
|
||||
#include <llvm/Support/CFG.h>
|
||||
#include <llvm/Analysis/Verifier.h>
|
||||
#include <llvm/Assembly/PrintModulePass.h>
|
||||
#include <llvm/Support/raw_ostream.h>
|
||||
#include <llvm/Bitcode/ReaderWriter.h>
|
||||
#include <llvm/Support/MemoryBuffer.h>
|
||||
#include <llvm/Support/system_error.h>
|
||||
|
||||
bool shouldFail = false;
|
||||
|
||||
extern "C" {
|
||||
void ISPCLaunch(void **, void *, void *, int32_t);
|
||||
void ISPCSync(void *);
|
||||
void *ISPCAlloc(void **, int64_t size, int32_t alignment);
|
||||
}
|
||||
|
||||
// Serial stand-in for the task-launch hook that compiled ispc code calls.
//
//   handle - out-parameter; receives a dummy, non-NULL marker value.
//   func   - task entry point, called as
//            func(data, 0, 1, taskIndex, count).
//   data   - opaque argument block forwarded unchanged to every task.
//   count  - number of task instances to run.
//
// All tasks run inline, in index order, on the calling thread before this
// function returns.
void ISPCLaunch(void **handle, void *func, void *data, int32_t count) {
    // Recognizable placeholder; no real task-group state is allocated here.
    *handle = (void *)0xdeadbeef;

    typedef void (*TaskFuncType)(void *, int, int, int, int);
    TaskFuncType task = (TaskFuncType)(func);

    // The constant 0 and 1 arguments presumably mean "thread 0 of 1 threads"
    // -- TODO confirm against the ispc task ABI.
    for (int taskIndex = 0; taskIndex < count; ++taskIndex)
        task(data, 0, 1, taskIndex, count);
}
|
||||
|
||||
|
||||
// Synchronization hook called by compiled ispc code.  Deliberately a no-op:
// ISPCLaunch() in this file runs every task inline before it returns, so
// there is never outstanding work to wait for.  The handle is ignored.
void ISPCSync(void *) {
}
|
||||
|
||||
|
||||
// Allocation hook called by compiled ispc code.
//
//   handle    - out-parameter; receives a dummy, non-NULL marker value.
//   size      - number of bytes requested.
//   alignment - required alignment in bytes; the manual path below masks
//               with (alignment - 1), so it must be a power of two.
//
// Returns a pointer aligned to `alignment`, or NULL if the underlying
// allocator fails.  The memory is intentionally never freed by this harness.
void *ISPCAlloc(void **handle, int64_t size, int32_t alignment) {
    *handle = (void *)0xdeadbeef;
    // leak time!
#if defined(ISPC_IS_WINDOWS)
    return _aligned_malloc((size_t)size, alignment);
#elif defined(ISPC_IS_LINUX)
    return memalign(alignment, size);
#else
    // Manual alignment (ISPC_IS_APPLE, and any platform without a dedicated
    // branch so the function always returns a value).  Over-allocate by
    // (alignment - 1) bytes plus one pointer-sized slot; the slot just below
    // the returned address records the raw malloc() pointer.
    void *mem = malloc((size_t)size + (size_t)(alignment - 1) + sizeof(void *));
    if (mem == NULL)
        return NULL;

    // Round up to the next multiple of `alignment`.  An address that is
    // already aligned is left where it is, so the full `size` bytes always
    // fit inside the allocation.
    uintptr_t addr = reinterpret_cast<uintptr_t>(mem) + sizeof(void *);
    addr = (addr + (uintptr_t)(alignment - 1)) & ~(uintptr_t)(alignment - 1);

    char *amem = reinterpret_cast<char *>(addr);
    ((void **)amem)[-1] = mem;
    return amem;
#endif
}
|
||||
|
||||
|
||||
// Print the command-line synopsis to stderr and terminate the process with
// exit status `ret`.  Never returns.
static void usage(int ret) {
    fprintf(stderr,
            "usage: ispc_test\n"
            "\t[-h/--help]\tprint help\n"
            "\t[-f]\t\tindicates that test is expected to fail\n"
            "\t<files>\n");
    exit(ret);
}
|
||||
|
||||
// Placeholder bound to the __svml_* symbols when the harness is built
// without ISPC_HAVE_SVML: reports the problem on stderr and exits with
// status 1 instead of jumping to an unresolved address.
static void svml_missing() {
    fprintf(stderr, "Program called unavailable SVML function!\n");
    exit(1);
}
|
||||
|
||||
// On Windows, sin() is an overloaded function, so we need an unambiguous
|
||||
// function we can take the address of when wiring up the external references
|
||||
// below.
|
||||
|
||||
// Non-overloaded double-precision forwarders to the C math library.  Each
// one has a single, unambiguous address that can be handed to the JIT as
// the implementation of the correspondingly named libm symbol.
double Sin(double x)             { return sin(x); }
double Cos(double x)             { return cos(x); }
double Tan(double x)             { return tan(x); }
double Atan(double x)            { return atan(x); }
double Atan2(double y, double x) { return atan2(y, x); }
double Pow(double a, double b)   { return pow(a, b); }
double Exp(double x)             { return exp(x); }
double Log(double x)             { return log(x); }
|
||||
|
||||
static bool lRunTest(const char *fn) {
|
||||
llvm::LLVMContext *ctx = new llvm::LLVMContext;
|
||||
|
||||
llvm::OwningPtr<llvm::MemoryBuffer> buf;
|
||||
llvm::error_code err = llvm::MemoryBuffer::getFileOrSTDIN(fn, buf);
|
||||
if (err) {
|
||||
fprintf(stderr, "Unable to open file \"%s\": %s\n", fn, err.message().c_str());
|
||||
delete ctx;
|
||||
return false;
|
||||
}
|
||||
std::string bcErr;
|
||||
llvm::Module *module = llvm::ParseBitcodeFile(buf.get(), *ctx, &bcErr);
|
||||
|
||||
if (!module) {
|
||||
fprintf(stderr, "Bitcode reader failed for \"%s\": %s\n", fn, bcErr.c_str());
|
||||
delete ctx;
|
||||
return false;
|
||||
}
|
||||
|
||||
std::string eeError;
|
||||
#if defined(LLVM_3_0) || defined(LLVM_3_0svn) || defined(LLVM_3_1svn)
|
||||
llvm::EngineBuilder engineBuilder(module);
|
||||
engineBuilder.setErrorStr(&eeError);
|
||||
engineBuilder.setEngineKind(llvm::EngineKind::JIT);
|
||||
#if 0
|
||||
std::vector<std::string> attributes;
|
||||
if (target != NULL && !strcmp(target, "avx"))
|
||||
attributes.push_back("+avx");
|
||||
engineBuilder.setMAttrs(attributes);
|
||||
engineBuilder.setUseMCJIT(true);
|
||||
#endif
|
||||
llvm::ExecutionEngine *ee = engineBuilder.create();
|
||||
#else
|
||||
llvm::ExecutionEngine *ee = llvm::ExecutionEngine::createJIT(module, &eeError);
|
||||
#endif
|
||||
if (!ee) {
|
||||
fprintf(stderr, "Unable to create ExecutionEngine: %s\n", eeError.c_str());
|
||||
return false;
|
||||
}
|
||||
|
||||
llvm::Function *func;
|
||||
#define DO_FUNC(FUNC ,FUNCNAME) \
|
||||
if ((func = module->getFunction(FUNCNAME)) != NULL) \
|
||||
ee->addGlobalMapping(func, (void *)FUNC)
|
||||
DO_FUNC(ISPCLaunch, "ISPCLaunch");
|
||||
DO_FUNC(ISPCSync, "ISPCSync");
|
||||
DO_FUNC(ISPCAlloc, "ISPCAlloc");
|
||||
DO_FUNC(putchar, "putchar");
|
||||
DO_FUNC(printf, "printf");
|
||||
DO_FUNC(fflush, "fflush");
|
||||
DO_FUNC(sinf, "sinf");
|
||||
DO_FUNC(cosf, "cosf");
|
||||
DO_FUNC(tanf, "tanf");
|
||||
DO_FUNC(atanf, "atanf");
|
||||
DO_FUNC(atan2f, "atan2f");
|
||||
DO_FUNC(powf, "powf");
|
||||
DO_FUNC(expf, "expf");
|
||||
DO_FUNC(logf, "logf");
|
||||
DO_FUNC(Sin, "sin");
|
||||
DO_FUNC(Cos, "cos");
|
||||
DO_FUNC(Tan, "tan");
|
||||
DO_FUNC(Atan, "atan");
|
||||
DO_FUNC(Atan2, "atan2");
|
||||
DO_FUNC(Pow, "pow");
|
||||
DO_FUNC(Exp, "exp");
|
||||
DO_FUNC(Log, "log");
|
||||
DO_FUNC(memset, "memset");
|
||||
#ifdef ISPC_IS_APPLE
|
||||
DO_FUNC(memset_pattern4, "memset_pattern4");
|
||||
DO_FUNC(memset_pattern8, "memset_pattern8");
|
||||
DO_FUNC(memset_pattern16, "memset_pattern16");
|
||||
#endif
|
||||
|
||||
#ifdef ISPC_HAVE_SVML
|
||||
#define DO_SVML(FUNC ,FUNCNAME) \
|
||||
if ((func = module->getFunction(FUNCNAME)) != NULL) \
|
||||
ee->addGlobalMapping(func, (void *)FUNC)
|
||||
#else
|
||||
#define DO_SVML(FUNC, FUNCNAME) \
|
||||
if ((func = module->getFunction(FUNCNAME)) != NULL) \
|
||||
ee->addGlobalMapping(func, (void *)svml_missing)
|
||||
#endif
|
||||
|
||||
DO_SVML(__svml_sinf4, "__svml_sinf4");
|
||||
DO_SVML(__svml_cosf4, "__svml_cosf4");
|
||||
DO_SVML(__svml_sincosf4, "__svml_sincosf4");
|
||||
DO_SVML(__svml_tanf4, "__svml_tanf4");
|
||||
DO_SVML(__svml_atanf4, "__svml_atanf4");
|
||||
DO_SVML(__svml_atan2f4, "__svml_atan2f4");
|
||||
DO_SVML(__svml_expf4, "__svml_expf4");
|
||||
DO_SVML(__svml_logf4, "__svml_logf4");
|
||||
DO_SVML(__svml_powf4, "__svml_powf4");
|
||||
|
||||
// figure out the vector width in the compiled code
|
||||
func = module->getFunction("width");
|
||||
if (!func) {
|
||||
fprintf(stderr, "No width() function found!\n");
|
||||
return false;
|
||||
}
|
||||
int width;
|
||||
{
|
||||
typedef int (*PFN)();
|
||||
PFN pfn = reinterpret_cast<PFN>(ee->getPointerToFunction(func));
|
||||
width = pfn();
|
||||
assert(width == 4 || width == 8 || width == 12 || width == 16);
|
||||
}
|
||||
|
||||
// find the value that returns the desired result
|
||||
func = module->getFunction("result");
|
||||
bool foundResult = (func != NULL);
|
||||
float result[16];
|
||||
for (int i = 0; i < 16; ++i)
|
||||
result[i] = 0;
|
||||
if (foundResult) {
|
||||
typedef void (*PFN)(float *);
|
||||
PFN pfn = reinterpret_cast<PFN>(ee->getPointerToFunction(func));
|
||||
pfn(result);
|
||||
}
|
||||
else
|
||||
fprintf(stderr, "Warning: no result() function found.\n");
|
||||
|
||||
// try to find a function to run
|
||||
float returned[16];
|
||||
for (int i = 0; i < 16; ++i)
|
||||
returned[i] = 0;
|
||||
float vfloat[16] = { 1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16};
|
||||
double vdouble[16] = { 1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16};
|
||||
int vint[16] = { 2,4,6,8,10,12,14,16,18,20,22,24,26,28,30,32 };
|
||||
int vint2[16] = { 5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20};
|
||||
|
||||
if ((func = module->getFunction("f_v")) != NULL) {
|
||||
typedef void (*PFN)(float *);
|
||||
PFN pfn = reinterpret_cast<PFN>(ee->getPointerToFunction(func));
|
||||
pfn(returned);
|
||||
}
|
||||
else if ((func = module->getFunction("f_f")) != NULL) {
|
||||
typedef void (*PFN)(float *, float *);
|
||||
PFN pfn = reinterpret_cast<PFN>(ee->getPointerToFunction(func));
|
||||
llvm::verifyFunction(*func);
|
||||
pfn(returned, vfloat);
|
||||
}
|
||||
else if ((func = module->getFunction("f_fu")) != NULL) {
|
||||
typedef void (*PFN)(float *, float *, float fu);
|
||||
PFN pfn = reinterpret_cast<PFN>(ee->getPointerToFunction(func));
|
||||
llvm::verifyFunction(*func);
|
||||
pfn(returned, vfloat, 5.);
|
||||
}
|
||||
else if ((func = module->getFunction("f_fi")) != NULL) {
|
||||
typedef void (*PFN)(float *, float *, int *);
|
||||
PFN pfn = reinterpret_cast<PFN>(ee->getPointerToFunction(func));
|
||||
pfn(returned, vfloat, vint);
|
||||
}
|
||||
else if ((func = module->getFunction("f_du")) != NULL) {
|
||||
typedef void (*PFN)(float *, double *, double);
|
||||
PFN pfn = reinterpret_cast<PFN>(ee->getPointerToFunction(func));
|
||||
pfn(returned, vdouble, 5.);
|
||||
}
|
||||
else if ((func = module->getFunction("f_duf")) != NULL) {
|
||||
typedef void (*PFN)(float *, double *, float);
|
||||
PFN pfn = reinterpret_cast<PFN>(ee->getPointerToFunction(func));
|
||||
pfn(returned, vdouble, 5.f);
|
||||
}
|
||||
else if ((func = module->getFunction("f_di")) != NULL) {
|
||||
typedef void (*PFN)(float *, double *, int *);
|
||||
PFN pfn = reinterpret_cast<PFN>(ee->getPointerToFunction(func));
|
||||
pfn(returned, vdouble, vint2);
|
||||
}
|
||||
else {
|
||||
fprintf(stderr, "Unable to find runnable function in file \"%s\"\n", fn);
|
||||
return false;
|
||||
}
|
||||
|
||||
// see if we got the right result
|
||||
bool resultsMatch = true;
|
||||
if (foundResult) {
|
||||
for (int i = 0; i < width; ++i)
|
||||
if (returned[i] != result[i]) {
|
||||
resultsMatch = false;
|
||||
fprintf(stderr, "Test \"%s\" RETURNED %d: %g / %a EXPECTED %g / %a\n",
|
||||
fn, i, returned[i], returned[i], result[i], result[i]);
|
||||
}
|
||||
}
|
||||
else {
|
||||
for (int i = 0; i < width; ++i)
|
||||
fprintf(stderr, "Test \"%s\" returned %d: %g / %a\n",
|
||||
fn, i, returned[i], returned[i]);
|
||||
}
|
||||
if (foundResult && shouldFail && resultsMatch)
|
||||
fprintf(stderr, "Test %s unexpectedly passed\n", fn);
|
||||
|
||||
delete ee;
|
||||
delete ctx;
|
||||
|
||||
return foundResult && resultsMatch;
|
||||
}
|
||||
|
||||
|
||||
int main(int argc, char *argv[]) {
|
||||
llvm::InitializeNativeTarget();
|
||||
#if defined(LLVM_3_0) || defined(LLVM_3_0svn) || defined(LLVM_3_1svn)
|
||||
LLVMLinkInJIT();
|
||||
#endif
|
||||
|
||||
const char *filename = NULL;
|
||||
for (int i = 1; i < argc; ++i) {
|
||||
if (!strcmp(argv[i], "--help") || !strcmp(argv[i], "-h"))
|
||||
usage(0);
|
||||
if (!strcmp(argv[i], "-f"))
|
||||
shouldFail = true;
|
||||
else
|
||||
filename = argv[i];
|
||||
}
|
||||
|
||||
return (lRunTest(filename) == true) ? 0 : 1;
|
||||
}
|
||||
@@ -1,90 +0,0 @@
|
||||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
|
||||
<ItemGroup Label="ProjectConfigurations">
|
||||
<ProjectConfiguration Include="Debug|Win32">
|
||||
<Configuration>Debug</Configuration>
|
||||
<Platform>Win32</Platform>
|
||||
</ProjectConfiguration>
|
||||
<ProjectConfiguration Include="Release|Win32">
|
||||
<Configuration>Release</Configuration>
|
||||
<Platform>Win32</Platform>
|
||||
</ProjectConfiguration>
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<ClCompile Include="ispc_test.cpp" />
|
||||
</ItemGroup>
|
||||
<PropertyGroup Label="Globals">
|
||||
<ProjectGuid>{92547BA8-BE86-4E78-8799-1D72A70E5831}</ProjectGuid>
|
||||
<Keyword>Win32Proj</Keyword>
|
||||
<RootNamespace>ispc_test</RootNamespace>
|
||||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
|
||||
<ConfigurationType>Application</ConfigurationType>
|
||||
<UseDebugLibraries>true</UseDebugLibraries>
|
||||
<CharacterSet>Unicode</CharacterSet>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
|
||||
<ConfigurationType>Application</ConfigurationType>
|
||||
<UseDebugLibraries>false</UseDebugLibraries>
|
||||
<WholeProgramOptimization>true</WholeProgramOptimization>
|
||||
<CharacterSet>Unicode</CharacterSet>
|
||||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
||||
<ImportGroup Label="ExtensionSettings">
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
|
||||
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
|
||||
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
|
||||
</ImportGroup>
|
||||
<PropertyGroup Label="UserMacros" />
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
|
||||
<LinkIncremental>true</LinkIncremental>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
|
||||
<LinkIncremental>false</LinkIncremental>
|
||||
</PropertyGroup>
|
||||
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
|
||||
<ClCompile>
|
||||
<PrecompiledHeader>
|
||||
</PrecompiledHeader>
|
||||
<WarningLevel>Level3</WarningLevel>
|
||||
<Optimization>Disabled</Optimization>
|
||||
<PreprocessorDefinitions>LLVM_3_0;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
<AdditionalIncludeDirectories>$(LLVM_INSTALL_DIR)/include</AdditionalIncludeDirectories>
|
||||
<DisableSpecificWarnings>4146;4355;4800</DisableSpecificWarnings>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
<SubSystem>Console</SubSystem>
|
||||
<GenerateDebugInformation>true</GenerateDebugInformation>
|
||||
<AdditionalLibraryDirectories>$(LLVM_INSTALL_DIR)/lib</AdditionalLibraryDirectories>
|
||||
<AdditionalDependencies>LLVMAnalysis.lib;LLVMArchive.lib;LLVMAsmPrinter.lib;LLVMBitReader.lib;LLVMBitWriter.lib;LLVMCodeGen.lib;LLVMCore.lib;LLVMExecutionEngine.lib;LLVMInstCombine.lib;LLVMInstrumentation.lib;LLVMipa.lib;LLVMipo.lib;LLVMJIT.lib;LLVMLinker.lib;LLVMMC.lib;LLVMMCParser.lib;LLVMObject.lib;LLVMScalarOpts.lib;LLVMSelectionDAG.lib;LLVMSupport.lib;LLVMTarget.lib;LLVMTransformUtils.lib;LLVMX86ASMPrinter.lib;LLVMX86ASMParser.lib;LLVMX86Utils.lib;LLVMX86CodeGen.lib;LLVMX86Disassembler.lib;LLVMX86Desc.lib;LLVMX86Info.lib;%(AdditionalDependencies)</AdditionalDependencies>
|
||||
</Link>
|
||||
</ItemDefinitionGroup>
|
||||
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
|
||||
<ClCompile>
|
||||
<WarningLevel>Level3</WarningLevel>
|
||||
<PrecompiledHeader>
|
||||
</PrecompiledHeader>
|
||||
<Optimization>MaxSpeed</Optimization>
|
||||
<FunctionLevelLinking>true</FunctionLevelLinking>
|
||||
<IntrinsicFunctions>true</IntrinsicFunctions>
|
||||
<PreprocessorDefinitions>LLVM_3_0;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
<AdditionalIncludeDirectories>$(LLVM_INSTALL_DIR)/include</AdditionalIncludeDirectories>
|
||||
<DisableSpecificWarnings>4146;4355;4800</DisableSpecificWarnings>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
<SubSystem>Console</SubSystem>
|
||||
<GenerateDebugInformation>true</GenerateDebugInformation>
|
||||
<EnableCOMDATFolding>true</EnableCOMDATFolding>
|
||||
<OptimizeReferences>true</OptimizeReferences>
|
||||
<AdditionalLibraryDirectories>$(LLVM_INSTALL_DIR)/lib</AdditionalLibraryDirectories>
|
||||
<AdditionalDependencies>LLVMAnalysis.lib;LLVMArchive.lib;LLVMAsmPrinter.lib;LLVMBitReader.lib;LLVMBitWriter.lib;LLVMCodeGen.lib;LLVMCore.lib;LLVMExecutionEngine.lib;LLVMInstCombine.lib;LLVMInstrumentation.lib;LLVMipa.lib;LLVMipo.lib;LLVMJIT.lib;LLVMLinker.lib;LLVMMC.lib;LLVMMCParser.lib;LLVMObject.lib;LLVMScalarOpts.lib;LLVMSelectionDAG.lib;LLVMSupport.lib;LLVMTarget.lib;LLVMTransformUtils.lib;LLVMX86ASMPrinter.lib;LLVMX86ASMParser.lib;LLVMX86Utils.lib;LLVMX86CodeGen.lib;LLVMX86Disassembler.lib;LLVMX86Desc.lib;LLVMX86Info.lib;%(AdditionalDependencies)</AdditionalDependencies>
|
||||
</Link>
|
||||
</ItemDefinitionGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||
<ImportGroup Label="ExtensionTargets">
|
||||
</ImportGroup>
|
||||
</Project>
|
||||
651
lex.ll
651
lex.ll
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
Copyright (c) 2010-2011, Intel Corporation
|
||||
Copyright (c) 2010-2012, Intel Corporation
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
@@ -42,7 +42,8 @@
|
||||
#include <stdlib.h>
|
||||
#include <stdint.h>
|
||||
|
||||
static uint64_t lParseBinary(const char *ptr, SourcePos pos);
|
||||
static uint64_t lParseBinary(const char *ptr, SourcePos pos, char **endPtr);
|
||||
static int lParseInteger(bool dotdotdot);
|
||||
static void lCComment(SourcePos *);
|
||||
static void lCppComment(SourcePos *);
|
||||
static void lHandleCppHash(SourcePos *);
|
||||
@@ -50,24 +51,280 @@ static void lStringConst(YYSTYPE *, SourcePos *);
|
||||
static double lParseHexFloat(const char *ptr);
|
||||
|
||||
#define YY_USER_ACTION \
|
||||
yylloc->first_line = yylloc->last_line; \
|
||||
yylloc->first_column = yylloc->last_column; \
|
||||
yylloc->last_column += yyleng;
|
||||
yylloc.first_line = yylloc.last_line; \
|
||||
yylloc.first_column = yylloc.last_column; \
|
||||
yylloc.last_column += yyleng;
|
||||
|
||||
// Windows-only stub for isatty() that always returns 0 ("not a terminal").
// NOTE(review): presumably needed because the flex-generated scanner calls
// isatty() and this build has no <unistd.h> to declare it -- confirm.
#ifdef ISPC_IS_WINDOWS
|
||||
inline int isatty(int) { return 0; }
|
||||
#endif // ISPC_IS_WINDOWS
|
||||
|
||||
static int allTokens[] = {
|
||||
TOKEN_ASSERT, TOKEN_BOOL, TOKEN_BREAK, TOKEN_CASE, TOKEN_CBREAK,
|
||||
TOKEN_CCONTINUE, TOKEN_CDO, TOKEN_CFOR, TOKEN_CIF, TOKEN_CWHILE,
|
||||
TOKEN_CONST, TOKEN_CONTINUE, TOKEN_CRETURN, TOKEN_DEFAULT, TOKEN_DO,
|
||||
TOKEN_DELETE, TOKEN_DOUBLE, TOKEN_ELSE, TOKEN_ENUM,
|
||||
TOKEN_EXPORT, TOKEN_EXTERN, TOKEN_FALSE, TOKEN_FLOAT, TOKEN_FOR,
|
||||
TOKEN_FOREACH, TOKEN_FOREACH_TILED, TOKEN_GOTO, TOKEN_IF, TOKEN_INLINE,
|
||||
TOKEN_INT, TOKEN_INT8, TOKEN_INT16, TOKEN_INT, TOKEN_INT64, TOKEN_LAUNCH,
|
||||
TOKEN_NEW, TOKEN_NULL, TOKEN_PRINT, TOKEN_RETURN, TOKEN_SOA, TOKEN_SIGNED,
|
||||
TOKEN_SIZEOF, TOKEN_STATIC, TOKEN_STRUCT, TOKEN_SWITCH, TOKEN_SYNC,
|
||||
TOKEN_TASK, TOKEN_TRUE, TOKEN_TYPEDEF, TOKEN_UNIFORM, TOKEN_UNSIGNED,
|
||||
TOKEN_VARYING, TOKEN_VOID, TOKEN_WHILE, TOKEN_STRING_C_LITERAL,
|
||||
TOKEN_DOTDOTDOT,
|
||||
TOKEN_FLOAT_CONSTANT,
|
||||
TOKEN_INT32_CONSTANT, TOKEN_UINT32_CONSTANT,
|
||||
TOKEN_INT64_CONSTANT, TOKEN_UINT64_CONSTANT,
|
||||
TOKEN_INC_OP, TOKEN_DEC_OP, TOKEN_LEFT_OP, TOKEN_RIGHT_OP, TOKEN_LE_OP,
|
||||
TOKEN_GE_OP, TOKEN_EQ_OP, TOKEN_NE_OP, TOKEN_AND_OP, TOKEN_OR_OP,
|
||||
TOKEN_MUL_ASSIGN, TOKEN_DIV_ASSIGN, TOKEN_MOD_ASSIGN, TOKEN_ADD_ASSIGN,
|
||||
TOKEN_SUB_ASSIGN, TOKEN_LEFT_ASSIGN, TOKEN_RIGHT_ASSIGN, TOKEN_AND_ASSIGN,
|
||||
TOKEN_XOR_ASSIGN, TOKEN_OR_ASSIGN, TOKEN_PTR_OP,
|
||||
';', '{', '}', ',', ':', '=', '(', ')', '[', ']', '.', '&', '!', '~', '-',
|
||||
'+', '*', '/', '%', '<', '>', '^', '|', '?',
|
||||
};
|
||||
|
||||
std::map<int, std::string> tokenToName;
|
||||
std::map<std::string, std::string> tokenNameRemap;
|
||||
|
||||
void ParserInit() {
|
||||
tokenToName[TOKEN_ASSERT] = "assert";
|
||||
tokenToName[TOKEN_BOOL] = "bool";
|
||||
tokenToName[TOKEN_BREAK] = "break";
|
||||
tokenToName[TOKEN_CASE] = "case";
|
||||
tokenToName[TOKEN_CBREAK] = "cbreak";
|
||||
tokenToName[TOKEN_CCONTINUE] = "ccontinue";
|
||||
tokenToName[TOKEN_CDO] = "cdo";
|
||||
tokenToName[TOKEN_CFOR] = "cfor";
|
||||
tokenToName[TOKEN_CIF] = "cif";
|
||||
tokenToName[TOKEN_CWHILE] = "cwhile";
|
||||
tokenToName[TOKEN_CONST] = "const";
|
||||
tokenToName[TOKEN_CONTINUE] = "continue";
|
||||
tokenToName[TOKEN_CRETURN] = "creturn";
|
||||
tokenToName[TOKEN_DEFAULT] = "default";
|
||||
tokenToName[TOKEN_DO] = "do";
|
||||
tokenToName[TOKEN_DELETE] = "delete";
|
||||
tokenToName[TOKEN_DOUBLE] = "double";
|
||||
tokenToName[TOKEN_ELSE] = "else";
|
||||
tokenToName[TOKEN_ENUM] = "enum";
|
||||
tokenToName[TOKEN_EXPORT] = "export";
|
||||
tokenToName[TOKEN_EXTERN] = "extern";
|
||||
tokenToName[TOKEN_FALSE] = "false";
|
||||
tokenToName[TOKEN_FLOAT] = "float";
|
||||
tokenToName[TOKEN_FOR] = "for";
|
||||
tokenToName[TOKEN_FOREACH] = "foreach";
|
||||
tokenToName[TOKEN_FOREACH_TILED] = "foreach_tiled";
|
||||
tokenToName[TOKEN_GOTO] = "goto";
|
||||
tokenToName[TOKEN_IF] = "if";
|
||||
tokenToName[TOKEN_INLINE] = "inline";
|
||||
tokenToName[TOKEN_INT] = "int";
|
||||
tokenToName[TOKEN_INT8] = "int8";
|
||||
tokenToName[TOKEN_INT16] = "int16";
|
||||
tokenToName[TOKEN_INT] = "int";
|
||||
tokenToName[TOKEN_INT64] = "int64";
|
||||
tokenToName[TOKEN_LAUNCH] = "launch";
|
||||
tokenToName[TOKEN_NEW] = "new";
|
||||
tokenToName[TOKEN_NULL] = "NULL";
|
||||
tokenToName[TOKEN_PRINT] = "print";
|
||||
tokenToName[TOKEN_RETURN] = "return";
|
||||
tokenToName[TOKEN_SOA] = "soa";
|
||||
tokenToName[TOKEN_SIGNED] = "signed";
|
||||
tokenToName[TOKEN_SIZEOF] = "sizeof";
|
||||
tokenToName[TOKEN_STATIC] = "static";
|
||||
tokenToName[TOKEN_STRUCT] = "struct";
|
||||
tokenToName[TOKEN_SWITCH] = "switch";
|
||||
tokenToName[TOKEN_SYNC] = "sync";
|
||||
tokenToName[TOKEN_TASK] = "task";
|
||||
tokenToName[TOKEN_TRUE] = "true";
|
||||
tokenToName[TOKEN_TYPEDEF] = "typedef";
|
||||
tokenToName[TOKEN_UNIFORM] = "uniform";
|
||||
tokenToName[TOKEN_UNSIGNED] = "unsigned";
|
||||
tokenToName[TOKEN_VARYING] = "varying";
|
||||
tokenToName[TOKEN_VOID] = "void";
|
||||
tokenToName[TOKEN_WHILE] = "while";
|
||||
tokenToName[TOKEN_STRING_C_LITERAL] = "\"C\"";
|
||||
tokenToName[TOKEN_DOTDOTDOT] = "...";
|
||||
tokenToName[TOKEN_FLOAT_CONSTANT] = "TOKEN_FLOAT_CONSTANT";
|
||||
tokenToName[TOKEN_INT32_CONSTANT] = "TOKEN_INT32_CONSTANT";
|
||||
tokenToName[TOKEN_UINT32_CONSTANT] = "TOKEN_UINT32_CONSTANT";
|
||||
tokenToName[TOKEN_INT64_CONSTANT] = "TOKEN_INT64_CONSTANT";
|
||||
tokenToName[TOKEN_UINT64_CONSTANT] = "TOKEN_UINT64_CONSTANT";
|
||||
tokenToName[TOKEN_INC_OP] = "++";
|
||||
tokenToName[TOKEN_DEC_OP] = "--";
|
||||
tokenToName[TOKEN_LEFT_OP] = "<<";
|
||||
tokenToName[TOKEN_RIGHT_OP] = ">>";
|
||||
tokenToName[TOKEN_LE_OP] = "<=";
|
||||
tokenToName[TOKEN_GE_OP] = ">=";
|
||||
tokenToName[TOKEN_EQ_OP] = "==";
|
||||
tokenToName[TOKEN_NE_OP] = "!=";
|
||||
tokenToName[TOKEN_AND_OP] = "&&";
|
||||
tokenToName[TOKEN_OR_OP] = "||";
|
||||
tokenToName[TOKEN_MUL_ASSIGN] = "*=";
|
||||
tokenToName[TOKEN_DIV_ASSIGN] = "/=";
|
||||
tokenToName[TOKEN_MOD_ASSIGN] = "%=";
|
||||
tokenToName[TOKEN_ADD_ASSIGN] = "+=";
|
||||
tokenToName[TOKEN_SUB_ASSIGN] = "-=";
|
||||
tokenToName[TOKEN_LEFT_ASSIGN] = "<<=";
|
||||
tokenToName[TOKEN_RIGHT_ASSIGN] = ">>=";
|
||||
tokenToName[TOKEN_AND_ASSIGN] = "&=";
|
||||
tokenToName[TOKEN_XOR_ASSIGN] = "^=";
|
||||
tokenToName[TOKEN_OR_ASSIGN] = "|=";
|
||||
tokenToName[TOKEN_PTR_OP] = "->";
|
||||
tokenToName[';'] = ";";
|
||||
tokenToName['{'] = "{";
|
||||
tokenToName['}'] = "}";
|
||||
tokenToName[','] = ",";
|
||||
tokenToName[':'] = ":";
|
||||
tokenToName['='] = "=";
|
||||
tokenToName['('] = "(";
|
||||
tokenToName[')'] = ")";
|
||||
tokenToName['['] = "[";
|
||||
tokenToName[']'] = "]";
|
||||
tokenToName['.'] = ".";
|
||||
tokenToName['&'] = "&";
|
||||
tokenToName['!'] = "!";
|
||||
tokenToName['~'] = "~";
|
||||
tokenToName['-'] = "-";
|
||||
tokenToName['+'] = "+";
|
||||
tokenToName['*'] = "*";
|
||||
tokenToName['/'] = "/";
|
||||
tokenToName['%'] = "%";
|
||||
tokenToName['<'] = "<";
|
||||
tokenToName['>'] = ">";
|
||||
tokenToName['^'] = "^";
|
||||
tokenToName['|'] = "|";
|
||||
tokenToName['?'] = "?";
|
||||
tokenToName[';'] = ";";
|
||||
|
||||
tokenNameRemap["TOKEN_ASSERT"] = "\'assert\'";
|
||||
tokenNameRemap["TOKEN_BOOL"] = "\'bool\'";
|
||||
tokenNameRemap["TOKEN_BREAK"] = "\'break\'";
|
||||
tokenNameRemap["TOKEN_CASE"] = "\'case\'";
|
||||
tokenNameRemap["TOKEN_CBREAK"] = "\'cbreak\'";
|
||||
tokenNameRemap["TOKEN_CCONTINUE"] = "\'ccontinue\'";
|
||||
tokenNameRemap["TOKEN_CDO"] = "\'cdo\'";
|
||||
tokenNameRemap["TOKEN_CFOR"] = "\'cfor\'";
|
||||
tokenNameRemap["TOKEN_CIF"] = "\'cif\'";
|
||||
tokenNameRemap["TOKEN_CWHILE"] = "\'cwhile\'";
|
||||
tokenNameRemap["TOKEN_CONST"] = "\'const\'";
|
||||
tokenNameRemap["TOKEN_CONTINUE"] = "\'continue\'";
|
||||
tokenNameRemap["TOKEN_CRETURN"] = "\'creturn\'";
|
||||
tokenNameRemap["TOKEN_DEFAULT"] = "\'default\'";
|
||||
tokenNameRemap["TOKEN_DO"] = "\'do\'";
|
||||
tokenNameRemap["TOKEN_DELETE"] = "\'delete\'";
|
||||
tokenNameRemap["TOKEN_DOUBLE"] = "\'double\'";
|
||||
tokenNameRemap["TOKEN_ELSE"] = "\'else\'";
|
||||
tokenNameRemap["TOKEN_ENUM"] = "\'enum\'";
|
||||
tokenNameRemap["TOKEN_EXPORT"] = "\'export\'";
|
||||
tokenNameRemap["TOKEN_EXTERN"] = "\'extern\'";
|
||||
tokenNameRemap["TOKEN_FALSE"] = "\'false\'";
|
||||
tokenNameRemap["TOKEN_FLOAT"] = "\'float\'";
|
||||
tokenNameRemap["TOKEN_FOR"] = "\'for\'";
|
||||
tokenNameRemap["TOKEN_FOREACH"] = "\'foreach\'";
|
||||
tokenNameRemap["TOKEN_FOREACH_TILED"] = "\'foreach_tiled\'";
|
||||
tokenNameRemap["TOKEN_GOTO"] = "\'goto\'";
|
||||
tokenNameRemap["TOKEN_IDENTIFIER"] = "identifier";
|
||||
tokenNameRemap["TOKEN_IF"] = "\'if\'";
|
||||
tokenNameRemap["TOKEN_INLINE"] = "\'inline\'";
|
||||
tokenNameRemap["TOKEN_INT"] = "\'int\'";
|
||||
tokenNameRemap["TOKEN_INT8"] = "\'int8\'";
|
||||
tokenNameRemap["TOKEN_INT16"] = "\'int16\'";
|
||||
tokenNameRemap["TOKEN_INT"] = "\'int\'";
|
||||
tokenNameRemap["TOKEN_INT64"] = "\'int64\'";
|
||||
tokenNameRemap["TOKEN_LAUNCH"] = "\'launch\'";
|
||||
tokenNameRemap["TOKEN_NEW"] = "\'new\'";
|
||||
tokenNameRemap["TOKEN_NULL"] = "\'NULL\'";
|
||||
tokenNameRemap["TOKEN_PRINT"] = "\'print\'";
|
||||
tokenNameRemap["TOKEN_RETURN"] = "\'return\'";
|
||||
tokenNameRemap["TOKEN_SOA"] = "\'soa\'";
|
||||
tokenNameRemap["TOKEN_SIGNED"] = "\'signed\'";
|
||||
tokenNameRemap["TOKEN_SIZEOF"] = "\'sizeof\'";
|
||||
tokenNameRemap["TOKEN_STATIC"] = "\'static\'";
|
||||
tokenNameRemap["TOKEN_STRUCT"] = "\'struct\'";
|
||||
tokenNameRemap["TOKEN_SWITCH"] = "\'switch\'";
|
||||
tokenNameRemap["TOKEN_SYNC"] = "\'sync\'";
|
||||
tokenNameRemap["TOKEN_TASK"] = "\'task\'";
|
||||
tokenNameRemap["TOKEN_TRUE"] = "\'true\'";
|
||||
tokenNameRemap["TOKEN_TYPEDEF"] = "\'typedef\'";
|
||||
tokenNameRemap["TOKEN_UNIFORM"] = "\'uniform\'";
|
||||
tokenNameRemap["TOKEN_UNSIGNED"] = "\'unsigned\'";
|
||||
tokenNameRemap["TOKEN_VARYING"] = "\'varying\'";
|
||||
tokenNameRemap["TOKEN_VOID"] = "\'void\'";
|
||||
tokenNameRemap["TOKEN_WHILE"] = "\'while\'";
|
||||
tokenNameRemap["TOKEN_STRING_C_LITERAL"] = "\"C\"";
|
||||
tokenNameRemap["TOKEN_DOTDOTDOT"] = "\'...\'";
|
||||
tokenNameRemap["TOKEN_FLOAT_CONSTANT"] = "float constant";
|
||||
tokenNameRemap["TOKEN_INT32_CONSTANT"] = "int32 constant";
|
||||
tokenNameRemap["TOKEN_UINT32_CONSTANT"] = "unsigned int32 constant";
|
||||
tokenNameRemap["TOKEN_INT64_CONSTANT"] = "int64 constant";
|
||||
tokenNameRemap["TOKEN_UINT64_CONSTANT"] = "unsigned int64 constant";
|
||||
tokenNameRemap["TOKEN_INC_OP"] = "\'++\'";
|
||||
tokenNameRemap["TOKEN_DEC_OP"] = "\'--\'";
|
||||
tokenNameRemap["TOKEN_LEFT_OP"] = "\'<<\'";
|
||||
tokenNameRemap["TOKEN_RIGHT_OP"] = "\'>>\'";
|
||||
tokenNameRemap["TOKEN_LE_OP"] = "\'<=\'";
|
||||
tokenNameRemap["TOKEN_GE_OP"] = "\'>=\'";
|
||||
tokenNameRemap["TOKEN_EQ_OP"] = "\'==\'";
|
||||
tokenNameRemap["TOKEN_NE_OP"] = "\'!=\'";
|
||||
tokenNameRemap["TOKEN_AND_OP"] = "\'&&\'";
|
||||
tokenNameRemap["TOKEN_OR_OP"] = "\'||\'";
|
||||
tokenNameRemap["TOKEN_MUL_ASSIGN"] = "\'*=\'";
|
||||
tokenNameRemap["TOKEN_DIV_ASSIGN"] = "\'/=\'";
|
||||
tokenNameRemap["TOKEN_MOD_ASSIGN"] = "\'%=\'";
|
||||
tokenNameRemap["TOKEN_ADD_ASSIGN"] = "\'+=\'";
|
||||
tokenNameRemap["TOKEN_SUB_ASSIGN"] = "\'-=\'";
|
||||
tokenNameRemap["TOKEN_LEFT_ASSIGN"] = "\'<<=\'";
|
||||
tokenNameRemap["TOKEN_RIGHT_ASSIGN"] = "\'>>=\'";
|
||||
tokenNameRemap["TOKEN_AND_ASSIGN"] = "\'&=\'";
|
||||
tokenNameRemap["TOKEN_XOR_ASSIGN"] = "\'^=\'";
|
||||
tokenNameRemap["TOKEN_OR_ASSIGN"] = "\'|=\'";
|
||||
tokenNameRemap["TOKEN_PTR_OP"] = "\'->\'";
|
||||
tokenNameRemap["$end"] = "end of file";
|
||||
}
|
||||
|
||||
|
||||
// Platform-neutral source of pseudo-random integers for the fuzz-testing
// macro: rand() when ISPC_IS_WINDOWS is defined, lrand48() otherwise.
inline int ispcRand() {
#ifdef ISPC_IS_WINDOWS
    return rand();
#else
    // lrand48() returns long; the narrowing to int is intentional.
    return (int)lrand48();
#endif
}
|
||||
|
||||
/* Fuzz-testing hook for scanner actions.  When g->enableFuzzTest is set it
   draws r = ispcRand() % 40 and:
     r == 0  emits a "dropping token" warning (no return is issued here);
     r == 1  returns a random entry of allTokens in place of the real token,
             after storing a copy of yytext in yylval.stringVal;
     r == 2  returns TOKEN_IDENTIFIER carrying the name of a random symbol,
             if the symbol table supplies one.
   The expansion ends in a dangling `else`, so the statement written right
   after RT becomes the else-branch; following RT with a lone `;` therefore
   yields an empty else. */
#define RT \
    if (g->enableFuzzTest) { \
        int r = ispcRand() % 40; \
        if (r == 0) { \
            Warning(yylloc, "Fuzz test dropping token"); \
        } \
        else if (r == 1) { \
            Assert (tokenToName.size() > 0); \
            int nt = sizeof(allTokens) / sizeof(allTokens[0]); \
            int tn = ispcRand() % nt; \
            yylval.stringVal = new std::string(yytext); /* just in case */\
            Warning(yylloc, "Fuzz test replaced token with \"%s\"", tokenToName[allTokens[tn]].c_str()); \
            return allTokens[tn]; \
        } \
        else if (r == 2) { \
            Symbol *sym = m->symbolTable->RandomSymbol(); \
            if (sym != NULL) { \
                yylval.stringVal = new std::string(sym->name); \
                Warning(yylloc, "Fuzz test replaced with identifier \"%s\".", sym->name.c_str()); \
                return TOKEN_IDENTIFIER; \
            } \
        } \
        /* TOKEN_TYPE_NAME */ \
    } else /* swallow semicolon */
|
||||
|
||||
%}
|
||||
|
||||
%option nounput
|
||||
%option noyywrap
|
||||
%option bison-bridge
|
||||
%option bison-locations
|
||||
%option nounistd
|
||||
|
||||
WHITESPACE [ \t\r]+
|
||||
INT_NUMBER (([0-9]+)|(0x[0-9a-fA-F]+)|(0b[01]+))
|
||||
INT_NUMBER (([0-9]+)|(0x[0-9a-fA-F]+)|(0b[01]+))[uUlL]*[kMG]?[uUlL]*
|
||||
INT_NUMBER_DOTDOTDOT (([0-9]+)|(0x[0-9a-fA-F]+)|(0b[01]+))[uUlL]*[kMG]?[uUlL]*\.\.\.
|
||||
FLOAT_NUMBER (([0-9]+|(([0-9]+\.[0-9]*[fF]?)|(\.[0-9]+)))([eE][-+]?[0-9]+)?[fF]?)
|
||||
HEX_FLOAT_NUMBER (0x[01](\.[0-9a-fA-F]*)?p[-+]?[0-9]+[fF]?)
|
||||
|
||||
@@ -75,73 +332,76 @@ IDENT [a-zA-Z_][a-zA-Z_0-9]*
|
||||
ZO_SWIZZLE ([01]+[w-z]+)+|([01]+[rgba]+)+|([01]+[uv]+)+
|
||||
|
||||
%%
|
||||
"/*" { lCComment(yylloc); }
|
||||
"//" { lCppComment(yylloc); }
|
||||
"/*" { lCComment(&yylloc); }
|
||||
"//" { lCppComment(&yylloc); }
|
||||
|
||||
__assert { return TOKEN_ASSERT; }
|
||||
bool { return TOKEN_BOOL; }
|
||||
break { return TOKEN_BREAK; }
|
||||
case { return TOKEN_CASE; }
|
||||
cbreak { return TOKEN_CBREAK; }
|
||||
ccontinue { return TOKEN_CCONTINUE; }
|
||||
cdo { return TOKEN_CDO; }
|
||||
cfor { return TOKEN_CFOR; }
|
||||
cif { return TOKEN_CIF; }
|
||||
cwhile { return TOKEN_CWHILE; }
|
||||
const { return TOKEN_CONST; }
|
||||
continue { return TOKEN_CONTINUE; }
|
||||
creturn { return TOKEN_CRETURN; }
|
||||
default { return TOKEN_DEFAULT; }
|
||||
do { return TOKEN_DO; }
|
||||
double { return TOKEN_DOUBLE; }
|
||||
else { return TOKEN_ELSE; }
|
||||
enum { return TOKEN_ENUM; }
|
||||
export { return TOKEN_EXPORT; }
|
||||
extern { return TOKEN_EXTERN; }
|
||||
false { return TOKEN_FALSE; }
|
||||
float { return TOKEN_FLOAT; }
|
||||
for { return TOKEN_FOR; }
|
||||
foreach { return TOKEN_FOREACH; }
|
||||
foreach_tiled { return TOKEN_FOREACH_TILED; }
|
||||
goto { return TOKEN_GOTO; }
|
||||
if { return TOKEN_IF; }
|
||||
inline { return TOKEN_INLINE; }
|
||||
int { return TOKEN_INT; }
|
||||
int8 { return TOKEN_INT8; }
|
||||
int16 { return TOKEN_INT16; }
|
||||
int32 { return TOKEN_INT; }
|
||||
int64 { return TOKEN_INT64; }
|
||||
launch { return TOKEN_LAUNCH; }
|
||||
NULL { return TOKEN_NULL; }
|
||||
print { return TOKEN_PRINT; }
|
||||
reference { Error(*yylloc, "\"reference\" qualifier is no longer supported; "
|
||||
"please use C++-style '&' syntax for references "
|
||||
"instead."); }
|
||||
return { return TOKEN_RETURN; }
|
||||
soa { return TOKEN_SOA; }
|
||||
signed { return TOKEN_SIGNED; }
|
||||
sizeof { return TOKEN_SIZEOF; }
|
||||
static { return TOKEN_STATIC; }
|
||||
struct { return TOKEN_STRUCT; }
|
||||
switch { return TOKEN_SWITCH; }
|
||||
sync { return TOKEN_SYNC; }
|
||||
task { return TOKEN_TASK; }
|
||||
true { return TOKEN_TRUE; }
|
||||
typedef { return TOKEN_TYPEDEF; }
|
||||
uniform { return TOKEN_UNIFORM; }
|
||||
unsigned { return TOKEN_UNSIGNED; }
|
||||
varying { return TOKEN_VARYING; }
|
||||
void { return TOKEN_VOID; }
|
||||
while { return TOKEN_WHILE; }
|
||||
\"C\" { return TOKEN_STRING_C_LITERAL; }
|
||||
\.\.\. { return TOKEN_DOTDOTDOT; }
|
||||
__assert { RT; return TOKEN_ASSERT; }
|
||||
bool { RT; return TOKEN_BOOL; }
|
||||
break { RT; return TOKEN_BREAK; }
|
||||
case { RT; return TOKEN_CASE; }
|
||||
cbreak { RT; return TOKEN_CBREAK; }
|
||||
ccontinue { RT; return TOKEN_CCONTINUE; }
|
||||
cdo { RT; return TOKEN_CDO; }
|
||||
cfor { RT; return TOKEN_CFOR; }
|
||||
cif { RT; return TOKEN_CIF; }
|
||||
cwhile { RT; return TOKEN_CWHILE; }
|
||||
const { RT; return TOKEN_CONST; }
|
||||
continue { RT; return TOKEN_CONTINUE; }
|
||||
creturn { RT; return TOKEN_CRETURN; }
|
||||
__declspec { RT; return TOKEN_DECLSPEC; }
|
||||
default { RT; return TOKEN_DEFAULT; }
|
||||
do { RT; return TOKEN_DO; }
|
||||
delete { RT; return TOKEN_DELETE; }
|
||||
delete\[\] { RT; return TOKEN_DELETE; }
|
||||
double { RT; return TOKEN_DOUBLE; }
|
||||
else { RT; return TOKEN_ELSE; }
|
||||
enum { RT; return TOKEN_ENUM; }
|
||||
export { RT; return TOKEN_EXPORT; }
|
||||
extern { RT; return TOKEN_EXTERN; }
|
||||
false { RT; return TOKEN_FALSE; }
|
||||
float { RT; return TOKEN_FLOAT; }
|
||||
for { RT; return TOKEN_FOR; }
|
||||
__foreach_active { RT; return TOKEN_FOREACH_ACTIVE; }
|
||||
foreach { RT; return TOKEN_FOREACH; }
|
||||
foreach_tiled { RT; return TOKEN_FOREACH_TILED; }
|
||||
goto { RT; return TOKEN_GOTO; }
|
||||
if { RT; return TOKEN_IF; }
|
||||
inline { RT; return TOKEN_INLINE; }
|
||||
int { RT; return TOKEN_INT; }
|
||||
int8 { RT; return TOKEN_INT8; }
|
||||
int16 { RT; return TOKEN_INT16; }
|
||||
int32 { RT; return TOKEN_INT; }
|
||||
int64 { RT; return TOKEN_INT64; }
|
||||
launch { RT; return TOKEN_LAUNCH; }
|
||||
new { RT; return TOKEN_NEW; }
|
||||
NULL { RT; return TOKEN_NULL; }
|
||||
print { RT; return TOKEN_PRINT; }
|
||||
return { RT; return TOKEN_RETURN; }
|
||||
soa { RT; return TOKEN_SOA; }
|
||||
signed { RT; return TOKEN_SIGNED; }
|
||||
sizeof { RT; return TOKEN_SIZEOF; }
|
||||
static { RT; return TOKEN_STATIC; }
|
||||
struct { RT; return TOKEN_STRUCT; }
|
||||
switch { RT; return TOKEN_SWITCH; }
|
||||
sync { RT; return TOKEN_SYNC; }
|
||||
task { RT; return TOKEN_TASK; }
|
||||
true { RT; return TOKEN_TRUE; }
|
||||
typedef { RT; return TOKEN_TYPEDEF; }
|
||||
uniform { RT; return TOKEN_UNIFORM; }
|
||||
unsigned { RT; return TOKEN_UNSIGNED; }
|
||||
varying { RT; return TOKEN_VARYING; }
|
||||
void { RT; return TOKEN_VOID; }
|
||||
while { RT; return TOKEN_WHILE; }
|
||||
\"C\" { RT; return TOKEN_STRING_C_LITERAL; }
|
||||
\.\.\. { RT; return TOKEN_DOTDOTDOT; }
|
||||
|
||||
L?\"(\\.|[^\\"])*\" { lStringConst(yylval, yylloc); return TOKEN_STRING_LITERAL; }
|
||||
L?\"(\\.|[^\\"])*\" { lStringConst(&yylval, &yylloc); return TOKEN_STRING_LITERAL; }
|
||||
|
||||
{IDENT} {
|
||||
RT;
|
||||
/* We have an identifier--is it a type name or an identifier?
|
||||
The symbol table will straighten us out... */
|
||||
yylval->stringVal = new std::string(yytext);
|
||||
yylval.stringVal = new std::string(yytext);
|
||||
if (m->symbolTable->LookupType(yytext) != NULL)
|
||||
return TOKEN_TYPE_NAME;
|
||||
else
|
||||
@@ -149,126 +409,87 @@ L?\"(\\.|[^\\"])*\" { lStringConst(yylval, yylloc); return TOKEN_STRING_LITERAL;
|
||||
}
|
||||
|
||||
{INT_NUMBER} {
|
||||
char *endPtr = NULL;
|
||||
int64_t val;
|
||||
|
||||
if (yytext[0] == '0' && yytext[1] == 'b')
|
||||
val = lParseBinary(yytext+2, *yylloc);
|
||||
else {
|
||||
#ifdef ISPC_IS_WINDOWS
|
||||
val = _strtoi64(yytext, &endPtr, 0);
|
||||
#else
|
||||
// FIXME: should use strtouq and then issue an error if we can't
|
||||
// fit into 64 bits...
|
||||
val = strtoull(yytext, &endPtr, 0);
|
||||
#endif
|
||||
}
|
||||
|
||||
// See if we can fit this into a 32-bit integer...
|
||||
if ((val & 0xffffffff) == val) {
|
||||
yylval->int32Val = (int32_t)val;
|
||||
return TOKEN_INT32_CONSTANT;
|
||||
}
|
||||
else {
|
||||
yylval->int64Val = val;
|
||||
return TOKEN_INT64_CONSTANT;
|
||||
}
|
||||
RT;
|
||||
return lParseInteger(false);
|
||||
}
|
||||
|
||||
{INT_NUMBER}[uU] {
|
||||
char *endPtr = NULL;
|
||||
uint64_t val;
|
||||
|
||||
if (yytext[0] == '0' && yytext[1] == 'b')
|
||||
val = lParseBinary(yytext+2, *yylloc);
|
||||
else {
|
||||
#ifdef ISPC_IS_WINDOWS
|
||||
val = _strtoui64(yytext, &endPtr, 0);
|
||||
#else
|
||||
val = strtoull(yytext, &endPtr, 0);
|
||||
#endif
|
||||
}
|
||||
|
||||
if ((val & 0xffffffff) == val) {
|
||||
// we can represent it in a 32-bit value
|
||||
yylval->int32Val = (int32_t)val;
|
||||
return TOKEN_UINT32_CONSTANT;
|
||||
}
|
||||
else {
|
||||
yylval->int64Val = val;
|
||||
return TOKEN_UINT64_CONSTANT;
|
||||
}
|
||||
{INT_NUMBER_DOTDOTDOT} {
|
||||
RT;
|
||||
return lParseInteger(true);
|
||||
}
|
||||
|
||||
|
||||
{FLOAT_NUMBER} {
|
||||
yylval->floatVal = atof(yytext);
|
||||
RT;
|
||||
yylval.floatVal = (float)atof(yytext);
|
||||
return TOKEN_FLOAT_CONSTANT;
|
||||
}
|
||||
|
||||
{HEX_FLOAT_NUMBER} {
|
||||
yylval->floatVal = lParseHexFloat(yytext);
|
||||
RT;
|
||||
yylval.floatVal = (float)lParseHexFloat(yytext);
|
||||
return TOKEN_FLOAT_CONSTANT;
|
||||
}
|
||||
|
||||
"++" { return TOKEN_INC_OP; }
|
||||
"--" { return TOKEN_DEC_OP; }
|
||||
"<<" { return TOKEN_LEFT_OP; }
|
||||
">>" { return TOKEN_RIGHT_OP; }
|
||||
"<=" { return TOKEN_LE_OP; }
|
||||
">=" { return TOKEN_GE_OP; }
|
||||
"==" { return TOKEN_EQ_OP; }
|
||||
"!=" { return TOKEN_NE_OP; }
|
||||
"&&" { return TOKEN_AND_OP; }
|
||||
"||" { return TOKEN_OR_OP; }
|
||||
"*=" { return TOKEN_MUL_ASSIGN; }
|
||||
"/=" { return TOKEN_DIV_ASSIGN; }
|
||||
"%=" { return TOKEN_MOD_ASSIGN; }
|
||||
"+=" { return TOKEN_ADD_ASSIGN; }
|
||||
"-=" { return TOKEN_SUB_ASSIGN; }
|
||||
"<<=" { return TOKEN_LEFT_ASSIGN; }
|
||||
">>=" { return TOKEN_RIGHT_ASSIGN; }
|
||||
"&=" { return TOKEN_AND_ASSIGN; }
|
||||
"^=" { return TOKEN_XOR_ASSIGN; }
|
||||
"|=" { return TOKEN_OR_ASSIGN; }
|
||||
"->" { return TOKEN_PTR_OP; }
|
||||
";" { return ';'; }
|
||||
("{"|"<%") { return '{'; }
|
||||
("}"|"%>") { return '}'; }
|
||||
"," { return ','; }
|
||||
":" { return ':'; }
|
||||
"=" { return '='; }
|
||||
"(" { return '('; }
|
||||
")" { return ')'; }
|
||||
("["|"<:") { return '['; }
|
||||
("]"|":>") { return ']'; }
|
||||
"." { return '.'; }
|
||||
"&" { return '&'; }
|
||||
"!" { return '!'; }
|
||||
"~" { return '~'; }
|
||||
"-" { return '-'; }
|
||||
"+" { return '+'; }
|
||||
"*" { return '*'; }
|
||||
"/" { return '/'; }
|
||||
"%" { return '%'; }
|
||||
"<" { return '<'; }
|
||||
">" { return '>'; }
|
||||
"^" { return '^'; }
|
||||
"|" { return '|'; }
|
||||
"?" { return '?'; }
|
||||
"++" { RT; return TOKEN_INC_OP; }
|
||||
"--" { RT; return TOKEN_DEC_OP; }
|
||||
"<<" { RT; return TOKEN_LEFT_OP; }
|
||||
">>" { RT; return TOKEN_RIGHT_OP; }
|
||||
"<=" { RT; return TOKEN_LE_OP; }
|
||||
">=" { RT; return TOKEN_GE_OP; }
|
||||
"==" { RT; return TOKEN_EQ_OP; }
|
||||
"!=" { RT; return TOKEN_NE_OP; }
|
||||
"&&" { RT; return TOKEN_AND_OP; }
|
||||
"||" { RT; return TOKEN_OR_OP; }
|
||||
"*=" { RT; return TOKEN_MUL_ASSIGN; }
|
||||
"/=" { RT; return TOKEN_DIV_ASSIGN; }
|
||||
"%=" { RT; return TOKEN_MOD_ASSIGN; }
|
||||
"+=" { RT; return TOKEN_ADD_ASSIGN; }
|
||||
"-=" { RT; return TOKEN_SUB_ASSIGN; }
|
||||
"<<=" { RT; return TOKEN_LEFT_ASSIGN; }
|
||||
">>=" { RT; return TOKEN_RIGHT_ASSIGN; }
|
||||
"&=" { RT; return TOKEN_AND_ASSIGN; }
|
||||
"^=" { RT; return TOKEN_XOR_ASSIGN; }
|
||||
"|=" { RT; return TOKEN_OR_ASSIGN; }
|
||||
"->" { RT; return TOKEN_PTR_OP; }
|
||||
";" { RT; return ';'; }
|
||||
("{"|"<%") { RT; return '{'; }
|
||||
("}"|"%>") { RT; return '}'; }
|
||||
"," { RT; return ','; }
|
||||
":" { RT; return ':'; }
|
||||
"=" { RT; return '='; }
|
||||
"(" { RT; return '('; }
|
||||
")" { RT; return ')'; }
|
||||
("["|"<:") { RT; return '['; }
|
||||
("]"|":>") { RT; return ']'; }
|
||||
"." { RT; return '.'; }
|
||||
"&" { RT; return '&'; }
|
||||
"!" { RT; return '!'; }
|
||||
"~" { RT; return '~'; }
|
||||
"-" { RT; return '-'; }
|
||||
"+" { RT; return '+'; }
|
||||
"*" { RT; return '*'; }
|
||||
"/" { RT; return '/'; }
|
||||
"%" { RT; return '%'; }
|
||||
"<" { RT; return '<'; }
|
||||
">" { RT; return '>'; }
|
||||
"^" { RT; return '^'; }
|
||||
"|" { RT; return '|'; }
|
||||
"?" { RT; return '?'; }
|
||||
|
||||
{WHITESPACE} { }
|
||||
|
||||
\n {
|
||||
yylloc->last_line++;
|
||||
yylloc->last_column = 1;
|
||||
yylloc.last_line++;
|
||||
yylloc.last_column = 1;
|
||||
}
|
||||
|
||||
#(line)?[ ][0-9]+[ ]\"(\\.|[^\\"])*\"[^\n]* {
|
||||
lHandleCppHash(yylloc);
|
||||
lHandleCppHash(&yylloc);
|
||||
}
|
||||
|
||||
. {
|
||||
Error(*yylloc, "Illegal character: %c (0x%x)", yytext[0], int(yytext[0]));
|
||||
Error(yylloc, "Illegal character: %c (0x%x)", yytext[0], int(yytext[0]));
|
||||
YY_USER_ACTION
|
||||
}
|
||||
|
||||
@@ -285,14 +506,11 @@ L?\"(\\.|[^\\"])*\" { lStringConst(yylval, yylloc); return TOKEN_STRING_LITERAL;
|
||||
/** Return the integer version of a binary constant from a string.
|
||||
*/
|
||||
static uint64_t
|
||||
lParseBinary(const char *ptr, SourcePos pos) {
|
||||
lParseBinary(const char *ptr, SourcePos pos, char **endPtr) {
|
||||
uint64_t val = 0;
|
||||
bool warned = false;
|
||||
|
||||
while (*ptr != '\0') {
|
||||
/* if this hits, the regexp for 0b... constants is broken */
|
||||
assert(*ptr == '0' || *ptr == '1');
|
||||
|
||||
while (*ptr == '0' || *ptr == '1') {
|
||||
if ((val & (((int64_t)1)<<63)) && warned == false) {
|
||||
// We're about to shift out a set bit
|
||||
Warning(pos, "Can't represent binary constant with a 64-bit integer type");
|
||||
@@ -302,17 +520,86 @@ lParseBinary(const char *ptr, SourcePos pos) {
|
||||
val = (val << 1) | (*ptr == '0' ? 0 : 1);
|
||||
++ptr;
|
||||
}
|
||||
*endPtr = (char *)ptr;
|
||||
return val;
|
||||
}
|
||||
|
||||
|
||||
static int
|
||||
lParseInteger(bool dotdotdot) {
|
||||
int ls = 0, us = 0;
|
||||
|
||||
char *endPtr = NULL;
|
||||
if (yytext[0] == '0' && yytext[1] == 'b')
|
||||
yylval.intVal = lParseBinary(yytext+2, yylloc, &endPtr);
|
||||
else {
|
||||
#if defined(ISPC_IS_WINDOWS) && !defined(__MINGW32__)
|
||||
yylval.intVal = _strtoui64(yytext, &endPtr, 0);
|
||||
#else
|
||||
// FIXME: should use strtouq and then issue an error if we can't
|
||||
// fit into 64 bits...
|
||||
yylval.intVal = strtoull(yytext, &endPtr, 0);
|
||||
#endif
|
||||
}
|
||||
|
||||
bool kilo = false, mega = false, giga = false;
|
||||
for (; *endPtr; endPtr++) {
|
||||
if (*endPtr == 'k')
|
||||
kilo = true;
|
||||
else if (*endPtr == 'M')
|
||||
mega = true;
|
||||
else if (*endPtr == 'G')
|
||||
giga = true;
|
||||
else if (*endPtr == 'l' || *endPtr == 'L')
|
||||
ls++;
|
||||
else if (*endPtr == 'u' || *endPtr == 'U')
|
||||
us++;
|
||||
else
|
||||
Assert(dotdotdot && *endPtr == '.');
|
||||
}
|
||||
if (kilo)
|
||||
yylval.intVal *= 1024;
|
||||
if (mega)
|
||||
yylval.intVal *= 1024*1024;
|
||||
if (giga)
|
||||
yylval.intVal *= 1024*1024*1024;
|
||||
|
||||
if (dotdotdot) {
|
||||
if (ls >= 2)
|
||||
return us ? TOKEN_UINT64DOTDOTDOT_CONSTANT : TOKEN_INT64DOTDOTDOT_CONSTANT;
|
||||
else if (ls == 1)
|
||||
return us ? TOKEN_UINT32DOTDOTDOT_CONSTANT : TOKEN_INT32DOTDOTDOT_CONSTANT;
|
||||
|
||||
// See if we can fit this into a 32-bit integer...
|
||||
if ((yylval.intVal & 0xffffffff) == yylval.intVal)
|
||||
return us ? TOKEN_UINT32DOTDOTDOT_CONSTANT : TOKEN_INT32DOTDOTDOT_CONSTANT;
|
||||
else
|
||||
return us ? TOKEN_UINT64DOTDOTDOT_CONSTANT : TOKEN_INT64DOTDOTDOT_CONSTANT;
|
||||
}
|
||||
else {
|
||||
if (ls >= 2)
|
||||
return us ? TOKEN_UINT64_CONSTANT : TOKEN_INT64_CONSTANT;
|
||||
else if (ls == 1)
|
||||
return us ? TOKEN_UINT32_CONSTANT : TOKEN_INT32_CONSTANT;
|
||||
|
||||
// See if we can fit this into a 32-bit integer...
|
||||
if ((yylval.intVal & 0xffffffff) == yylval.intVal)
|
||||
return us ? TOKEN_UINT32_CONSTANT : TOKEN_INT32_CONSTANT;
|
||||
else
|
||||
return us ? TOKEN_UINT64_CONSTANT : TOKEN_INT64_CONSTANT;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/** Handle a C-style comment in the source.
|
||||
*/
|
||||
static void
|
||||
lCComment(SourcePos *pos) {
|
||||
char c, prev = 0;
|
||||
|
||||
|
||||
while ((c = yyinput()) != 0) {
|
||||
++pos->last_column;
|
||||
|
||||
if (c == '\n') {
|
||||
pos->last_line++;
|
||||
pos->last_column = 1;
|
||||
@@ -346,7 +633,7 @@ static void lHandleCppHash(SourcePos *pos) {
|
||||
char *ptr, *src;
|
||||
|
||||
// Advance past the opening stuff on the line.
|
||||
assert(yytext[0] == '#');
|
||||
Assert(yytext[0] == '#');
|
||||
if (yytext[1] == ' ')
|
||||
// On Linux/OSX, the preprocessor gives us lines like
|
||||
// # 1234 "foo.c"
|
||||
@@ -354,7 +641,7 @@ static void lHandleCppHash(SourcePos *pos) {
|
||||
else {
|
||||
// On windows, cl.exe's preprocessor gives us lines of the form:
|
||||
// #line 1234 "foo.c"
|
||||
assert(!strncmp(yytext+1, "line ", 5));
|
||||
Assert(!strncmp(yytext+1, "line ", 5));
|
||||
ptr = yytext + 6;
|
||||
}
|
||||
|
||||
@@ -364,13 +651,13 @@ static void lHandleCppHash(SourcePos *pos) {
|
||||
pos->last_column = 1;
|
||||
// Make sure that the character after the integer is a space and that
|
||||
// then we have open quotes
|
||||
assert(src != ptr && src[0] == ' ' && src[1] == '"');
|
||||
Assert(src != ptr && src[0] == ' ' && src[1] == '"');
|
||||
src += 2;
|
||||
|
||||
// And the filename is everything up until the closing quotes
|
||||
std::string filename;
|
||||
while (*src != '"') {
|
||||
assert(*src && *src != '\n');
|
||||
Assert(*src && *src != '\n');
|
||||
filename.push_back(*src);
|
||||
++src;
|
||||
}
|
||||
@@ -471,13 +758,13 @@ ipow2(int exponent) {
|
||||
*/
|
||||
static double
|
||||
lParseHexFloat(const char *ptr) {
|
||||
assert(ptr != NULL);
|
||||
Assert(ptr != NULL);
|
||||
|
||||
assert(ptr[0] == '0' && ptr[1] == 'x');
|
||||
Assert(ptr[0] == '0' && ptr[1] == 'x');
|
||||
ptr += 2;
|
||||
|
||||
// Start initializing the mantissa
|
||||
assert(*ptr == '0' || *ptr == '1');
|
||||
Assert(*ptr == '0' || *ptr == '1');
|
||||
double mantissa = (*ptr == '1') ? 1. : 0.;
|
||||
++ptr;
|
||||
|
||||
@@ -497,7 +784,7 @@ lParseHexFloat(const char *ptr) {
|
||||
else if (*ptr >= 'a' && *ptr <= 'f')
|
||||
digit = 10 + *ptr - 'a';
|
||||
else {
|
||||
assert(*ptr >= 'A' && *ptr <= 'F');
|
||||
Assert(*ptr >= 'A' && *ptr <= 'F');
|
||||
digit = 10 + *ptr - 'A';
|
||||
}
|
||||
|
||||
@@ -510,7 +797,7 @@ lParseHexFloat(const char *ptr) {
|
||||
else
|
||||
// If there's not a '.', then we better be going straight to the
|
||||
// exponent
|
||||
assert(*ptr == 'p');
|
||||
Assert(*ptr == 'p');
|
||||
|
||||
++ptr; // skip the 'p'
|
||||
|
||||
|
||||
1205
llvmutil.cpp
1205
llvmutil.cpp
File diff suppressed because it is too large
Load Diff
159
llvmutil.h
159
llvmutil.h
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
Copyright (c) 2010-2011, Intel Corporation
|
||||
Copyright (c) 2010-2012, Intel Corporation
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
@@ -38,56 +38,60 @@
|
||||
#ifndef ISPC_LLVMUTIL_H
|
||||
#define ISPC_LLVMUTIL_H 1
|
||||
|
||||
#include "ispc.h"
|
||||
#include <llvm/LLVMContext.h>
|
||||
#include <llvm/Type.h>
|
||||
#include <llvm/DerivedTypes.h>
|
||||
#include <llvm/Constants.h>
|
||||
|
||||
namespace llvm {
|
||||
class PHINode;
|
||||
class InsertElementInst;
|
||||
}
|
||||
|
||||
|
||||
/** This structure holds pointers to a variety of LLVM types; code
|
||||
elsewhere can use them from here, rather than needing to make more
|
||||
verbose LLVM API calls.
|
||||
*/
|
||||
struct LLVMTypes {
|
||||
static LLVM_TYPE_CONST llvm::Type *VoidType;
|
||||
static LLVM_TYPE_CONST llvm::PointerType *VoidPointerType;
|
||||
static LLVM_TYPE_CONST llvm::Type *PointerIntType;
|
||||
static LLVM_TYPE_CONST llvm::Type *BoolType;
|
||||
static llvm::Type *VoidType;
|
||||
static llvm::PointerType *VoidPointerType;
|
||||
static llvm::Type *PointerIntType;
|
||||
static llvm::Type *BoolType;
|
||||
|
||||
static LLVM_TYPE_CONST llvm::Type *Int8Type;
|
||||
static LLVM_TYPE_CONST llvm::Type *Int16Type;
|
||||
static LLVM_TYPE_CONST llvm::Type *Int32Type;
|
||||
static LLVM_TYPE_CONST llvm::Type *Int64Type;
|
||||
static LLVM_TYPE_CONST llvm::Type *FloatType;
|
||||
static LLVM_TYPE_CONST llvm::Type *DoubleType;
|
||||
static llvm::Type *Int8Type;
|
||||
static llvm::Type *Int16Type;
|
||||
static llvm::Type *Int32Type;
|
||||
static llvm::Type *Int64Type;
|
||||
static llvm::Type *FloatType;
|
||||
static llvm::Type *DoubleType;
|
||||
|
||||
static LLVM_TYPE_CONST llvm::Type *Int8PointerType;
|
||||
static LLVM_TYPE_CONST llvm::Type *Int16PointerType;
|
||||
static LLVM_TYPE_CONST llvm::Type *Int32PointerType;
|
||||
static LLVM_TYPE_CONST llvm::Type *Int64PointerType;
|
||||
static LLVM_TYPE_CONST llvm::Type *FloatPointerType;
|
||||
static LLVM_TYPE_CONST llvm::Type *DoublePointerType;
|
||||
static llvm::Type *Int8PointerType;
|
||||
static llvm::Type *Int16PointerType;
|
||||
static llvm::Type *Int32PointerType;
|
||||
static llvm::Type *Int64PointerType;
|
||||
static llvm::Type *FloatPointerType;
|
||||
static llvm::Type *DoublePointerType;
|
||||
|
||||
static LLVM_TYPE_CONST llvm::VectorType *MaskType;
|
||||
static llvm::VectorType *MaskType;
|
||||
|
||||
static LLVM_TYPE_CONST llvm::VectorType *BoolVectorType;
|
||||
static LLVM_TYPE_CONST llvm::VectorType *Int1VectorType;
|
||||
static LLVM_TYPE_CONST llvm::VectorType *Int8VectorType;
|
||||
static LLVM_TYPE_CONST llvm::VectorType *Int16VectorType;
|
||||
static LLVM_TYPE_CONST llvm::VectorType *Int32VectorType;
|
||||
static LLVM_TYPE_CONST llvm::VectorType *Int64VectorType;
|
||||
static LLVM_TYPE_CONST llvm::VectorType *FloatVectorType;
|
||||
static LLVM_TYPE_CONST llvm::VectorType *DoubleVectorType;
|
||||
static llvm::VectorType *BoolVectorType;
|
||||
static llvm::VectorType *Int1VectorType;
|
||||
static llvm::VectorType *Int8VectorType;
|
||||
static llvm::VectorType *Int16VectorType;
|
||||
static llvm::VectorType *Int32VectorType;
|
||||
static llvm::VectorType *Int64VectorType;
|
||||
static llvm::VectorType *FloatVectorType;
|
||||
static llvm::VectorType *DoubleVectorType;
|
||||
|
||||
static LLVM_TYPE_CONST llvm::Type *Int8VectorPointerType;
|
||||
static LLVM_TYPE_CONST llvm::Type *Int16VectorPointerType;
|
||||
static LLVM_TYPE_CONST llvm::Type *Int32VectorPointerType;
|
||||
static LLVM_TYPE_CONST llvm::Type *Int64VectorPointerType;
|
||||
static LLVM_TYPE_CONST llvm::Type *FloatVectorPointerType;
|
||||
static LLVM_TYPE_CONST llvm::Type *DoubleVectorPointerType;
|
||||
static llvm::Type *Int8VectorPointerType;
|
||||
static llvm::Type *Int16VectorPointerType;
|
||||
static llvm::Type *Int32VectorPointerType;
|
||||
static llvm::Type *Int64VectorPointerType;
|
||||
static llvm::Type *FloatVectorPointerType;
|
||||
static llvm::Type *DoubleVectorPointerType;
|
||||
|
||||
static LLVM_TYPE_CONST llvm::VectorType *VoidPointerVectorType;
|
||||
static llvm::VectorType *VoidPointerVectorType;
|
||||
};
|
||||
|
||||
/** These variables hold the corresponding LLVM constant values as a
|
||||
@@ -99,6 +103,7 @@ extern llvm::Constant *LLVMTrue, *LLVMFalse;
|
||||
of LLVMTypes and the LLVMTrue/LLVMFalse constants. However, it can't
|
||||
be called until the compilation target is known.
|
||||
*/
|
||||
struct Target;
|
||||
extern void InitLLVMUtil(llvm::LLVMContext *ctx, Target target);
|
||||
|
||||
/** Returns an LLVM i8 constant of the given value */
|
||||
@@ -161,6 +166,14 @@ extern llvm::Constant *LLVMFloatVector(float f);
|
||||
across all elements */
|
||||
extern llvm::Constant *LLVMDoubleVector(double f);
|
||||
|
||||
/** Returns a constant integer or vector (according to the given type) of
|
||||
the given signed integer value. */
|
||||
extern llvm::Constant *LLVMIntAsType(int64_t, llvm::Type *t);
|
||||
|
||||
/** Returns a constant integer or vector (according to the given type) of
|
||||
the given unsigned integer value. */
|
||||
extern llvm::Constant *LLVMUIntAsType(uint64_t, llvm::Type *t);
|
||||
|
||||
/** Returns an LLVM boolean vector based on the given array of values.
|
||||
The array should have g->target.vectorWidth elements. */
|
||||
extern llvm::Constant *LLVMBoolVector(const bool *v);
|
||||
@@ -205,4 +218,82 @@ extern llvm::Constant *LLVMMaskAllOn;
|
||||
/** LLVM constant value representing an 'all off' SIMD lane mask */
|
||||
extern llvm::Constant *LLVMMaskAllOff;
|
||||
|
||||
/** Tests to see if all of the elements of the vector in the 'v' parameter
|
||||
are equal. Like lValuesAreEqual(), this is a conservative test and may
|
||||
return false for arrays where the values are actually all equal. */
|
||||
extern bool LLVMVectorValuesAllEqual(llvm::Value *v);
|
||||
|
||||
/** Given a vector of integer-typed values, this function returns true if it
|
||||
can determine that the elements of the vector have a step of 'stride'
|
||||
between their values and false otherwise. This function tries to
|
||||
handle as many possibilities as possible, including things like all
|
||||
elements equal to some non-constant value plus an integer offset, etc.
|
||||
Needless to say (the halting problem and all that), it may return false
|
||||
for some vectors that are in fact linear.
|
||||
*/
|
||||
extern bool LLVMVectorIsLinear(llvm::Value *v, int stride);
|
||||
|
||||
/** Given a vector-typed value v, if the vector is a vector with constant
|
||||
element values, this function extracts those element values into the
|
||||
ret[] array and returns the number of elements (i.e. the vector type's
|
||||
width) in *nElts. It returns true if successful and false if the given
|
||||
vector is not in fact a vector of constants. */
|
||||
extern bool LLVMExtractVectorInts(llvm::Value *v, int64_t ret[], int *nElts);
|
||||
|
||||
/** This function takes chains of InsertElement instructions along the
|
||||
lines of:
|
||||
|
||||
%v0 = insertelement undef, value_0, i32 index_0
|
||||
%v1 = insertelement %v0, value_1, i32 index_1
|
||||
...
|
||||
%vn = insertelement %vn-1, value_n-1, i32 index_n-1
|
||||
|
||||
and initializes the provided elements array such that the i'th
|
||||
llvm::Value * in the array is the element that was inserted into the
|
||||
i'th element of the vector.
|
||||
|
||||
When the chain of insertelement instructions comes to an end, the only
|
||||
base case that this function handles is the initial value being a
|
||||
constant vector. For anything more complex (e.g. some other arbitrary
|
||||
value), it doesn't try to extract element values into the returned
|
||||
array.
|
||||
*/
|
||||
extern void LLVMFlattenInsertChain(llvm::InsertElementInst *ie, int vectorWidth,
|
||||
llvm::Value **elements);
|
||||
|
||||
/** This is a utility routine for debugging that dumps out the given LLVM
|
||||
value as well as (recursively) all of the other values that it depends
|
||||
on. */
|
||||
extern void LLVMDumpValue(llvm::Value *v);
|
||||
|
||||
/** Given a vector-typed value, this function returns the value of its
|
||||
first element. Rather than just doing the straightforward thing of
|
||||
using a single extractelement instruction to do this, this function
|
||||
tries to rewrite the computation for the first element in scalar form;
|
||||
this is generally more efficient than computing the entire vector's
|
||||
worth of values just to extract the first element, in cases where only
|
||||
the first element's value is needed.
|
||||
*/
|
||||
extern llvm::Value *LLVMExtractFirstVectorElement(llvm::Value *v);
|
||||
|
||||
/** This function takes two vectors, expected to be the same length, and
|
||||
returns a new vector of twice the length that represents concatenating
|
||||
the two of them. */
|
||||
extern llvm::Value *LLVMConcatVectors(llvm::Value *v1, llvm::Value *v2,
|
||||
llvm::Instruction *insertBefore);
|
||||
|
||||
/** This is a utility function for vector shuffling; it takes two vectors
|
||||
v1 and v2, and a compile-time constant set of integer permutations in
|
||||
shuf[] and returns a new vector of length shufSize that represents the
|
||||
corresponding shufflevector operation. */
|
||||
extern llvm::Value *LLVMShuffleVectors(llvm::Value *v1, llvm::Value *v2,
|
||||
int32_t shuf[], int shufSize,
|
||||
llvm::Instruction *insertBefore);
|
||||
|
||||
/** Utility routines to concat strings with the names of existing values to
|
||||
create meaningful new names for instruction values.
|
||||
*/
|
||||
extern const char *LLVMGetName(llvm::Value *v, const char *);
|
||||
extern const char *LLVMGetName(const char *op, llvm::Value *v1, llvm::Value *v2);
|
||||
|
||||
#endif // ISPC_LLVMUTIL_H
|
||||
|
||||
199
main.cpp
199
main.cpp
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
Copyright (c) 2010-2011, Intel Corporation
|
||||
Copyright (c) 2010-2012, Intel Corporation
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
@@ -37,44 +37,62 @@
|
||||
|
||||
#include "ispc.h"
|
||||
#include "module.h"
|
||||
#include "util.h"
|
||||
#include "type.h"
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <llvm/Support/PrettyStackTrace.h>
|
||||
#ifdef ISPC_IS_WINDOWS
|
||||
#include <time.h>
|
||||
#endif // ISPC_IS_WINDOWS
|
||||
#include <llvm/Support/Signals.h>
|
||||
#if defined(LLVM_3_0) || defined(LLVM_3_0svn) || defined(LLVM_3_1svn)
|
||||
#include <llvm/Support/TargetRegistry.h>
|
||||
#include <llvm/Support/TargetSelect.h>
|
||||
#else
|
||||
#include <llvm/Target/TargetRegistry.h>
|
||||
#include <llvm/Target/TargetSelect.h>
|
||||
#include <llvm/Target/SubtargetFeature.h>
|
||||
#endif
|
||||
#include <llvm/Support/TargetRegistry.h>
|
||||
#include <llvm/Support/TargetSelect.h>
|
||||
|
||||
#ifdef ISPC_IS_WINDOWS
|
||||
#define strcasecmp stricmp
|
||||
#ifndef BUILD_DATE
|
||||
#define BUILD_DATE __DATE__
|
||||
#endif
|
||||
#define BUILD_VERSION ""
|
||||
#endif // ISPC_IS_WINDOWS
|
||||
|
||||
static void usage(int ret) {
|
||||
printf("This is the Intel(r) SPMD Program Compiler (ispc), build %s (%s)\n\n",
|
||||
BUILD_DATE, BUILD_VERSION);
|
||||
printf("usage: ispc\n");
|
||||
static void
|
||||
lPrintVersion() {
|
||||
printf("Intel(r) SPMD Program Compiler (ispc), %s (build %s @ %s, LLVM %s)\n",
|
||||
ISPC_VERSION, BUILD_VERSION, BUILD_DATE,
|
||||
#if defined(LLVM_3_0)
|
||||
"3.0"
|
||||
#elif defined(LLVM_3_1) || defined(LLVM_3_1svn)
|
||||
"3.1"
|
||||
#else
|
||||
#error "Unhandled LLVM version"
|
||||
#endif
|
||||
);
|
||||
}
|
||||
|
||||
|
||||
static void
|
||||
usage(int ret) {
|
||||
lPrintVersion();
|
||||
printf("\nusage: ispc\n");
|
||||
printf(" [--addressing={32,64}]\t\tSelect 32- or 64-bit addressing. (Note that 32-bit\n");
|
||||
printf(" \t\taddressing calculations are done by default, even\n");
|
||||
printf(" \t\ton 64-bit target architectures.)\n");
|
||||
printf(" [--arch={%s}]\t\tSelect target architecture\n",
|
||||
Target::SupportedTargetArchs());
|
||||
printf(" [--c++-include-file=<name>]\t\tSpecify name of file to emit in #include statement in generated C++ code.\n");
|
||||
printf(" [--cpu=<cpu>]\t\t\tSelect target CPU type\n");
|
||||
printf(" <cpu>={%s}\n", Target::SupportedTargetCPUs());
|
||||
printf(" <cpu>={%s}\n", Target::SupportedTargetCPUs().c_str());
|
||||
printf(" [-D<foo>]\t\t\t\t#define given value when running preprocessor\n");
|
||||
printf(" [--debug]\t\t\t\tPrint information useful for debugging ispc\n");
|
||||
printf(" [--emit-asm]\t\t\tGenerate assembly language file as output\n");
|
||||
printf(" [--emit-c++]\t\t\tEmit a C++ source file as output\n");
|
||||
printf(" [--emit-llvm]\t\t\tEmit LLVM bitode file as output\n");
|
||||
printf(" [--emit-obj]\t\t\tGenerate object file file as output (default)\n");
|
||||
printf(" [-g]\t\t\t\tGenerate debugging information\n");
|
||||
printf(" [--help]\t\t\t\tPrint help\n");
|
||||
printf(" [--help-dev]\t\t\tPrint help for developer options\n");
|
||||
printf(" [-h <name>/--header-outfile=<name>]\tOutput filename for header\n");
|
||||
printf(" [-I <path>]\t\t\t\tAdd <path> to #include file search path\n");
|
||||
printf(" [--instrument]\t\t\tEmit instrumentation to gather performance data\n");
|
||||
printf(" [--math-lib=<option>]\t\tSelect math library\n");
|
||||
printf(" default\t\t\t\tUse ispc's built-in math functions\n");
|
||||
@@ -90,20 +108,10 @@ static void usage(int ret) {
|
||||
printf(" disable-loop-unroll\t\tDisable loop unrolling.\n");
|
||||
printf(" fast-masked-vload\t\tFaster masked vector loads on SSE (may go past end of array)\n");
|
||||
printf(" fast-math\t\t\tPerform non-IEEE-compliant optimizations of numeric expressions\n");
|
||||
#if 0
|
||||
printf(" disable-handle-pseudo-memory-ops\n");
|
||||
printf(" disable-blended-masked-stores\t\tScalarize masked stores on SSE (vs. using vblendps)\n");
|
||||
printf(" disable-coherent-control-flow\t\tDisable coherent control flow optimizations\n");
|
||||
printf(" disable-uniform-control-flow\t\tDisable uniform control flow optimizations\n");
|
||||
printf(" disable-gather-scatter-optimizations\tDisable improvements to gather/scatter\n");
|
||||
printf(" disable-blending-removal\t\tDisable eliminating blend at same scope\n");
|
||||
printf(" disable-gather-scatter-flattening\tDisable flattening when all lanes are on\n");
|
||||
printf(" disable-uniform-memory-optimizations\tDisable uniform-based coherent memory access\n");
|
||||
printf(" disable-masked-store-optimizations\tDisable lowering to regular stores when possible\n");
|
||||
#endif
|
||||
#ifndef ISPC_IS_WINDOWS
|
||||
printf(" [--pic]\t\t\t\tGenerate position-independent code\n");
|
||||
#endif // !ISPC_IS_WINDOWS
|
||||
printf(" [--quiet]\t\t\t\tSuppress all output\n");
|
||||
printf(" [--target=<isa>]\t\t\tSelect target ISA. <isa>={%s}\n", Target::SupportedTargetISAs());
|
||||
printf(" [--version]\t\t\t\tPrint ispc version\n");
|
||||
printf(" [--werror]\t\t\t\tTreat warnings as errors\n");
|
||||
@@ -114,11 +122,33 @@ static void usage(int ret) {
|
||||
}
|
||||
|
||||
|
||||
static void
|
||||
devUsage(int ret) {
|
||||
lPrintVersion();
|
||||
printf("\nusage (developer options): ispc\n");
|
||||
printf(" [--debug]\t\t\t\tPrint information useful for debugging ispc\n");
|
||||
printf(" [--fuzz-test]\t\t\tRandomly perturb program input to test error conditions\n");
|
||||
printf(" [--fuzz-seed=<value>]\t\tSeed value for RNG for fuzz testing\n");
|
||||
printf(" [--opt=<option>]\t\t\tSet optimization option\n");
|
||||
printf(" disable-all-on-optimizations\t\tDisable optimizations that take advantage of \"all on\" mask\n");
|
||||
printf(" disable-blended-masked-stores\t\tScalarize masked stores on SSE (vs. using vblendps)\n");
|
||||
printf(" disable-blending-removal\t\tDisable eliminating blend at same scope\n");
|
||||
printf(" disable-coalescing\t\t\tDisable gather coalescing\n");
|
||||
printf(" disable-coherent-control-flow\t\tDisable coherent control flow optimizations\n");
|
||||
printf(" disable-gather-scatter-flattening\tDisable flattening when all lanes are on\n");
|
||||
printf(" disable-gather-scatter-optimizations\tDisable improvements to gather/scatter\n");
|
||||
printf(" disable-handle-pseudo-memory-ops\tLeave __pseudo_* calls for gather/scatter/etc. in final IR\n");
|
||||
printf(" disable-uniform-control-flow\t\tDisable uniform control flow optimizations\n");
|
||||
printf(" disable-uniform-memory-optimizations\tDisable uniform-based coherent memory access\n");
|
||||
printf(" [--yydebug]\t\t\t\tPrint debugging information during parsing\n");
|
||||
exit(ret);
|
||||
}
|
||||
|
||||
|
||||
/** We take arguments from both the command line as well as from the
|
||||
ISPC_ARGS environment variable. This function returns a new set of
|
||||
arguments representing the ones from those two sources merged together.
|
||||
*/
|
||||
*/
|
||||
static void lGetAllArgs(int Argc, char *Argv[], int &argc, char *argv[128]) {
|
||||
// Copy over the command line arguments (passed in)
|
||||
for (int i = 0; i < Argc; ++i)
|
||||
@@ -161,15 +191,18 @@ static void lGetAllArgs(int Argc, char *Argv[], int &argc, char *argv[128]) {
|
||||
}
|
||||
|
||||
|
||||
static void
|
||||
lSignal(void *) {
|
||||
FATAL("Unhandled signal sent to process; terminating.");
|
||||
}
|
||||
|
||||
|
||||
int main(int Argc, char *Argv[]) {
|
||||
int argc;
|
||||
char *argv[128];
|
||||
lGetAllArgs(Argc, Argv, argc, argv);
|
||||
|
||||
// Use LLVM's little utility function to print out nice stack traces if
|
||||
// we crash
|
||||
llvm::sys::PrintStackTraceOnErrorSignal();
|
||||
llvm::PrettyStackTraceProgram X(argc, argv);
|
||||
llvm::sys::AddSignalHandler(lSignal, NULL);
|
||||
|
||||
// initialize available LLVM targets
|
||||
LLVMInitializeX86TargetInfo();
|
||||
@@ -177,13 +210,12 @@ int main(int Argc, char *Argv[]) {
|
||||
LLVMInitializeX86AsmPrinter();
|
||||
LLVMInitializeX86AsmParser();
|
||||
LLVMInitializeX86Disassembler();
|
||||
#if defined(LLVM_3_0) || defined(LLVM_3_0svn) || defined(LLVM_3_1svn)
|
||||
LLVMInitializeX86TargetMC();
|
||||
#endif
|
||||
|
||||
char *file = NULL;
|
||||
const char *headerFileName = NULL;
|
||||
const char *outFileName = NULL;
|
||||
const char *includeFileName = NULL;
|
||||
|
||||
// Initiailize globals early so that we can set various option values
|
||||
// as we're parsing below
|
||||
@@ -197,13 +229,15 @@ int main(int Argc, char *Argv[]) {
|
||||
for (int i = 1; i < argc; ++i) {
|
||||
if (!strcmp(argv[i], "--help"))
|
||||
usage(0);
|
||||
if (!strcmp(argv[i], "--help-dev"))
|
||||
devUsage(0);
|
||||
else if (!strncmp(argv[i], "-D", 2))
|
||||
g->cppArgs.push_back(argv[i]);
|
||||
else if (!strncmp(argv[i], "--addressing=", 13)) {
|
||||
if (atoi(argv[i] + 13) == 64)
|
||||
g->opt.force32BitAddressing = false;
|
||||
else if (atoi(argv[i] + 13) == 32)
|
||||
g->opt.force32BitAddressing = 32;
|
||||
g->opt.force32BitAddressing = true;
|
||||
else {
|
||||
fprintf(stderr, "Addressing width \"%s\" invalid--only 32 and "
|
||||
"64 are allowed.\n", argv[i]+13);
|
||||
@@ -233,13 +267,31 @@ int main(int Argc, char *Argv[]) {
|
||||
}
|
||||
else if (!strcmp(argv[i], "--emit-asm"))
|
||||
ot = Module::Asm;
|
||||
else if (!strcmp(argv[i], "--emit-c++"))
|
||||
ot = Module::CXX;
|
||||
else if (!strcmp(argv[i], "--emit-llvm"))
|
||||
ot = Module::Bitcode;
|
||||
else if (!strcmp(argv[i], "--emit-obj"))
|
||||
ot = Module::Object;
|
||||
else if (!strcmp(argv[i], "-I")) {
|
||||
if (++i == argc) {
|
||||
fprintf(stderr, "No path specified after -I option.\n");
|
||||
usage(1);
|
||||
}
|
||||
g->includePath.push_back(argv[i]);
|
||||
}
|
||||
else if (!strncmp(argv[i], "-I", 2))
|
||||
g->includePath.push_back(argv[i]+2);
|
||||
else if (!strcmp(argv[i], "--fuzz-test"))
|
||||
g->enableFuzzTest = true;
|
||||
else if (!strncmp(argv[i], "--fuzz-seed=", 12))
|
||||
g->fuzzTestSeed = atoi(argv[i] + 12);
|
||||
else if (!strcmp(argv[i], "--target")) {
|
||||
// FIXME: should remove this way of specifying the target...
|
||||
if (++i == argc) usage(1);
|
||||
if (++i == argc) {
|
||||
fprintf(stderr, "No target specified after --target option.\n");
|
||||
usage(1);
|
||||
}
|
||||
target = argv[i];
|
||||
}
|
||||
else if (!strncmp(argv[i], "--target=", 9))
|
||||
@@ -254,8 +306,10 @@ int main(int Argc, char *Argv[]) {
|
||||
g->mathLib = Globals::Math_SVML;
|
||||
else if (!strcmp(lib, "system"))
|
||||
g->mathLib = Globals::Math_System;
|
||||
else
|
||||
else {
|
||||
fprintf(stderr, "Unknown --math-lib= option \"%s\".\n", lib);
|
||||
usage(1);
|
||||
}
|
||||
}
|
||||
else if (!strncmp(argv[i], "--opt=", 6)) {
|
||||
const char *opt = argv[i] + 6;
|
||||
@@ -270,6 +324,10 @@ int main(int Argc, char *Argv[]) {
|
||||
|
||||
// These are only used for performance tests of specific
|
||||
// optimizations
|
||||
else if (!strcmp(opt, "disable-all-on-optimizations"))
|
||||
g->opt.disableMaskAllOnOptimizations = true;
|
||||
else if (!strcmp(opt, "disable-coalescing"))
|
||||
g->opt.disableCoalescing = true;
|
||||
else if (!strcmp(opt, "disable-handle-pseudo-memory-ops"))
|
||||
g->opt.disableHandlePseudoMemoryOps = true;
|
||||
else if (!strcmp(opt, "disable-blended-masked-stores"))
|
||||
@@ -286,10 +344,10 @@ int main(int Argc, char *Argv[]) {
|
||||
g->opt.disableGatherScatterFlattening = true;
|
||||
else if (!strcmp(opt, "disable-uniform-memory-optimizations"))
|
||||
g->opt.disableUniformMemoryOptimizations = true;
|
||||
else if (!strcmp(opt, "disable-masked-store-optimizations"))
|
||||
g->opt.disableMaskedStoreOptimizations = true;
|
||||
else
|
||||
else {
|
||||
fprintf(stderr, "Unknown --opt= option \"%s\".\n", opt);
|
||||
usage(1);
|
||||
}
|
||||
}
|
||||
else if (!strcmp(argv[i], "--woff") || !strcmp(argv[i], "-woff")) {
|
||||
g->disableWarnings = true;
|
||||
@@ -302,18 +360,27 @@ int main(int Argc, char *Argv[]) {
|
||||
else if (!strcmp(argv[i], "--wno-perf") || !strcmp(argv[i], "-wno-perf"))
|
||||
g->emitPerfWarnings = false;
|
||||
else if (!strcmp(argv[i], "-o")) {
|
||||
if (++i == argc) usage(1);
|
||||
if (++i == argc) {
|
||||
fprintf(stderr, "No output file specified after -o option.\n");
|
||||
usage(1);
|
||||
}
|
||||
outFileName = argv[i];
|
||||
}
|
||||
else if (!strcmp(argv[i], "--outfile="))
|
||||
else if (!strncmp(argv[i], "--outfile=", 10))
|
||||
outFileName = argv[i] + strlen("--outfile=");
|
||||
else if (!strcmp(argv[i], "-h")) {
|
||||
if (++i == argc) usage(1);
|
||||
if (++i == argc) {
|
||||
fprintf(stderr, "No header file name specified after -h option.\n");
|
||||
usage(1);
|
||||
}
|
||||
headerFileName = argv[i];
|
||||
}
|
||||
else if (!strcmp(argv[i], "--header-outfile=")) {
|
||||
else if (!strncmp(argv[i], "--header-outfile=", 17)) {
|
||||
headerFileName = argv[i] + strlen("--header-outfile=");
|
||||
}
|
||||
else if (!strncmp(argv[i], "--c++-include-file=", 19)) {
|
||||
includeFileName = argv[i] + strlen("--c++-include-file=");
|
||||
}
|
||||
else if (!strcmp(argv[i], "-O0")) {
|
||||
g->opt.level = 0;
|
||||
optSet = true;
|
||||
@@ -333,16 +400,26 @@ int main(int Argc, char *Argv[]) {
|
||||
else if (!strcmp(argv[i], "--pic"))
|
||||
generatePIC = true;
|
||||
#endif // !ISPC_IS_WINDOWS
|
||||
else if (!strcmp(argv[i], "--quiet"))
|
||||
g->quiet = true;
|
||||
else if (!strcmp(argv[i], "--yydebug")) {
|
||||
extern int yydebug;
|
||||
yydebug = 1;
|
||||
}
|
||||
else if (!strcmp(argv[i], "-v") || !strcmp(argv[i], "--version")) {
|
||||
printf("Intel(r) SPMD Program Compiler (ispc) build %s (%s)\n",
|
||||
BUILD_DATE, BUILD_VERSION);
|
||||
lPrintVersion();
|
||||
return 0;
|
||||
}
|
||||
else if (argv[i][0] == '-')
|
||||
else if (argv[i][0] == '-') {
|
||||
fprintf(stderr, "Unknown option \"%s\".\n", argv[i]);
|
||||
usage(1);
|
||||
}
|
||||
else {
|
||||
if (file != NULL)
|
||||
if (file != NULL) {
|
||||
fprintf(stderr, "Multiple input files specified on command "
|
||||
"line: \"%s\" and \"%s\".\n", file, argv[i]);
|
||||
usage(1);
|
||||
}
|
||||
else
|
||||
file = argv[i];
|
||||
}
|
||||
@@ -354,6 +431,30 @@ int main(int Argc, char *Argv[]) {
|
||||
if (debugSet && !optSet)
|
||||
g->opt.level = 0;
|
||||
|
||||
if (g->enableFuzzTest) {
|
||||
if (g->fuzzTestSeed == -1) {
|
||||
#ifdef ISPC_IS_WINDOWS
|
||||
int seed = (unsigned)time(NULL);
|
||||
#else
|
||||
int seed = getpid();
|
||||
#endif
|
||||
g->fuzzTestSeed = seed;
|
||||
Warning(SourcePos(), "Using seed %d for fuzz testing",
|
||||
g->fuzzTestSeed);
|
||||
}
|
||||
#ifdef ISPC_IS_WINDOWS
|
||||
srand(g->fuzzTestSeed);
|
||||
#else
|
||||
srand48(g->fuzzTestSeed);
|
||||
#endif
|
||||
}
|
||||
|
||||
if (outFileName == NULL && headerFileName == NULL)
|
||||
Warning(SourcePos(), "No output file or header file name specified. "
|
||||
"Program will be compiled and warnings/errors will "
|
||||
"be issued, but no output will be generated.");
|
||||
|
||||
return Module::CompileAndOutput(file, arch, cpu, target, generatePIC,
|
||||
ot, outFileName, headerFileName);
|
||||
ot, outFileName, headerFileName,
|
||||
includeFileName);
|
||||
}
|
||||
|
||||
658
module.cpp
658
module.cpp
File diff suppressed because it is too large
Load Diff
28
module.h
28
module.h
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
Copyright (c) 2010-2011, Intel Corporation
|
||||
Copyright (c) 2010-2012, Intel Corporation
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
@@ -59,27 +59,33 @@ public:
|
||||
int CompileFile();
|
||||
|
||||
/** Add a named type definition to the module. */
|
||||
void AddTypeDef(Symbol *sym);
|
||||
void AddTypeDef(const std::string &name, const Type *type,
|
||||
SourcePos pos);
|
||||
|
||||
/** Add a new global variable corresponding to the given Symbol to the
|
||||
module. If non-NULL, initExpr gives the initiailizer expression
|
||||
for the global's inital value. */
|
||||
void AddGlobalVariable(Symbol *sym, Expr *initExpr, bool isConst);
|
||||
void AddGlobalVariable(const std::string &name, const Type *type,
|
||||
Expr *initExpr, bool isConst,
|
||||
StorageClass storageClass, SourcePos pos);
|
||||
|
||||
/** Add a declaration of the function defined by the given function
|
||||
symbol to the module. */
|
||||
void AddFunctionDeclaration(Symbol *funSym, bool isInline);
|
||||
void AddFunctionDeclaration(const std::string &name,
|
||||
const FunctionType *ftype,
|
||||
StorageClass sc, bool isInline, SourcePos pos);
|
||||
|
||||
/** Adds the function described by the declaration information and the
|
||||
provided statements to the module. */
|
||||
void AddFunctionDefinition(Symbol *sym, const std::vector<Symbol *> &args,
|
||||
Stmt *code);
|
||||
void AddFunctionDefinition(const std::string &name,
|
||||
const FunctionType *ftype, Stmt *code);
|
||||
|
||||
/** After a source file has been compiled, output can be generated in a
|
||||
number of different formats. */
|
||||
enum OutputType { Asm, /** Generate text assembly language output */
|
||||
Bitcode, /** Generate LLVM IR bitcode output */
|
||||
Object, /** Generate a native object file */
|
||||
CXX, /** Generate a C++ file */
|
||||
Header /** Generate a C/C++ header file with
|
||||
declarations of 'export'ed functions, global
|
||||
variables, and the types used by them. */
|
||||
@@ -108,6 +114,10 @@ public:
|
||||
inclusion from C/C++ code with declarations of
|
||||
types and functions exported from the given ispc
|
||||
source file.
|
||||
@param includeFileName If non-NULL, gives the filename for the C++
|
||||
backend to emit in an #include statement to
|
||||
get definitions of the builtins for the generic
|
||||
target.
|
||||
@return Number of errors encountered when compiling
|
||||
srcFile.
|
||||
*/
|
||||
@@ -115,7 +125,8 @@ public:
|
||||
const char *cpu, const char *targets,
|
||||
bool generatePIC, OutputType outputType,
|
||||
const char *outFileName,
|
||||
const char *headerFileName);
|
||||
const char *headerFileName,
|
||||
const char *includeFileName);
|
||||
|
||||
/** Total number of errors encountered during compilation. */
|
||||
int errorCount;
|
||||
@@ -138,7 +149,8 @@ private:
|
||||
true on success, false if there has been an error. The given
|
||||
filename may be NULL, indicating that output should go to standard
|
||||
output. */
|
||||
bool writeOutput(OutputType ot, const char *filename);
|
||||
bool writeOutput(OutputType ot, const char *filename,
|
||||
const char *includeFileName = NULL);
|
||||
bool writeHeader(const char *filename);
|
||||
bool writeObjectFileOrAssembly(OutputType outputType, const char *filename);
|
||||
static bool writeObjectFileOrAssembly(llvm::TargetMachine *targetMachine,
|
||||
|
||||
404
run_tests.py
404
run_tests.py
@@ -2,9 +2,6 @@
|
||||
|
||||
# test-running driver for ispc
|
||||
|
||||
# TODO: windows support (mostly should be calling CL.exe rather than gcc
|
||||
# for static linking?)
|
||||
|
||||
from optparse import OptionParser
|
||||
import multiprocessing
|
||||
from ctypes import c_int
|
||||
@@ -15,35 +12,99 @@ import re
|
||||
import signal
|
||||
import random
|
||||
import string
|
||||
import mutex
|
||||
import subprocess
|
||||
import shlex
|
||||
import platform
|
||||
import tempfile
|
||||
|
||||
# disable fancy error/warning printing with ANSI colors, so grepping for error
|
||||
# messages doesn't get confused
|
||||
os.environ["TERM"] = "dumb"
|
||||
|
||||
# This script is affected by http://bugs.python.org/issue5261 on OSX 10.5 Leopard
|
||||
# git history has a workaround for that issue.
|
||||
|
||||
is_windows = (platform.system() == 'Windows' or
|
||||
'CYGWIN_NT' in platform.system())
|
||||
|
||||
parser = OptionParser()
|
||||
parser.add_option("-r", "--random-shuffle", dest="random", help="Randomly order tests",
|
||||
default=False, action="store_true")
|
||||
parser.add_option("-s", "--static-exe", dest="static_exe",
|
||||
help="Create and run a regular executable for each test (rather than using the LLVM JIT).",
|
||||
default=False, action="store_true")
|
||||
parser.add_option("-g", "--generics-include", dest="include_file", help="Filename for header implementing functions for generics",
|
||||
default=None)
|
||||
parser.add_option('-t', '--target', dest='target',
|
||||
help='Set compilation target (sse2, sse2-x2, sse4, sse4-x2, avx, avx-x2)',
|
||||
help='Set compilation target (sse2, sse2-x2, sse4, sse4-x2, avx, avx-x2, generic-4, generic-8, generic-16)',
|
||||
default="sse4")
|
||||
parser.add_option('-a', '--arch', dest='arch',
|
||||
help='Set architecture (x86, x86-64)',
|
||||
default="x86-64")
|
||||
parser.add_option("-c", "--compiler", dest="compiler_exe", help="Compiler binary to use to run tests",
|
||||
default=None)
|
||||
parser.add_option('-o', '--no-opt', dest='no_opt', help='Disable optimization',
|
||||
default=False, action="store_true")
|
||||
parser.add_option('-j', '--jobs', dest='num_jobs', help='Maximum number of jobs to run in parallel',
|
||||
default="1024", type="int")
|
||||
parser.add_option('-v', '--verbose', dest='verbose', help='Enable verbose output',
|
||||
default=False, action="store_true")
|
||||
parser.add_option('--wrap-exe', dest='wrapexe',
|
||||
help='Executable to wrap test runs with (e.g. "valgrind")',
|
||||
default="")
|
||||
|
||||
(options, args) = parser.parse_args()
|
||||
|
||||
# if no specific test files are specified, run all of the tests in tests/
|
||||
# and failing_tests/
|
||||
if not is_windows:
|
||||
ispc_exe = "./ispc"
|
||||
else:
|
||||
ispc_exe = "../Release/ispc.exe"
|
||||
|
||||
is_generic_target = (options.target.find("generic-") != -1 and
|
||||
options.target != "generic-1")
|
||||
if is_generic_target and options.include_file == None:
|
||||
if options.target == "generic-4":
|
||||
sys.stderr.write("No generics #include specified; using examples/intrinsics/sse4.h\n")
|
||||
options.include_file = "examples/intrinsics/sse4.h"
|
||||
elif options.target == "generic-8":
|
||||
sys.stderr.write("No generics #include specified and no default available for \"generic-8\" target.\n")
|
||||
sys.exit(1)
|
||||
elif options.target == "generic-16":
|
||||
sys.stderr.write("No generics #include specified; using examples/intrinsics/generic-16.h\n")
|
||||
options.include_file = "examples/intrinsics/generic-16.h"
|
||||
|
||||
if options.compiler_exe == None:
|
||||
if is_windows:
|
||||
options.compiler_exe = "cl"
|
||||
else:
|
||||
options.compiler_exe = "g++"
|
||||
|
||||
def fix_windows_paths(files):
|
||||
ret = [ ]
|
||||
for fn in files:
|
||||
ret += [ string.replace(fn, '\\', '/') ]
|
||||
return ret
|
||||
|
||||
|
||||
# if no specific test files are specified, run all of the tests in tests/,
|
||||
# failing_tests/, and tests_errors/
|
||||
if len(args) == 0:
|
||||
files = glob.glob("tests/*ispc") + glob.glob("failing_tests/*ispc") + \
|
||||
glob.glob("tests_errors/*ispc")
|
||||
files = fix_windows_paths(files)
|
||||
else:
|
||||
files = args
|
||||
if is_windows:
|
||||
argfiles = [ ]
|
||||
for f in args:
|
||||
# we have to glob ourselves if this is being run under a DOS
|
||||
# shell..
|
||||
argfiles += glob.glob(f)
|
||||
else:
|
||||
argfiles = args
|
||||
|
||||
files = [ ]
|
||||
for f in argfiles:
|
||||
if os.path.splitext(string.lower(f))[1] != ".ispc":
|
||||
sys.stdout.write("Ignoring file %s, which doesn't have an .ispc extension.\n" % f)
|
||||
else:
|
||||
files += [ f ]
|
||||
|
||||
# randomly shuffle the tests if asked to do so
|
||||
if (options.random):
|
||||
@@ -52,19 +113,14 @@ if (options.random):
|
||||
|
||||
# counter
|
||||
total_tests = 0
|
||||
finished_tests_counter = multiprocessing.Value(c_int)
|
||||
|
||||
# We'd like to use the Lock class from the multiprocessing package to
|
||||
# serialize accesses to finished_tests_counter. Unfortunately, the version of
|
||||
# python that ships with OSX 10.5 has this bug:
|
||||
# http://bugs.python.org/issue5261. Therefore, we use the (deprecated but
|
||||
# still available) mutex class.
|
||||
#finished_tests_counter_lock = multiprocessing.Lock()
|
||||
finished_tests_mutex = mutex.mutex()
|
||||
finished_tests_counter = multiprocessing.Value(c_int)
|
||||
finished_tests_counter_lock = multiprocessing.Lock()
|
||||
|
||||
# utility routine to print an update on the number of tests that have been
|
||||
# finished. Should be called with the mutex (or lock) held..
|
||||
# finished. Should be called with the lock held..
|
||||
def update_progress(fn):
|
||||
global total_tests
|
||||
finished_tests_counter.value = finished_tests_counter.value + 1
|
||||
progress_str = " Done %d / %d [%s]" % (finished_tests_counter.value, total_tests, fn)
|
||||
# spaces to clear out detrius from previous printing...
|
||||
@@ -73,138 +129,196 @@ def update_progress(fn):
|
||||
progress_str += '\r'
|
||||
sys.stdout.write(progress_str)
|
||||
sys.stdout.flush()
|
||||
finished_tests_mutex.unlock()
|
||||
|
||||
fnull = open(os.devnull, 'w')
|
||||
def run_command(cmd):
|
||||
if options.verbose:
|
||||
sys.stdout.write("Running: %s\n" % cmd)
|
||||
sp = subprocess.Popen(shlex.split(cmd), stdin=None,
|
||||
stdout=subprocess.PIPE,
|
||||
stderr=subprocess.PIPE)
|
||||
out = sp.communicate()
|
||||
output = ""
|
||||
output += out[0].decode("utf-8")
|
||||
output += out[1].decode("utf-8")
|
||||
|
||||
return (sp.returncode, output)
|
||||
|
||||
# run the commands in cmd_list
|
||||
def run_cmds(cmd_list, filename, expect_failure):
|
||||
for cmd in cmd_list:
|
||||
if expect_failure:
|
||||
failed = (subprocess.call(cmd, shell = True, stdout = fnull, stderr = fnull) != 0)
|
||||
else:
|
||||
failed = (os.system(cmd) != 0)
|
||||
if failed:
|
||||
break
|
||||
def run_cmds(compile_cmds, run_cmd, filename, expect_failure):
|
||||
for cmd in compile_cmds:
|
||||
(return_code, output) = run_command(cmd)
|
||||
compile_failed = (return_code != 0)
|
||||
if compile_failed:
|
||||
sys.stdout.write("Compilation of test %s failed \n" % filename)
|
||||
if output != "":
|
||||
sys.stdout.write("%s" % output)
|
||||
return (1, 0)
|
||||
|
||||
surprise = ((expect_failure and not failed) or (not expect_failure and failed))
|
||||
(return_code, output) = run_command(run_cmd)
|
||||
run_failed = (return_code != 0)
|
||||
|
||||
surprise = ((expect_failure and not run_failed) or
|
||||
(not expect_failure and run_failed))
|
||||
if surprise == True:
|
||||
print "Test %s %s " % \
|
||||
(filename, "unexpectedly passed" if expect_failure else "failed")
|
||||
return surprise
|
||||
sys.stderr.write("Test %s %s (return code %d) \n" % \
|
||||
(filename, "unexpectedly passed" if expect_failure else "failed",
|
||||
return_code))
|
||||
if output != "":
|
||||
sys.stdout.write("%s\n" % output)
|
||||
if surprise == True:
|
||||
return (0, 1)
|
||||
else:
|
||||
return (0, 0)
|
||||
|
||||
|
||||
# pull tests to run from the given queue and run them. Multiple copies of
|
||||
# this function will be running in parallel across all of the CPU cores of
|
||||
# the system.
|
||||
def run_tasks_from_queue(queue):
|
||||
error_count = 0
|
||||
while True:
|
||||
filename = queue.get()
|
||||
if (filename == 'STOP'):
|
||||
sys.exit(error_count)
|
||||
def run_test(filename):
|
||||
global is_windows
|
||||
if is_windows:
|
||||
input_prefix = "../"
|
||||
else:
|
||||
input_prefix = ""
|
||||
|
||||
# is this a test to make sure an error is issued?
|
||||
want_error = (filename.find("tests_errors") != -1)
|
||||
if want_error == True:
|
||||
ispc_cmd = ispc_exe + " --werror --nowrap %s --arch=%s --target=%s" % \
|
||||
(input_prefix + filename, options.arch, options.target)
|
||||
(return_code, output) = run_command(ispc_cmd)
|
||||
got_error = (return_code != 0)
|
||||
|
||||
# is this a test to make sure an error is issued?
|
||||
want_error = (filename.find("tests_errors") != -1)
|
||||
if want_error == True:
|
||||
ispc_cmd = "ispc --werror --nowrap %s --arch=%s --target=%s" % \
|
||||
(filename, options.arch, options.target)
|
||||
sp = subprocess.Popen(shlex.split(ispc_cmd), stdin=None, stdout=subprocess.PIPE,
|
||||
stderr=subprocess.PIPE)
|
||||
output = sp.communicate()[1]
|
||||
got_error = (sp.returncode != 0)
|
||||
|
||||
# figure out the error message we're expecting
|
||||
file = open(filename, 'r')
|
||||
firstline = file.readline()
|
||||
firstline = string.replace(firstline, "//", "")
|
||||
firstline = string.lstrip(firstline)
|
||||
firstline = string.rstrip(firstline)
|
||||
file.close()
|
||||
|
||||
if (output.find(firstline) == -1):
|
||||
print "Didn't see expected error message \"%s\" from test %s.\nActual outout: %s" % \
|
||||
(firstline, filename, output)
|
||||
error_count += 1
|
||||
elif got_error == False:
|
||||
print "Unexpectedly no errors issued from test %s" % filename
|
||||
error_count += 1
|
||||
continue
|
||||
# figure out the error message we're expecting
|
||||
file = open(input_prefix + filename, 'r')
|
||||
firstline = file.readline()
|
||||
firstline = firstline.replace("//", "")
|
||||
firstline = firstline.lstrip()
|
||||
firstline = firstline.rstrip()
|
||||
file.close()
|
||||
|
||||
if (output.find(firstline) == -1):
|
||||
sys.stderr.write("Didn't see expected error message %s from test %s.\nActual output:\n%s\n" % \
|
||||
(firstline, filename, output))
|
||||
return (1, 0)
|
||||
elif got_error == False:
|
||||
sys.stderr.write("Unexpectedly no errors issued from test %s\n" % filename)
|
||||
return (1, 0)
|
||||
else:
|
||||
return (0, 0)
|
||||
else:
|
||||
# do we expect this test to fail?
|
||||
should_fail = (filename.find("failing_") != -1)
|
||||
|
||||
if options.static_exe == True:
|
||||
# if the user wants us to build a static executable to run for
|
||||
# this test, we need to figure out the signature of the test
|
||||
# function that this test has.
|
||||
sig2def = { "f_v(" : 0, "f_f(" : 1, "f_fu(" : 2, "f_fi(" : 3,
|
||||
"f_du(" : 4, "f_duf(" : 5, "f_di(" : 6 }
|
||||
file = open(filename, 'r')
|
||||
match = -1
|
||||
for line in file:
|
||||
# look for lines with 'export'...
|
||||
if line.find("export") == -1:
|
||||
continue
|
||||
# one of them should have a function with one of the
|
||||
# declarations in sig2def
|
||||
for pattern, ident in sig2def.items():
|
||||
if line.find(pattern) != -1:
|
||||
match = ident
|
||||
break
|
||||
file.close()
|
||||
if match == -1:
|
||||
print "Fatal error: unable to find function signature in test %s" % filename
|
||||
error_count += 1
|
||||
# We need to figure out the signature of the test
|
||||
# function that this test has.
|
||||
sig2def = { "f_v(" : 0, "f_f(" : 1, "f_fu(" : 2, "f_fi(" : 3,
|
||||
"f_du(" : 4, "f_duf(" : 5, "f_di(" : 6 }
|
||||
file = open(input_prefix + filename, 'r')
|
||||
match = -1
|
||||
for line in file:
|
||||
# look for lines with 'export'...
|
||||
if line.find("export") == -1:
|
||||
continue
|
||||
# one of them should have a function with one of the
|
||||
# declarations in sig2def
|
||||
for pattern, ident in list(sig2def.items()):
|
||||
if line.find(pattern) != -1:
|
||||
match = ident
|
||||
break
|
||||
file.close()
|
||||
if match == -1:
|
||||
sys.stderr.write("Fatal error: unable to find function signature " + \
|
||||
"in test %s\n" % filename)
|
||||
return (1, 0)
|
||||
else:
|
||||
global is_generic_target
|
||||
if is_generic_target:
|
||||
obj_name = "%s.cpp" % filename
|
||||
|
||||
if is_windows:
|
||||
if not is_generic_target:
|
||||
obj_name = "%s%s.obj" % (input_prefix, filename)
|
||||
exe_name = "%s%s.exe" % (input_prefix, filename)
|
||||
|
||||
cc_cmd = "%s /I. /I../winstuff /Zi /nologo /DTEST_SIG=%d %stest_static.cpp %s /Fe%s" % \
|
||||
(options.compiler_exe, match, input_prefix, obj_name, exe_name)
|
||||
if should_fail:
|
||||
cc_cmd += " /DEXPECT_FAILURE"
|
||||
else:
|
||||
obj_name = "%s.o" % filename
|
||||
if not is_generic_target:
|
||||
obj_name = "%s.o" % filename
|
||||
exe_name = "%s.run" % filename
|
||||
ispc_cmd = "ispc --woff %s -o %s --arch=%s --target=%s" % \
|
||||
(filename, obj_name, options.arch, options.target)
|
||||
if options.no_opt:
|
||||
ispc_cmd += " -O0"
|
||||
|
||||
if options.arch == 'x86':
|
||||
gcc_arch = '-m32'
|
||||
else:
|
||||
gcc_arch = '-m64'
|
||||
gcc_cmd = "g++ %s test_static.cpp -DTEST_SIG=%d %s.o -o %s" % \
|
||||
(gcc_arch, match, filename, exe_name)
|
||||
cc_cmd = "%s -O2 -msse4.2 -I. %s test_static.cpp -DTEST_SIG=%d %s -o %s" % \
|
||||
(options.compiler_exe, gcc_arch, match, obj_name, exe_name)
|
||||
if platform.system() == 'Darwin':
|
||||
gcc_cmd += ' -Wl,-no_pie'
|
||||
cc_cmd += ' -Wl,-no_pie'
|
||||
if should_fail:
|
||||
gcc_cmd += " -DEXPECT_FAILURE"
|
||||
|
||||
# compile the ispc code, make the executable, and run it...
|
||||
error_count += run_cmds([ispc_cmd, gcc_cmd, exe_name], filename, should_fail)
|
||||
cc_cmd += " -DEXPECT_FAILURE"
|
||||
|
||||
# clean up after running the test
|
||||
ispc_cmd = ispc_exe + " --woff %s -o %s --arch=%s --target=%s" % \
|
||||
(input_prefix+filename, obj_name, options.arch, options.target)
|
||||
if options.no_opt:
|
||||
ispc_cmd += " -O0"
|
||||
if is_generic_target:
|
||||
ispc_cmd += " --emit-c++ --c++-include-file=%s" % options.include_file
|
||||
|
||||
# compile the ispc code, make the executable, and run it...
|
||||
(compile_error, run_error) = run_cmds([ispc_cmd, cc_cmd],
|
||||
options.wrapexe + " " + exe_name, \
|
||||
filename, should_fail)
|
||||
|
||||
# clean up after running the test
|
||||
try:
|
||||
if not run_error:
|
||||
os.unlink(exe_name)
|
||||
if is_windows:
|
||||
os.unlink("%s%s.pdb" % (input_prefix, filename))
|
||||
os.unlink("%s%s.ilk" % (input_prefix, filename))
|
||||
os.unlink(obj_name)
|
||||
except:
|
||||
None
|
||||
|
||||
return (compile_error, run_error)
|
||||
|
||||
# pull tests to run from the given queue and run them. Multiple copies of
|
||||
# this function will be running in parallel across all of the CPU cores of
|
||||
# the system.
|
||||
def run_tasks_from_queue(queue, queue_ret):
|
||||
if is_windows:
|
||||
tmpdir = "tmp%d" % os.getpid()
|
||||
os.mkdir(tmpdir)
|
||||
os.chdir(tmpdir)
|
||||
else:
|
||||
olddir = ""
|
||||
|
||||
compile_error_files = [ ]
|
||||
run_error_files = [ ]
|
||||
while True:
|
||||
filename = queue.get()
|
||||
if (filename == 'STOP'):
|
||||
queue_ret.put((compile_error_files, run_error_files))
|
||||
if is_windows:
|
||||
try:
|
||||
os.unlink(exe_name)
|
||||
os.unlink(obj_name)
|
||||
os.remove("test_static.obj")
|
||||
os.remove("/vc100.pdb")
|
||||
os.chdir("..")
|
||||
os.rmdir(tmpdir)
|
||||
except:
|
||||
None
|
||||
else:
|
||||
# otherwise we'll use ispc_test + the LLVM JIT to run the test
|
||||
bitcode_file = "%s.bc" % filename
|
||||
compile_cmd = "ispc --woff --emit-llvm %s --target=%s -o %s" % \
|
||||
(filename, options.target, bitcode_file)
|
||||
if options.no_opt:
|
||||
compile_cmd += " -O0"
|
||||
test_cmd = "ispc_test %s" % bitcode_file
|
||||
|
||||
sys.exit(0)
|
||||
|
||||
error_count += run_cmds([compile_cmd, test_cmd], filename, should_fail)
|
||||
|
||||
try:
|
||||
os.unlink(bitcode_file)
|
||||
except:
|
||||
None
|
||||
|
||||
# If not for http://bugs.python.org/issue5261 on OSX, we'd like to do this:
|
||||
#with finished_tests_counter_lock:
|
||||
#update_progress(filename)
|
||||
# but instead we do this...
|
||||
finished_tests_mutex.lock(update_progress, filename)
|
||||
(compile_error, run_error) = run_test(filename)
|
||||
if compile_error != 0:
|
||||
compile_error_files += [ filename ]
|
||||
if run_error != 0:
|
||||
run_error_files += [ filename ]
|
||||
|
||||
with finished_tests_counter_lock:
|
||||
update_progress(filename)
|
||||
|
||||
task_threads = []
|
||||
|
||||
@@ -214,16 +328,23 @@ def sigint(signum, frame):
|
||||
sys.exit(1)
|
||||
|
||||
if __name__ == '__main__':
|
||||
nthreads = multiprocessing.cpu_count()
|
||||
total_tests = len(files)
|
||||
print "Found %d CPUs. Running %d tests." % (nthreads, total_tests)
|
||||
|
||||
compile_error_files = [ ]
|
||||
run_error_files = [ ]
|
||||
|
||||
nthreads = min(multiprocessing.cpu_count(), options.num_jobs)
|
||||
sys.stdout.write("Running %d jobs in parallel. Running %d tests.\n" % (nthreads, total_tests))
|
||||
|
||||
# put each of the test filenames into a queue
|
||||
q = multiprocessing.Queue()
|
||||
for fn in files:
|
||||
if is_windows:
|
||||
fn = fn.replace("\\",'/')
|
||||
q.put(fn)
|
||||
for x in range(nthreads):
|
||||
q.put('STOP')
|
||||
qret = multiprocessing.Queue()
|
||||
|
||||
# need to catch sigint so that we can terminate all of the tasks if
|
||||
# we're interrupted
|
||||
@@ -231,17 +352,30 @@ if __name__ == '__main__':
|
||||
|
||||
# launch jobs to run tests
|
||||
for x in range(nthreads):
|
||||
t = multiprocessing.Process(target=run_tasks_from_queue, args=(q,))
|
||||
t = multiprocessing.Process(target=run_tasks_from_queue, args=(q,qret))
|
||||
task_threads.append(t)
|
||||
t.start()
|
||||
|
||||
# wait for them to all finish and then return the number that failed
|
||||
# (i.e. return 0 if all is ok)
|
||||
error_count = 0
|
||||
for t in task_threads:
|
||||
t.join()
|
||||
error_count += t.exitcode
|
||||
print
|
||||
if error_count > 0:
|
||||
print "%d / %d tests FAILED!" % (error_count, total_tests)
|
||||
sys.exit(error_count)
|
||||
sys.stdout.write("\n")
|
||||
|
||||
while not qret.empty():
|
||||
(c, r) = qret.get()
|
||||
compile_error_files += c
|
||||
run_error_files += r
|
||||
|
||||
if len(compile_error_files) > 0:
|
||||
compile_error_files.sort()
|
||||
sys.stdout.write("%d / %d tests FAILED compilation:\n" % (len(compile_error_files), total_tests))
|
||||
for f in compile_error_files:
|
||||
sys.stdout.write("\t%s\n" % f)
|
||||
if len(run_error_files) > 0:
|
||||
run_error_files.sort()
|
||||
sys.stdout.write("%d / %d tests FAILED execution:\n" % (len(run_error_files), total_tests))
|
||||
for f in run_error_files:
|
||||
sys.stdout.write("\t%s\n" % f)
|
||||
|
||||
sys.exit(len(compile_error_files) + len(run_error_files))
|
||||
|
||||
95
run_tests.sh
95
run_tests.sh
@@ -1,95 +0,0 @@
|
||||
#!/bin/bash
#
# run_tests.sh - run ispc correctness tests.
#
# Each test is compiled to LLVM bitcode with `ispc` and the resulting bitcode
# is executed with `ispc_test`.
#
# usage: run_tests.sh [-v] [-t target] [filenames]
#
# With no filenames, every tests/*.ispc file is run, followed by every
# failing_tests/*.ispc file; the latter are expected NOT to pass, so a pass
# there is reported as a surprise.
#
# Exit status: 0 when there were no surprises, 1 otherwise (also 1 for -h
# and for invalid usage).
#
# Note: `set -e` is deliberately not used; failing commands are the thing
# being measured and are checked explicitly.

surprises=0
verbose=false
target=sse4

# Print the usage text to stdout.
usage() {
  cat <<EOF
usage: run_tests.sh [-v] [-t target] [filenames]
  -v            # verbose output
  -t            # specify compilation target (SSE4 is the default).
  [filenames]   # (optional) files to run through testing infrastructure
                # if none are provided, all in tests/ will be run.
EOF
}

while getopts ":vt:h" opt; do
  case "$opt" in
    v)
      verbose=true
      ;;
    t)
      target=$OPTARG
      ;;
    h)
      usage
      exit 1
      ;;
    :)
      # Leading ':' in the optstring puts getopts in silent mode, so a
      # missing argument must be reported here or it goes unnoticed.
      printf 'run_tests.sh: option -%s requires an argument\n' "$OPTARG" >&2
      usage >&2
      exit 1
      ;;
    \?)
      printf 'run_tests.sh: unknown option -%s\n' "$OPTARG" >&2
      usage >&2
      exit 1
      ;;
  esac
done
shift $((OPTIND - 1))

ispc_arch=x86-64
if [[ "${OS:-}" == "Windows_NT" ]]; then
  ispc_arch=x86
fi
# Kept as an array so each flag stays a separate word without relying on
# unquoted word splitting.
ispc_args=(--target="$target" --arch="$ispc_arch" -O2 --woff)

# Compile one .ispc file to bitcode and run it through ispc_test.
# Arguments: $1 - path of the .ispc test file
# Globals:   ispc_args (read), surprises (set to 1 on any failure)
# Outputs:   a "FAILED" line plus a blank line on stdout when a step fails
run_one_test() {
  local src=$1
  local bc=${src%ispc}bc   # foo.ispc -> foo.bc

  if ! ispc "${ispc_args[@]}" "$src" -o "$bc" --emit-llvm; then
    surprises=1
    echo "Test $src FAILED ispc compile"
    echo
  elif ! ispc_test "$bc"; then
    surprises=1
    echo "Test $src FAILED ispc_test"
    echo
  fi
  rm -f -- "$bc"
}

# Test the argument COUNT.  The previous `[[ "$1" > 0 ]]` was a lexicographic
# string comparison, so a name such as "./tests/foo.ispc" (which sorts before
# "0") was treated as "no arguments" and the whole suite ran instead.
if (($# > 0)); then
  for i in "$@"; do
    echo "Running test $i"
    run_one_test "$i"
  done
else
  echo "Running all correctness tests"

  # nullglob: an unmatched pattern expands to nothing instead of being
  # iterated as a literal "tests/*.ispc" string.
  shopt -s nullglob
  tests=(tests/*.ispc)
  failing=(failing_tests/*.ispc)
  shopt -u nullglob

  number=${#tests[@]}
  if ((number == 0)); then
    # Previously the literal pattern was handed to ispc and failed; keep
    # treating an empty test directory as a surprise rather than a pass.
    echo "No tests found matching tests/*.ispc" >&2
    surprises=1
  fi

  counter=1
  for i in "${tests[@]}"; do
    if $verbose; then
      printf 'Running test %d of %d.\r' "$counter" "$number"
    fi
    counter=$((counter + 1))
    run_one_test "$i"
  done

  printf '\nRunning failing tests\n'
  for i in "${failing[@]}"; do
    # Output is intentionally discarded: only the exit status matters here.
    # The pipeline status is that of ispc_test (no pipefail), as before.
    # `--woff` matches the spelling used in ispc_args above; the script
    # previously passed `-woff` here.
    if (ispc -O2 "$i" --woff -o - --emit-llvm | ispc_test -) >/dev/null 2>&1; then
      surprises=1
      echo "Test $i UNEXPECTEDLY PASSED"
      echo
    fi
  done
fi

if [[ "$surprises" == 0 ]]; then
  echo "No surprises."
fi

exit "$surprises"
|
||||
2100
stdlib.ispc
2100
stdlib.ispc
File diff suppressed because it is too large
Load Diff
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user