Compare commits
131 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
d65bf2eb2f | ||
|
|
1bba9d4307 | ||
|
|
4388338dad | ||
|
|
2fb59c90cf | ||
|
|
68f6ea8def | ||
|
|
3f89295d10 | ||
|
|
748b292e77 | ||
|
|
6451c3d99d | ||
|
|
d14a2de168 | ||
|
|
642150095d | ||
|
|
3bf3ac7922 | ||
|
|
c6d1cebad4 | ||
|
|
08189ce08c | ||
|
|
7013d7d52f | ||
|
|
7045b76f84 | ||
|
|
58a0b4a20d | ||
|
|
0f8eee9809 | ||
|
|
0740299860 | ||
|
|
652215861e | ||
|
|
602209e5a8 | ||
|
|
b60f8b4f70 | ||
|
|
b67446d998 | ||
|
|
9670ab0887 | ||
|
|
0223bb85ee | ||
|
|
fd81255db1 | ||
|
|
8a8e1a7f73 | ||
|
|
ef05fbf424 | ||
|
|
fa01b63fa5 | ||
|
|
63d3d25030 | ||
|
|
a8db866228 | ||
|
|
0519eea951 | ||
|
|
f4653ecd11 | ||
|
|
5d67252ed0 | ||
|
|
5134de71c0 | ||
|
|
2be1251c70 | ||
|
|
c0161aa17f | ||
|
|
b683aa11b1 | ||
|
|
2654bb0112 | ||
|
|
d8728104b4 | ||
|
|
0be1b70fba | ||
|
|
a0e9793de3 | ||
|
|
da9200fcee | ||
|
|
54e8e8022b | ||
|
|
d84cf781da | ||
|
|
002f27a30f | ||
|
|
86d88e9773 | ||
|
|
fda00afe6e | ||
|
|
be0c77d556 | ||
|
|
0ed11a7832 | ||
|
|
ff6971fb15 | ||
|
|
5b4dbc8167 | ||
|
|
59f4c9985e | ||
|
|
8da9be1a09 | ||
|
|
11033e108e | ||
|
|
4f97262cf2 | ||
|
|
9b68b9087a | ||
|
|
15cc812e37 | ||
|
|
71317e6aa6 | ||
|
|
1abaaee73e | ||
|
|
78c6d3c02f | ||
|
|
48e9d4af39 | ||
|
|
cb7ad371c6 | ||
|
|
2951589825 | ||
|
|
f23dc5366a | ||
|
|
e3341176c5 | ||
|
|
8938e14442 | ||
|
|
4151778f5e | ||
|
|
23b85cd88d | ||
|
|
234e5cd3e1 | ||
|
|
f75c94a8f1 | ||
|
|
848a432640 | ||
|
|
dea13979e0 | ||
|
|
052d34bf5b | ||
|
|
d4c5e82896 | ||
|
|
562d61caff | ||
|
|
75f18c7c66 | ||
|
|
5d35349dc9 | ||
|
|
1a81173c93 | ||
|
|
1d9201fe3d | ||
|
|
6dbb15027a | ||
|
|
f23d030e43 | ||
|
|
701334ccf2 | ||
|
|
f48a662ed3 | ||
|
|
ced3f1f5fc | ||
|
|
018aa96c8b | ||
|
|
34eda04d9b | ||
|
|
45767ad197 | ||
|
|
f9463af75b | ||
|
|
6f6e28077f | ||
|
|
0a9a7c939a | ||
|
|
f30a5dea79 | ||
|
|
018b547c40 | ||
|
|
e82a720223 | ||
|
|
8d1b77b235 | ||
|
|
b8987faeee | ||
|
|
17fdab2793 | ||
|
|
1fa6520cb6 | ||
|
|
b6af5c16c6 | ||
|
|
10ebe88abf | ||
|
|
c0b41ad6f5 | ||
|
|
9920b30318 | ||
|
|
07f218137a | ||
|
|
89a5248f4f | ||
|
|
891919074e | ||
|
|
4adf527a4d | ||
|
|
533b539780 | ||
|
|
6f26ae9801 | ||
|
|
ddcdfff3ae | ||
|
|
5b48354d9a | ||
|
|
46bfef3fce | ||
|
|
20536bb339 | ||
|
|
f6605ee465 | ||
|
|
034507a35b | ||
|
|
0b2febcec0 | ||
|
|
d2fa735ef1 | ||
|
|
20f34b67da | ||
|
|
03f3db1e89 | ||
|
|
9805b0742d | ||
|
|
6000c696b2 | ||
|
|
5a2edf723b | ||
|
|
aec7da740a | ||
|
|
a79bc75b72 | ||
|
|
eaaebf7928 | ||
|
|
198aa9620e | ||
|
|
27c53a3c25 | ||
|
|
bd70182369 | ||
|
|
04df63d955 | ||
|
|
d59131d670 | ||
|
|
9475e13d81 | ||
|
|
765d86076f | ||
|
|
e2b6ed3db8 |
128
Makefile
128
Makefile
@@ -3,22 +3,47 @@
|
||||
#
|
||||
|
||||
ARCH_OS = $(shell uname)
|
||||
ifeq ($(ARCH_OS), Darwin)
|
||||
ARCH_OS2 = "OSX"
|
||||
else
|
||||
ARCH_OS2 = $(shell uname -o)
|
||||
endif
|
||||
ARCH_TYPE = $(shell arch)
|
||||
|
||||
ifeq ($(shell llvm-config --version), 3.1svn)
|
||||
LLVM_LIBS=-lLLVMAsmParser -lLLVMInstrumentation -lLLVMLinker \
|
||||
-lLLVMArchive -lLLVMBitReader -lLLVMDebugInfo -lLLVMJIT -lLLVMipo \
|
||||
-lLLVMBitWriter -lLLVMTableGen -lLLVMCBackendInfo \
|
||||
-lLLVMX86Disassembler -lLLVMX86CodeGen -lLLVMSelectionDAG \
|
||||
-lLLVMAsmPrinter -lLLVMX86AsmParser -lLLVMX86Desc -lLLVMX86Info \
|
||||
-lLLVMX86AsmPrinter -lLLVMX86Utils -lLLVMMCDisassembler -lLLVMMCParser \
|
||||
-lLLVMCodeGen -lLLVMScalarOpts -lLLVMInstCombine -lLLVMTransformUtils \
|
||||
-lLLVMipa -lLLVMAnalysis -lLLVMMCJIT -lLLVMRuntimeDyld \
|
||||
-lLLVMExecutionEngine -lLLVMTarget -lLLVMMC -lLLVMObject -lLLVMCore \
|
||||
-lLLVMSupport
|
||||
else
|
||||
LLVM_LIBS=$(shell llvm-config --libs)
|
||||
endif
|
||||
|
||||
CLANG=clang
|
||||
CLANG_LIBS = -lclangFrontend -lclangDriver \
|
||||
-lclangSerialization -lclangParse -lclangSema \
|
||||
-lclangAnalysis -lclangAST -lclangLex -lclangBasic
|
||||
|
||||
ISPC_LIBS=$(CLANG_LIBS) \
|
||||
$(shell llvm-config --ldflags --libs) \
|
||||
-lpthread -ldl
|
||||
ISPC_TEST_LIBS=$(shell llvm-config --ldflags --libs) \
|
||||
-lpthread -ldl
|
||||
ISPC_LIBS=$(shell llvm-config --ldflags) $(CLANG_LIBS) $(LLVM_LIBS) \
|
||||
-lpthread
|
||||
|
||||
ifeq ($(ARCH_OS),Linux)
|
||||
ISPC_LIBS += -ldl
|
||||
endif
|
||||
|
||||
ifeq ($(ARCH_OS2),Msys)
|
||||
ISPC_LIBS += -lshlwapi -limagehlp -lpsapi
|
||||
endif
|
||||
|
||||
LLVM_CXXFLAGS=$(shell llvm-config --cppflags)
|
||||
LLVM_VERSION=$(shell llvm-config --version | sed s/\\./_/)
|
||||
LLVM_VERSION_DEF=-DLLVM_$(LLVM_VERSION)
|
||||
LLVM_VERSION=LLVM_$(shell llvm-config --version | sed s/\\./_/)
|
||||
LLVM_VERSION_DEF=-D$(LLVM_VERSION)
|
||||
|
||||
BUILD_DATE=$(shell date +%Y%m%d)
|
||||
BUILD_VERSION=$(shell git log --abbrev-commit --abbrev=16 | head -1)
|
||||
@@ -33,11 +58,7 @@ LDFLAGS=
|
||||
ifeq ($(ARCH_OS),Linux)
|
||||
# try to link everything statically under Linux (including libstdc++) so
|
||||
# that the binaries we generate will be portable across distributions...
|
||||
ifeq ($(ARCH_TYPE),x86_64)
|
||||
LDFLAGS=-static -L/usr/lib/gcc/x86_64-linux-gnu/4.4
|
||||
else
|
||||
LDFLAGS=-L/usr/lib/gcc/i686-redhat-linux/4.6.0
|
||||
endif
|
||||
LDFLAGS=-static
|
||||
endif
|
||||
|
||||
LEX=flex
|
||||
@@ -45,21 +66,25 @@ YACC=bison -d -v -t
|
||||
|
||||
###########################################################################
|
||||
|
||||
CXX_SRC=ast.cpp builtins.cpp ctx.cpp decl.cpp expr.cpp func.cpp ispc.cpp \
|
||||
llvmutil.cpp main.cpp module.cpp opt.cpp stmt.cpp sym.cpp type.cpp \
|
||||
util.cpp
|
||||
CXX_SRC=ast.cpp builtins.cpp cbackend.cpp ctx.cpp decl.cpp expr.cpp func.cpp \
|
||||
ispc.cpp llvmutil.cpp main.cpp module.cpp opt.cpp stmt.cpp sym.cpp \
|
||||
type.cpp util.cpp
|
||||
HEADERS=ast.h builtins.h ctx.h decl.h expr.h func.h ispc.h llvmutil.h module.h \
|
||||
opt.h stmt.h sym.h type.h util.h
|
||||
BUILTINS_SRC=builtins-avx.ll builtins-avx-x2.ll builtins-sse2.ll builtins-sse2-x2.ll \
|
||||
builtins-sse4.ll builtins-sse4-x2.ll builtins-dispatch.ll
|
||||
TARGETS=avx1 avx1-x2 avx2 avx2-x2 sse2 sse2-x2 sse4 sse4-x2 generic-4 generic-8 \
|
||||
generic-16
|
||||
BUILTINS_SRC=$(addprefix builtins/target-, $(addsuffix .ll, $(TARGETS))) \
|
||||
builtins/dispatch.ll
|
||||
BUILTINS_OBJS=$(addprefix builtins-, $(notdir $(BUILTINS_SRC:.ll=.o))) \
|
||||
builtins-c-32.cpp builtins-c-64.cpp
|
||||
BISON_SRC=parse.yy
|
||||
FLEX_SRC=lex.ll
|
||||
|
||||
OBJS=$(addprefix objs/, $(CXX_SRC:.cpp=.o) $(BUILTINS_SRC:.ll=.o) \
|
||||
builtins-c-32.o builtins-c-64.o stdlib_ispc.o $(BISON_SRC:.yy=.o) \
|
||||
$(FLEX_SRC:.ll=.o))
|
||||
OBJS=$(addprefix objs/, $(CXX_SRC:.cpp=.o) $(BUILTINS_OBJS) \
|
||||
stdlib_generic_ispc.o stdlib_x86_ispc.o \
|
||||
$(BISON_SRC:.yy=.o) $(FLEX_SRC:.ll=.o))
|
||||
|
||||
default: ispc ispc_test
|
||||
default: ispc
|
||||
|
||||
.PHONY: dirs clean depend doxygen print_llvm_src
|
||||
.PRECIOUS: objs/builtins-%.cpp
|
||||
@@ -78,7 +103,7 @@ print_llvm_src:
|
||||
@echo Using LLVM `llvm-config --version` from `llvm-config --libdir`
|
||||
|
||||
clean:
|
||||
/bin/rm -rf objs ispc ispc_test
|
||||
/bin/rm -rf objs ispc
|
||||
|
||||
doxygen:
|
||||
/bin/rm -rf docs/doxygen
|
||||
@@ -88,14 +113,18 @@ ispc: print_llvm_src dirs $(OBJS)
|
||||
@echo Creating ispc executable
|
||||
@$(CXX) $(LDFLAGS) -o $@ $(OBJS) $(ISPC_LIBS)
|
||||
|
||||
ispc_test: dirs ispc_test.cpp
|
||||
@echo Creating ispc_test executable
|
||||
@$(CXX) $(LDFLAGS) $(CXXFLAGS) -o $@ ispc_test.cpp $(ISPC_TEST_LIBS)
|
||||
|
||||
objs/%.o: %.cpp
|
||||
@echo Compiling $<
|
||||
@$(CXX) $(CXXFLAGS) -o $@ -c $<
|
||||
|
||||
objs/cbackend.o: cbackend.cpp
|
||||
@echo Compiling $<
|
||||
@$(CXX) -fno-rtti -fno-exceptions $(CXXFLAGS) -o $@ -c $<
|
||||
|
||||
objs/%.o: objs/%.cpp
|
||||
@echo Compiling $<
|
||||
@$(CXX) $(CXXFLAGS) -o $@ -c $<
|
||||
|
||||
objs/parse.cc: parse.yy
|
||||
@echo Running bison on $<
|
||||
@$(YACC) -o $@ $<
|
||||
@@ -112,41 +141,24 @@ objs/lex.o: objs/lex.cpp $(HEADERS) objs/parse.cc
|
||||
@echo Compiling $<
|
||||
@$(CXX) $(CXXFLAGS) -o $@ -c $<
|
||||
|
||||
objs/builtins-%.cpp: builtins-%.ll
|
||||
@echo Creating C++ source from builtin definitions file $<
|
||||
@m4 -DLLVM_VERSION=$(LLVM_VERSION) builtins.m4 $< | ./bitcode2cpp.py $< > $@
|
||||
|
||||
objs/builtins-%.o: objs/builtins-%.cpp
|
||||
@echo Compiling $<
|
||||
@$(CXX) $(CXXFLAGS) -o $@ -c $<
|
||||
|
||||
objs/builtins-c-32.cpp: builtins-c.c
|
||||
objs/builtins-%.cpp: builtins/%.ll builtins/util.m4 $(wildcard builtins/*common.ll)
|
||||
@echo Creating C++ source from builtins definition file $<
|
||||
@$(CLANG) -m32 -emit-llvm -c $< -o - | llvm-dis - | ./bitcode2cpp.py builtins-c-32.c > $@
|
||||
@m4 -Ibuiltins/ -DLLVM_VERSION=$(LLVM_VERSION) $< | python bitcode2cpp.py $< > $@
|
||||
|
||||
objs/builtins-c-32.o: objs/builtins-c-32.cpp
|
||||
@echo Compiling $<
|
||||
@$(CXX) $(CXXFLAGS) -o $@ -c $<
|
||||
|
||||
objs/builtins-c-64.cpp: builtins-c.c
|
||||
objs/builtins-c-32.cpp: builtins/builtins.c
|
||||
@echo Creating C++ source from builtins definition file $<
|
||||
@$(CLANG) -m64 -emit-llvm -c $< -o - | llvm-dis - | ./bitcode2cpp.py builtins-c-64.c > $@
|
||||
@$(CLANG) -m32 -emit-llvm -c $< -o - | llvm-dis - | python bitcode2cpp.py c-32 > $@
|
||||
|
||||
objs/builtins-c-64.o: objs/builtins-c-64.cpp
|
||||
@echo Compiling $<
|
||||
@$(CXX) $(CXXFLAGS) -o $@ -c $<
|
||||
objs/builtins-c-64.cpp: builtins/builtins.c
|
||||
@echo Creating C++ source from builtins definition file $<
|
||||
@$(CLANG) -m64 -emit-llvm -c $< -o - | llvm-dis - | python bitcode2cpp.py c-64 > $@
|
||||
|
||||
objs/stdlib_ispc.cpp: stdlib.ispc
|
||||
@echo Creating C++ source from $<
|
||||
@$(CLANG) -E -x c -DISPC=1 -DPI=3.1415926536 $< -o - | ./stdlib2cpp.py > $@
|
||||
objs/stdlib_generic_ispc.cpp: stdlib.ispc
|
||||
@echo Creating C++ source from $< for generic
|
||||
@$(CLANG) -E -x c -DISPC_TARGET_GENERIC=1 -DISPC=1 -DPI=3.1415926536 $< -o - | \
|
||||
python stdlib2cpp.py generic > $@
|
||||
|
||||
objs/stdlib_ispc.o: objs/stdlib_ispc.cpp
|
||||
@echo Compiling $<
|
||||
@$(CXX) $(CXXFLAGS) -o $@ -c $<
|
||||
|
||||
objs/builtins-sse2.cpp: builtins.m4 builtins-sse2-common.ll builtins-sse2.ll
|
||||
objs/builtins-sse2-x2.cpp: builtins.m4 builtins-sse2-common.ll builtins-sse2-x2.ll
|
||||
objs/builtins-sse4.cpp: builtins.m4 builtins-sse4-common.ll builtins-sse4.ll
|
||||
objs/builtins-sse4-x2.cpp: builtins.m4 builtins-sse4-common.ll builtins-sse4-x2.ll
|
||||
objs/builtins-avx.cpp: builtins.m4 builtins-avx-common.ll builtins-avx.ll
|
||||
objs/builtins-avx-x2.cpp: builtins.m4 builtins-avx-common.ll builtins-avx-x2.ll
|
||||
objs/stdlib_x86_ispc.cpp: stdlib.ispc
|
||||
@echo Creating C++ source from $< for x86
|
||||
@$(CLANG) -E -x c -DISPC=1 -DPI=3.1415926536 $< -o - | \
|
||||
python stdlib2cpp.py x86 > $@
|
||||
|
||||
90
README.rst
Normal file
90
README.rst
Normal file
@@ -0,0 +1,90 @@
|
||||
==============================
|
||||
Intel(r) SPMD Program Compiler
|
||||
==============================
|
||||
|
||||
``ispc`` is a compiler for a variant of the C programming language, with
|
||||
extensions for `single program, multiple data
|
||||
<http://en.wikipedia.org/wiki/SPMD>`_ programming. Under the SPMD model,
|
||||
the programmer writes a program that generally appears to be a regular
|
||||
serial program, though the execution model is actually that a number of
|
||||
*program instances* execute in parallel on the hardware.
|
||||
|
||||
Overview
|
||||
--------
|
||||
|
||||
``ispc`` compiles a C-based SPMD programming language to run on the SIMD
|
||||
units of CPUs; it frequently provides a 3x or more speedup on CPUs with
|
||||
4-wide vector SSE units and 5x-6x on CPUs with 8-wide AVX vector units,
|
||||
without any of the difficulty of writing intrinsics code. Parallelization
|
||||
across multiple cores is also supported by ``ispc``, making it
|
||||
possible to write programs that achieve performance improvement that scales
|
||||
by both number of cores and vector unit size.
|
||||
|
||||
There are a few key principles in the design of ``ispc``:
|
||||
|
||||
* To build a small set of extensions to the C language that
|
||||
would deliver excellent performance to performance-oriented
|
||||
programmers who want to run SPMD programs on the CPU.
|
||||
|
||||
* To provide a thin abstraction layer between the programmer
|
||||
and the hardware--in particular, to have an execution and
|
||||
data model where the programmer can cleanly reason about the
|
||||
mapping of their source program to compiled assembly language
|
||||
and the underlying hardware.
|
||||
|
||||
* To make it possible to harness the computational power of SIMD
|
||||
vector units without the extremely low-programmer-productivity
|
||||
activity of directly writing intrinsics.
|
||||
|
||||
* To explore opportunities from close coupling between C/C++
|
||||
application code and SPMD ``ispc`` code running on the
|
||||
same processor--to have lightweight function calls between
|
||||
the two languages and to share data directly via pointers without
|
||||
copying or reformatting.
|
||||
|
||||
``ispc`` is an open source compiler with the BSD license. It uses the
|
||||
remarkable `LLVM Compiler Infrastructure <http://llvm.org>`_ for back-end
|
||||
code generation and optimization and is `hosted on
|
||||
github <http://github.com/ispc/ispc/>`_. It supports Windows, Mac, and
|
||||
Linux, with both x86 and x86-64 targets. It currently supports the SSE2,
|
||||
SSE4, and AVX instruction sets.
|
||||
|
||||
Features
|
||||
--------
|
||||
|
||||
``ispc`` provides a number of key features to developers:
|
||||
|
||||
* Familiarity as an extension of the C programming
|
||||
language: ``ispc`` supports familiar C syntax and
|
||||
programming idioms, while adding the ability to write SPMD
|
||||
programs.
|
||||
|
||||
* High-quality SIMD code generation: the performance
|
||||
of code generated by ``ispc`` is often close to that of
|
||||
hand-written intrinsics code.
|
||||
|
||||
* Ease of adoption with existing software
|
||||
systems: functions written in ``ispc`` directly
|
||||
interoperate with application functions written in C/C++ and
|
||||
with application data structures.
|
||||
|
||||
* Portability across over a decade of CPU
|
||||
generations: ``ispc`` has targets for SSE2, SSE4, AVX
|
||||
(and soon, AVX2).
|
||||
|
||||
* Portability across operating systems: Microsoft
|
||||
Windows, Mac OS X, and Linux are all supported
|
||||
by ``ispc``.
|
||||
|
||||
* Debugging with standard tools: ``ispc``
|
||||
programs can be debugged with standard debuggers (OS X and
|
||||
Linux only).
|
||||
|
||||
Additional Resources
|
||||
--------------------
|
||||
|
||||
Prebuilt ``ispc`` binaries for Windows, OS X and Linux can be downloaded
|
||||
from the `ispc downloads page <http://ispc.github.com/downloads.html>`_.
|
||||
See also additional
|
||||
`documentation <http://ispc.github.com/documentation.html>`_ and additional
|
||||
`performance information <http://ispc.github.com/perf.html>`_.
|
||||
22
README.txt
22
README.txt
@@ -1,22 +0,0 @@
|
||||
==============================
|
||||
Intel(r) SPMD Program Compiler
|
||||
==============================
|
||||
|
||||
Welcome to the Intel(r) SPMD Program Compiler (ispc)!
|
||||
|
||||
ispc is a new compiler for "single program, multiple data" (SPMD)
|
||||
programs. Under the SPMD model, the programmer writes a program that mostly
|
||||
appears to be a regular serial program, though the execution model is
|
||||
actually that a number of program instances execute in parallel on the
|
||||
hardware. ispc compiles a C-based SPMD programming language to run on the
|
||||
SIMD units of CPUs; it frequently provides a 3x or more speedup on CPUs
|
||||
with 4-wide SSE units, without any of the difficulty of writing intrinsics
|
||||
code.
|
||||
|
||||
ispc is an open source compiler under the BSD license; see the file
|
||||
LICENSE.txt. ispc supports Windows, Mac, and Linux, with both x86 and
|
||||
x86-64 targets. It currently supports the SSE2, SSE4, and AVX instruction
|
||||
sets.
|
||||
|
||||
For more information and examples, as well as a wiki and the bug database,
|
||||
see the ispc distribution site, http://ispc.github.com.
|
||||
242
ast.cpp
242
ast.cpp
@@ -36,8 +36,11 @@
|
||||
*/
|
||||
|
||||
#include "ast.h"
|
||||
#include "expr.h"
|
||||
#include "func.h"
|
||||
#include "stmt.h"
|
||||
#include "sym.h"
|
||||
#include "util.h"
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
// ASTNode
|
||||
@@ -63,3 +66,242 @@ AST::GenerateIR() {
|
||||
functions[i]->GenerateIR();
|
||||
}
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
|
||||
ASTNode *
|
||||
WalkAST(ASTNode *node, ASTPreCallBackFunc preFunc, ASTPostCallBackFunc postFunc,
|
||||
void *data) {
|
||||
if (node == NULL)
|
||||
return node;
|
||||
|
||||
// Call the callback function
|
||||
if (preFunc != NULL) {
|
||||
if (preFunc(node, data) == false)
|
||||
// The function asked us to not continue recursively, so stop.
|
||||
return node;
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////
|
||||
// Handle Statements
|
||||
if (dynamic_cast<Stmt *>(node) != NULL) {
|
||||
ExprStmt *es;
|
||||
DeclStmt *ds;
|
||||
IfStmt *is;
|
||||
DoStmt *dos;
|
||||
ForStmt *fs;
|
||||
ForeachStmt *fes;
|
||||
CaseStmt *cs;
|
||||
DefaultStmt *defs;
|
||||
SwitchStmt *ss;
|
||||
ReturnStmt *rs;
|
||||
LabeledStmt *ls;
|
||||
StmtList *sl;
|
||||
PrintStmt *ps;
|
||||
AssertStmt *as;
|
||||
|
||||
if ((es = dynamic_cast<ExprStmt *>(node)) != NULL)
|
||||
es->expr = (Expr *)WalkAST(es->expr, preFunc, postFunc, data);
|
||||
else if ((ds = dynamic_cast<DeclStmt *>(node)) != NULL) {
|
||||
for (unsigned int i = 0; i < ds->vars.size(); ++i)
|
||||
ds->vars[i].init = (Expr *)WalkAST(ds->vars[i].init, preFunc,
|
||||
postFunc, data);
|
||||
}
|
||||
else if ((is = dynamic_cast<IfStmt *>(node)) != NULL) {
|
||||
is->test = (Expr *)WalkAST(is->test, preFunc, postFunc, data);
|
||||
is->trueStmts = (Stmt *)WalkAST(is->trueStmts, preFunc,
|
||||
postFunc, data);
|
||||
is->falseStmts = (Stmt *)WalkAST(is->falseStmts, preFunc,
|
||||
postFunc, data);
|
||||
}
|
||||
else if ((dos = dynamic_cast<DoStmt *>(node)) != NULL) {
|
||||
dos->testExpr = (Expr *)WalkAST(dos->testExpr, preFunc,
|
||||
postFunc, data);
|
||||
dos->bodyStmts = (Stmt *)WalkAST(dos->bodyStmts, preFunc,
|
||||
postFunc, data);
|
||||
}
|
||||
else if ((fs = dynamic_cast<ForStmt *>(node)) != NULL) {
|
||||
fs->init = (Stmt *)WalkAST(fs->init, preFunc, postFunc, data);
|
||||
fs->test = (Expr *)WalkAST(fs->test, preFunc, postFunc, data);
|
||||
fs->step = (Stmt *)WalkAST(fs->step, preFunc, postFunc, data);
|
||||
fs->stmts = (Stmt *)WalkAST(fs->stmts, preFunc, postFunc, data);
|
||||
}
|
||||
else if ((fes = dynamic_cast<ForeachStmt *>(node)) != NULL) {
|
||||
for (unsigned int i = 0; i < fes->startExprs.size(); ++i)
|
||||
fes->startExprs[i] = (Expr *)WalkAST(fes->startExprs[i], preFunc,
|
||||
postFunc, data);
|
||||
for (unsigned int i = 0; i < fes->endExprs.size(); ++i)
|
||||
fes->endExprs[i] = (Expr *)WalkAST(fes->endExprs[i], preFunc,
|
||||
postFunc, data);
|
||||
fes->stmts = (Stmt *)WalkAST(fes->stmts, preFunc, postFunc, data);
|
||||
}
|
||||
else if ((cs = dynamic_cast<CaseStmt *>(node)) != NULL)
|
||||
cs->stmts = (Stmt *)WalkAST(cs->stmts, preFunc, postFunc, data);
|
||||
else if ((defs = dynamic_cast<DefaultStmt *>(node)) != NULL)
|
||||
defs->stmts = (Stmt *)WalkAST(defs->stmts, preFunc, postFunc, data);
|
||||
else if ((ss = dynamic_cast<SwitchStmt *>(node)) != NULL) {
|
||||
ss->expr = (Expr *)WalkAST(ss->expr, preFunc, postFunc, data);
|
||||
ss->stmts = (Stmt *)WalkAST(ss->stmts, preFunc, postFunc, data);
|
||||
}
|
||||
else if (dynamic_cast<BreakStmt *>(node) != NULL ||
|
||||
dynamic_cast<ContinueStmt *>(node) != NULL ||
|
||||
dynamic_cast<GotoStmt *>(node) != NULL) {
|
||||
// nothing
|
||||
}
|
||||
else if ((ls = dynamic_cast<LabeledStmt *>(node)) != NULL)
|
||||
ls->stmt = (Stmt *)WalkAST(ls->stmt, preFunc, postFunc, data);
|
||||
else if ((rs = dynamic_cast<ReturnStmt *>(node)) != NULL)
|
||||
rs->val = (Expr *)WalkAST(rs->val, preFunc, postFunc, data);
|
||||
else if ((sl = dynamic_cast<StmtList *>(node)) != NULL) {
|
||||
std::vector<Stmt *> &sls = sl->stmts;
|
||||
for (unsigned int i = 0; i < sls.size(); ++i)
|
||||
sls[i] = (Stmt *)WalkAST(sls[i], preFunc, postFunc, data);
|
||||
}
|
||||
else if ((ps = dynamic_cast<PrintStmt *>(node)) != NULL)
|
||||
ps->values = (Expr *)WalkAST(ps->values, preFunc, postFunc, data);
|
||||
else if ((as = dynamic_cast<AssertStmt *>(node)) != NULL)
|
||||
as->expr = (Expr *)WalkAST(as->expr, preFunc, postFunc, data);
|
||||
else
|
||||
FATAL("Unhandled statement type in WalkAST()");
|
||||
}
|
||||
else {
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
// Handle expressions
|
||||
Assert(dynamic_cast<Expr *>(node) != NULL);
|
||||
UnaryExpr *ue;
|
||||
BinaryExpr *be;
|
||||
AssignExpr *ae;
|
||||
SelectExpr *se;
|
||||
ExprList *el;
|
||||
FunctionCallExpr *fce;
|
||||
IndexExpr *ie;
|
||||
MemberExpr *me;
|
||||
TypeCastExpr *tce;
|
||||
ReferenceExpr *re;
|
||||
DereferenceExpr *dre;
|
||||
SizeOfExpr *soe;
|
||||
AddressOfExpr *aoe;
|
||||
|
||||
if ((ue = dynamic_cast<UnaryExpr *>(node)) != NULL)
|
||||
ue->expr = (Expr *)WalkAST(ue->expr, preFunc, postFunc, data);
|
||||
else if ((be = dynamic_cast<BinaryExpr *>(node)) != NULL) {
|
||||
be->arg0 = (Expr *)WalkAST(be->arg0, preFunc, postFunc, data);
|
||||
be->arg1 = (Expr *)WalkAST(be->arg1, preFunc, postFunc, data);
|
||||
}
|
||||
else if ((ae = dynamic_cast<AssignExpr *>(node)) != NULL) {
|
||||
ae->lvalue = (Expr *)WalkAST(ae->lvalue, preFunc, postFunc, data);
|
||||
ae->rvalue = (Expr *)WalkAST(ae->rvalue, preFunc, postFunc, data);
|
||||
}
|
||||
else if ((se = dynamic_cast<SelectExpr *>(node)) != NULL) {
|
||||
se->test = (Expr *)WalkAST(se->test, preFunc, postFunc, data);
|
||||
se->expr1 = (Expr *)WalkAST(se->expr1, preFunc, postFunc, data);
|
||||
se->expr2 = (Expr *)WalkAST(se->expr2, preFunc, postFunc, data);
|
||||
}
|
||||
else if ((el = dynamic_cast<ExprList *>(node)) != NULL) {
|
||||
for (unsigned int i = 0; i < el->exprs.size(); ++i)
|
||||
el->exprs[i] = (Expr *)WalkAST(el->exprs[i], preFunc,
|
||||
postFunc, data);
|
||||
}
|
||||
else if ((fce = dynamic_cast<FunctionCallExpr *>(node)) != NULL) {
|
||||
fce->func = (Expr *)WalkAST(fce->func, preFunc, postFunc, data);
|
||||
fce->args = (ExprList *)WalkAST(fce->args, preFunc, postFunc, data);
|
||||
fce->launchCountExpr = (Expr *)WalkAST(fce->launchCountExpr, preFunc,
|
||||
postFunc, data);
|
||||
}
|
||||
else if ((ie = dynamic_cast<IndexExpr *>(node)) != NULL) {
|
||||
ie->baseExpr = (Expr *)WalkAST(ie->baseExpr, preFunc, postFunc, data);
|
||||
ie->index = (Expr *)WalkAST(ie->index, preFunc, postFunc, data);
|
||||
}
|
||||
else if ((me = dynamic_cast<MemberExpr *>(node)) != NULL)
|
||||
me->expr = (Expr *)WalkAST(me->expr, preFunc, postFunc, data);
|
||||
else if ((tce = dynamic_cast<TypeCastExpr *>(node)) != NULL)
|
||||
tce->expr = (Expr *)WalkAST(tce->expr, preFunc, postFunc, data);
|
||||
else if ((re = dynamic_cast<ReferenceExpr *>(node)) != NULL)
|
||||
re->expr = (Expr *)WalkAST(re->expr, preFunc, postFunc, data);
|
||||
else if ((dre = dynamic_cast<DereferenceExpr *>(node)) != NULL)
|
||||
dre->expr = (Expr *)WalkAST(dre->expr, preFunc, postFunc, data);
|
||||
else if ((soe = dynamic_cast<SizeOfExpr *>(node)) != NULL)
|
||||
soe->expr = (Expr *)WalkAST(soe->expr, preFunc, postFunc, data);
|
||||
else if ((aoe = dynamic_cast<AddressOfExpr *>(node)) != NULL)
|
||||
aoe->expr = (Expr *)WalkAST(aoe->expr, preFunc, postFunc, data);
|
||||
else if (dynamic_cast<SymbolExpr *>(node) != NULL ||
|
||||
dynamic_cast<ConstExpr *>(node) != NULL ||
|
||||
dynamic_cast<FunctionSymbolExpr *>(node) != NULL ||
|
||||
dynamic_cast<SyncExpr *>(node) != NULL ||
|
||||
dynamic_cast<NullPointerExpr *>(node) != NULL) {
|
||||
// nothing to do
|
||||
}
|
||||
else
|
||||
FATAL("Unhandled expression type in WalkAST().");
|
||||
}
|
||||
|
||||
// Call the callback function
|
||||
if (postFunc != NULL)
|
||||
return postFunc(node, data);
|
||||
else
|
||||
return node;
|
||||
}
|
||||
|
||||
|
||||
/** Post-order WalkAST() callback: runs the node's own Optimize() method
    and returns whatever node it produces.  The data pointer is unused. */
static ASTNode *
lOptimizeNode(ASTNode *node, void *) {
    ASTNode *optimized = node->Optimize();
    return optimized;
}
|
||||
|
||||
|
||||
/** Optimize the tree rooted at the given node by walking it with
    lOptimizeNode as the post-order callback (no pre-order callback and no
    user data).  Returns the node produced by the walk. */
ASTNode *
Optimize(ASTNode *root) {
    ASTNode *result = WalkAST(root, NULL, lOptimizeNode, NULL);
    return result;
}
|
||||
|
||||
|
||||
/** Expr * overload of Optimize(): forwards to the ASTNode * version and
    casts the result back to Expr *. */
Expr *
Optimize(Expr *expr) {
    ASTNode *optimized = Optimize((ASTNode *)expr);
    return (Expr *)optimized;
}
|
||||
|
||||
|
||||
/** Stmt * overload of Optimize(): forwards to the ASTNode * version and
    casts the result back to Stmt *. */
Stmt *
Optimize(Stmt *stmt) {
    ASTNode *optimized = Optimize((ASTNode *)stmt);
    return (Stmt *)optimized;
}
|
||||
|
||||
|
||||
/** Post-order WalkAST() callback: runs the node's own TypeCheck() method
    and returns whatever node it produces.  The data pointer is unused. */
static ASTNode *
lTypeCheckNode(ASTNode *node, void *) {
    ASTNode *checked = node->TypeCheck();
    return checked;
}
|
||||
|
||||
|
||||
/** Type-check the tree rooted at the given node by walking it with
    lTypeCheckNode as the post-order callback (no pre-order callback and
    no user data).  Returns the node produced by the walk. */
ASTNode *
TypeCheck(ASTNode *root) {
    ASTNode *result = WalkAST(root, NULL, lTypeCheckNode, NULL);
    return result;
}
|
||||
|
||||
|
||||
/** Expr * overload of TypeCheck(): forwards to the ASTNode * version and
    casts the result back to Expr *. */
Expr *
TypeCheck(Expr *expr) {
    ASTNode *checked = TypeCheck((ASTNode *)expr);
    return (Expr *)checked;
}
|
||||
|
||||
|
||||
/** Stmt * overload of TypeCheck(): forwards to the ASTNode * version and
    casts the result back to Stmt *. */
Stmt *
TypeCheck(Stmt *stmt) {
    ASTNode *checked = TypeCheck((ASTNode *)stmt);
    return (Stmt *)checked;
}
|
||||
|
||||
|
||||
static bool
|
||||
lCostCallback(ASTNode *node, void *c) {
|
||||
int *cost = (int *)c;
|
||||
*cost += node->EstimateCost();
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
int
|
||||
EstimateCost(ASTNode *root) {
|
||||
int cost = 0;
|
||||
WalkAST(root, lCostCallback, NULL, &cost);
|
||||
return cost;
|
||||
}
|
||||
|
||||
|
||||
61
ast.h
61
ast.h
@@ -53,10 +53,11 @@ public:
|
||||
virtual ~ASTNode();
|
||||
|
||||
/** The Optimize() method should perform any appropriate early-stage
|
||||
optimizations on the node (e.g. constant folding). The caller
|
||||
should use the returned ASTNode * in place of the original node.
|
||||
This method may return NULL if an error is encountered during
|
||||
optimization. */
|
||||
optimizations on the node (e.g. constant folding). This method
|
||||
will be called after the node's children have already been
|
||||
optimized, and the caller will store the returned ASTNode * in
|
||||
place of the original node. This method should return NULL if an
|
||||
error is encountered during optimization. */
|
||||
virtual ASTNode *Optimize() = 0;
|
||||
|
||||
/** Type checking should be performed by the node when this method is
|
||||
@@ -65,6 +66,9 @@ public:
|
||||
pointer in place of the original ASTNode *. */
|
||||
virtual ASTNode *TypeCheck() = 0;
|
||||
|
||||
/** Estimate the execution cost of the node (not including the cost of
|
||||
the children).  The value returned should be based on the COST_*
|
||||
enumerant values defined in ispc.h. */
|
||||
virtual int EstimateCost() const = 0;
|
||||
|
||||
/** All AST nodes must track the file position where they are
|
||||
@@ -91,4 +95,53 @@ private:
|
||||
std::vector<Function *> functions;
|
||||
};
|
||||
|
||||
|
||||
/** Callback function type for preorder traversal visiting function for
|
||||
the AST walk.
|
||||
*/
|
||||
typedef bool (* ASTPreCallBackFunc)(ASTNode *node, void *data);
|
||||
|
||||
/** Callback function type for postorder traversal visiting function for
|
||||
the AST walk.
|
||||
*/
|
||||
typedef ASTNode * (* ASTPostCallBackFunc)(ASTNode *node, void *data);
|
||||
|
||||
/** Walk (some portion of) an AST, starting from the given root node. At
|
||||
each node, if preFunc is non-NULL, call it, passing the given void
|
||||
*data pointer; if the call to preFunc function returns false, then the
|
||||
children of the node aren't visited. This function then makes
|
||||
recursive calls to WalkAST() to process the node's children; after
|
||||
doing so, calls postFunc, at the node. The return value from the
|
||||
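postFunc call is returned by WalkAST(); for a child node, it replaces
the child pointer stored in the parent.  If postFunc is NULL, the node
itself is returned. */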
|
||||
extern ASTNode *WalkAST(ASTNode *root, ASTPreCallBackFunc preFunc,
|
||||
ASTPostCallBackFunc postFunc, void *data);
|
||||
|
||||
/** Perform simple optimizations on the AST or portion thereof passed to
|
||||
this function, returning the resulting AST. */
|
||||
extern ASTNode *Optimize(ASTNode *root);
|
||||
|
||||
/** Convenience version of Optimize() for Expr *s that returns an Expr *
|
||||
(rather than an ASTNode *, which would require the caller to cast back
|
||||
to an Expr *). */
|
||||
extern Expr *Optimize(Expr *);
|
||||
|
||||
/** Convenience version of Optimize() for Stmt *s that returns a Stmt *
|
||||
(rather than an ASTNode *, which would require the caller to cast back
|
||||
to a Stmt *). */
|
||||
extern Stmt *Optimize(Stmt *);
|
||||
|
||||
/** Perform type-checking on the given AST (or portion of one), returning a
|
||||
pointer to the root of the resulting AST. */
|
||||
extern ASTNode *TypeCheck(ASTNode *root);
|
||||
|
||||
/** Convenience version of TypeCheck() for Expr *s that returns an Expr *. */
|
||||
extern Expr *TypeCheck(Expr *);
|
||||
|
||||
/** Convenience version of TypeCheck() for Stmt *s that returns a Stmt *. */
|
||||
extern Stmt *TypeCheck(Stmt *);
|
||||
|
||||
/** Returns an estimate of the execution cost of the tree starting at
|
||||
the given root. */
|
||||
extern int EstimateCost(ASTNode *root);
|
||||
|
||||
#endif // ISPC_AST_H
|
||||
|
||||
@@ -11,7 +11,10 @@ length=0
|
||||
|
||||
src=str(sys.argv[1])
|
||||
|
||||
target = re.sub(".*builtins-", "", src)
|
||||
target = re.sub("builtins/target-", "", src)
|
||||
target = re.sub(r"builtins\\target-", "", target)
|
||||
target = re.sub("builtins/", "", target)
|
||||
target = re.sub(r"builtins\\", "", target)
|
||||
target = re.sub("\.ll$", "", target)
|
||||
target = re.sub("\.c$", "", target)
|
||||
target = re.sub("-", "_", target)
|
||||
@@ -23,17 +26,21 @@ if platform.system() == 'Windows' or string.find(platform.system(), "CYGWIN_NT")
|
||||
try:
|
||||
as_out=subprocess.Popen([llvm_as, "-", "-o", "-"], stdout=subprocess.PIPE)
|
||||
except IOError:
|
||||
print >> sys.stderr, "Couldn't open " + src
|
||||
sys.stderr.write("Couldn't open " + src)
|
||||
sys.exit(1)
|
||||
|
||||
print "unsigned char builtins_bitcode_" + target + "[] = {"
|
||||
for line in as_out.stdout.readlines():
|
||||
length = length + len(line)
|
||||
for c in line:
|
||||
print ord(c)
|
||||
print ", "
|
||||
print " 0 };\n\n"
|
||||
print "int builtins_bitcode_" + target + "_length = " + str(length) + ";\n"
|
||||
width = 16;
|
||||
sys.stdout.write("unsigned char builtins_bitcode_" + target + "[] = {\n")
|
||||
|
||||
data = as_out.stdout.read()
|
||||
for i in range(0, len(data), 1):
|
||||
sys.stdout.write("0x%0.2X, " % ord(data[i:i+1]))
|
||||
|
||||
if i%width == (width-1):
|
||||
sys.stdout.write("\n")
|
||||
|
||||
sys.stdout.write("0x00 };\n\n")
|
||||
sys.stdout.write("int builtins_bitcode_" + target + "_length = " + str(i+1) + ";\n")
|
||||
|
||||
as_out.wait()
|
||||
|
||||
|
||||
@@ -8,7 +8,6 @@ REM Both the LLVM binaries and python need to be in the path
|
||||
set path=%LLVM_INSTALL_DIR%\bin;%PATH%;c:\cygwin\bin
|
||||
|
||||
msbuild ispc.vcxproj /V:m /p:Platform=Win32 /p:Configuration=Release
|
||||
msbuild ispc_test.vcxproj /V:m /p:Platform=Win32 /p:Configuration=Release
|
||||
|
||||
msbuild examples\examples.sln /V:m /p:Platform=x64 /p:Configuration=Release /t:rebuild
|
||||
msbuild examples\examples.sln /V:m /p:Platform=x64 /p:Configuration=Debug /t:rebuild
|
||||
|
||||
11
buildispc.bat
Normal file
11
buildispc.bat
Normal file
@@ -0,0 +1,11 @@
|
||||
@echo off
|
||||
|
||||
REM If LLVM_INSTALL_DIR isn't set globally in your environment,
|
||||
REM it can be set here_
|
||||
set LLVM_INSTALL_DIR=c:\users\mmp\llvm-dev
|
||||
set LLVM_VERSION=3.1svn
|
||||
|
||||
REM Both the LLVM binaries and python need to be in the path
|
||||
set path=%LLVM_INSTALL_DIR%\bin;%PATH%;c:\cygwin\bin
|
||||
|
||||
msbuild ispc.vcxproj /V:m /p:Platform=Win32 /p:Configuration=Release
|
||||
143
builtins.cpp
143
builtins.cpp
@@ -99,6 +99,9 @@ lLLVMTypeToISPCType(const llvm::Type *t, bool intAsUnsigned) {
|
||||
return intAsUnsigned ? AtomicType::UniformUInt64 : AtomicType::UniformInt64;
|
||||
|
||||
// varying
|
||||
if (LLVMTypes::MaskType != LLVMTypes::Int32VectorType &&
|
||||
t == LLVMTypes::MaskType)
|
||||
return AtomicType::VaryingBool;
|
||||
else if (t == LLVMTypes::Int8VectorType)
|
||||
return intAsUnsigned ? AtomicType::VaryingUInt8 : AtomicType::VaryingInt8;
|
||||
else if (t == LLVMTypes::Int16VectorType)
|
||||
@@ -194,7 +197,7 @@ lCreateISPCSymbol(llvm::Function *func, SymbolTable *symbolTable) {
|
||||
// symbol creation code below assumes that any LLVM vector of i32s is a
|
||||
// varying int32. Here, we need that to be interpreted as a varying
|
||||
// bool, so just have a one-off override for that one...
|
||||
if (name == "__sext_varying_bool") {
|
||||
if (g->target.maskBitCount != 1 && name == "__sext_varying_bool") {
|
||||
const Type *returnType = AtomicType::VaryingInt32;
|
||||
std::vector<const Type *> argTypes;
|
||||
argTypes.push_back(AtomicType::VaryingBool);
|
||||
@@ -257,7 +260,7 @@ static void
|
||||
lAddModuleSymbols(llvm::Module *module, SymbolTable *symbolTable) {
|
||||
#if 0
|
||||
// FIXME: handle globals?
|
||||
assert(module->global_empty());
|
||||
Assert(module->global_empty());
|
||||
#endif
|
||||
|
||||
llvm::Module::iterator iter;
|
||||
@@ -287,11 +290,11 @@ lCheckModuleIntrinsics(llvm::Module *module) {
|
||||
// check the llvm.x86.* intrinsics for now...
|
||||
if (!strncmp(funcName.c_str(), "llvm.x86.", 9)) {
|
||||
llvm::Intrinsic::ID id = (llvm::Intrinsic::ID)func->getIntrinsicID();
|
||||
assert(id != 0);
|
||||
Assert(id != 0);
|
||||
LLVM_TYPE_CONST llvm::Type *intrinsicType =
|
||||
llvm::Intrinsic::getType(*g->ctx, id);
|
||||
intrinsicType = llvm::PointerType::get(intrinsicType, 0);
|
||||
assert(func->getType() == intrinsicType);
|
||||
Assert(func->getType() == intrinsicType);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -311,8 +314,12 @@ lCheckModuleIntrinsics(llvm::Module *module) {
|
||||
static void
|
||||
lSetInternalFunctions(llvm::Module *module) {
|
||||
const char *names[] = {
|
||||
"__add_float",
|
||||
"__add_int32",
|
||||
"__add_uniform_double",
|
||||
"__add_uniform_int32",
|
||||
"__add_uniform_int64",
|
||||
"__add_varying_double",
|
||||
"__add_varying_int32",
|
||||
"__add_varying_int64",
|
||||
"__aos_to_soa3_float",
|
||||
@@ -371,10 +378,10 @@ lSetInternalFunctions(llvm::Module *module) {
|
||||
"__atomic_xor_uniform_int64_global",
|
||||
"__broadcast_double",
|
||||
"__broadcast_float",
|
||||
"__broadcast_int16",
|
||||
"__broadcast_int32",
|
||||
"__broadcast_int64",
|
||||
"__broadcast_int8",
|
||||
"__broadcast_i16",
|
||||
"__broadcast_i32",
|
||||
"__broadcast_i64",
|
||||
"__broadcast_i8",
|
||||
"__ceil_uniform_double",
|
||||
"__ceil_uniform_float",
|
||||
"__ceil_varying_double",
|
||||
@@ -476,10 +483,10 @@ lSetInternalFunctions(llvm::Module *module) {
|
||||
"__reduce_min_uint64",
|
||||
"__rotate_double",
|
||||
"__rotate_float",
|
||||
"__rotate_int16",
|
||||
"__rotate_int32",
|
||||
"__rotate_int64",
|
||||
"__rotate_int8",
|
||||
"__rotate_i16",
|
||||
"__rotate_i32",
|
||||
"__rotate_i64",
|
||||
"__rotate_i8",
|
||||
"__round_uniform_double",
|
||||
"__round_uniform_float",
|
||||
"__round_varying_double",
|
||||
@@ -490,16 +497,16 @@ lSetInternalFunctions(llvm::Module *module) {
|
||||
"__sext_varying_bool",
|
||||
"__shuffle2_double",
|
||||
"__shuffle2_float",
|
||||
"__shuffle2_int16",
|
||||
"__shuffle2_int32",
|
||||
"__shuffle2_int64",
|
||||
"__shuffle2_int8",
|
||||
"__shuffle2_i16",
|
||||
"__shuffle2_i32",
|
||||
"__shuffle2_i64",
|
||||
"__shuffle2_i8",
|
||||
"__shuffle_double",
|
||||
"__shuffle_float",
|
||||
"__shuffle_int16",
|
||||
"__shuffle_int32",
|
||||
"__shuffle_int64",
|
||||
"__shuffle_int8",
|
||||
"__shuffle_i16",
|
||||
"__shuffle_i32",
|
||||
"__shuffle_i64",
|
||||
"__shuffle_i8",
|
||||
"__soa_to_aos3_float",
|
||||
"__soa_to_aos3_float16",
|
||||
"__soa_to_aos3_float4",
|
||||
@@ -543,12 +550,16 @@ lSetInternalFunctions(llvm::Module *module) {
|
||||
"__svml_pow",
|
||||
"__undef_uniform",
|
||||
"__undef_varying",
|
||||
"__vec4_add_float",
|
||||
"__vec4_add_int32",
|
||||
"__vselect_float",
|
||||
"__vselect_i32",
|
||||
};
|
||||
|
||||
int count = sizeof(names) / sizeof(names[0]);
|
||||
for (int i = 0; i < count; ++i) {
|
||||
llvm::Function *f = module->getFunction(names[i]);
|
||||
if (f != NULL)
|
||||
if (f != NULL && f->empty() == false)
|
||||
f->setLinkage(llvm::GlobalValue::InternalLinkage);
|
||||
}
|
||||
}
|
||||
@@ -583,9 +594,9 @@ AddBitcodeToModule(const unsigned char *bitcode, int length,
|
||||
// linking together modules with incompatible target triples..
|
||||
llvm::Triple mTriple(m->module->getTargetTriple());
|
||||
llvm::Triple bcTriple(bcModule->getTargetTriple());
|
||||
assert(bcTriple.getArch() == llvm::Triple::UnknownArch ||
|
||||
Assert(bcTriple.getArch() == llvm::Triple::UnknownArch ||
|
||||
mTriple.getArch() == bcTriple.getArch());
|
||||
assert(bcTriple.getVendor() == llvm::Triple::UnknownVendor ||
|
||||
Assert(bcTriple.getVendor() == llvm::Triple::UnknownVendor ||
|
||||
mTriple.getVendor() == bcTriple.getVendor());
|
||||
bcModule->setTargetTriple(mTriple.str());
|
||||
|
||||
@@ -631,7 +642,7 @@ lDefineConstantIntFunc(const char *name, int val, llvm::Module *module,
|
||||
Symbol *sym = new Symbol(name, SourcePos(), ft, SC_STATIC);
|
||||
|
||||
llvm::Function *func = module->getFunction(name);
|
||||
assert(func != NULL); // it should be declared already...
|
||||
Assert(func != NULL); // it should be declared already...
|
||||
func->addFnAttr(llvm::Attribute::AlwaysInline);
|
||||
llvm::BasicBlock *bblock = llvm::BasicBlock::Create(*g->ctx, "entry", func, 0);
|
||||
llvm::ReturnInst::Create(*g->ctx, LLVMInt32(val), bblock);
|
||||
@@ -706,11 +717,13 @@ DefineStdlib(SymbolTable *symbolTable, llvm::LLVMContext *ctx, llvm::Module *mod
|
||||
extern int builtins_bitcode_sse4_x2_length;
|
||||
switch (g->target.vectorWidth) {
|
||||
case 4:
|
||||
AddBitcodeToModule(builtins_bitcode_sse4, builtins_bitcode_sse4_length,
|
||||
AddBitcodeToModule(builtins_bitcode_sse4,
|
||||
builtins_bitcode_sse4_length,
|
||||
module, symbolTable);
|
||||
break;
|
||||
case 8:
|
||||
AddBitcodeToModule(builtins_bitcode_sse4_x2, builtins_bitcode_sse4_x2_length,
|
||||
AddBitcodeToModule(builtins_bitcode_sse4_x2,
|
||||
builtins_bitcode_sse4_x2_length,
|
||||
module, symbolTable);
|
||||
break;
|
||||
default:
|
||||
@@ -720,21 +733,70 @@ DefineStdlib(SymbolTable *symbolTable, llvm::LLVMContext *ctx, llvm::Module *mod
|
||||
case Target::AVX:
|
||||
switch (g->target.vectorWidth) {
|
||||
case 8:
|
||||
extern unsigned char builtins_bitcode_avx[];
|
||||
extern int builtins_bitcode_avx_length;
|
||||
AddBitcodeToModule(builtins_bitcode_avx, builtins_bitcode_avx_length,
|
||||
extern unsigned char builtins_bitcode_avx1[];
|
||||
extern int builtins_bitcode_avx1_length;
|
||||
AddBitcodeToModule(builtins_bitcode_avx1,
|
||||
builtins_bitcode_avx1_length,
|
||||
module, symbolTable);
|
||||
break;
|
||||
case 16:
|
||||
extern unsigned char builtins_bitcode_avx_x2[];
|
||||
extern int builtins_bitcode_avx_x2_length;
|
||||
AddBitcodeToModule(builtins_bitcode_avx_x2, builtins_bitcode_avx_x2_length,
|
||||
extern unsigned char builtins_bitcode_avx1_x2[];
|
||||
extern int builtins_bitcode_avx1_x2_length;
|
||||
AddBitcodeToModule(builtins_bitcode_avx1_x2,
|
||||
builtins_bitcode_avx1_x2_length,
|
||||
module, symbolTable);
|
||||
break;
|
||||
default:
|
||||
FATAL("logic error in DefineStdlib");
|
||||
}
|
||||
break;
|
||||
case Target::AVX2:
|
||||
switch (g->target.vectorWidth) {
|
||||
case 8:
|
||||
extern unsigned char builtins_bitcode_avx2[];
|
||||
extern int builtins_bitcode_avx2_length;
|
||||
AddBitcodeToModule(builtins_bitcode_avx2,
|
||||
builtins_bitcode_avx2_length,
|
||||
module, symbolTable);
|
||||
break;
|
||||
case 16:
|
||||
extern unsigned char builtins_bitcode_avx2_x2[];
|
||||
extern int builtins_bitcode_avx2_x2_length;
|
||||
AddBitcodeToModule(builtins_bitcode_avx2_x2,
|
||||
builtins_bitcode_avx2_x2_length,
|
||||
module, symbolTable);
|
||||
break;
|
||||
default:
|
||||
FATAL("logic error in DefineStdlib");
|
||||
}
|
||||
break;
|
||||
case Target::GENERIC:
|
||||
switch (g->target.vectorWidth) {
|
||||
case 4:
|
||||
extern unsigned char builtins_bitcode_generic_4[];
|
||||
extern int builtins_bitcode_generic_4_length;
|
||||
AddBitcodeToModule(builtins_bitcode_generic_4,
|
||||
builtins_bitcode_generic_4_length,
|
||||
module, symbolTable);
|
||||
break;
|
||||
case 8:
|
||||
extern unsigned char builtins_bitcode_generic_8[];
|
||||
extern int builtins_bitcode_generic_8_length;
|
||||
AddBitcodeToModule(builtins_bitcode_generic_8,
|
||||
builtins_bitcode_generic_8_length,
|
||||
module, symbolTable);
|
||||
break;
|
||||
case 16:
|
||||
extern unsigned char builtins_bitcode_generic_16[];
|
||||
extern int builtins_bitcode_generic_16_length;
|
||||
AddBitcodeToModule(builtins_bitcode_generic_16,
|
||||
builtins_bitcode_generic_16_length,
|
||||
module, symbolTable);
|
||||
break;
|
||||
default:
|
||||
FATAL("logic error in DefineStdlib");
|
||||
}
|
||||
break;
|
||||
default:
|
||||
FATAL("logic error");
|
||||
}
|
||||
@@ -762,11 +824,16 @@ DefineStdlib(SymbolTable *symbolTable, llvm::LLVMContext *ctx, llvm::Module *mod
|
||||
if (includeStdlibISPC) {
|
||||
// If the user wants the standard library to be included, parse the
|
||||
// serialized version of the stdlib.ispc file to get its
|
||||
// definitions added. Disable emission of performance warnings for
|
||||
// now, since the user doesn't care about any of that in the stdlib
|
||||
// implementation...
|
||||
extern char stdlib_code[];
|
||||
yy_scan_string(stdlib_code);
|
||||
yyparse();
|
||||
// definitions added.
|
||||
if (g->target.isa == Target::GENERIC) {
|
||||
extern char stdlib_generic_code[];
|
||||
yy_scan_string(stdlib_generic_code);
|
||||
yyparse();
|
||||
}
|
||||
else {
|
||||
extern char stdlib_x86_code[];
|
||||
yy_scan_string(stdlib_x86_code);
|
||||
yyparse();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -149,7 +149,7 @@ void __do_print(const char *format, const char *types, int width, int mask,
|
||||
|
||||
|
||||
int __num_cores() {
|
||||
#ifdef _MSC_VER
|
||||
#if defined(_MSC_VER) || defined(__MINGW32__)
|
||||
// This is quite a hack. Including all of windows.h to get this definition
|
||||
// pulls in a bunch of stuff that leads to undefined symbols at link time.
|
||||
// So we don't #include <windows.h> but instead have the equivalent declarations
|
||||
@@ -48,23 +48,42 @@ declare void @abort() noreturn
|
||||
;; corresponding to one of the Target::ISA enumerant values that gives the
|
||||
;; most capable ISA that the current system can run.
|
||||
;;
|
||||
;; #ifdef _MSC_VER
|
||||
;; extern void __stdcall __cpuid(int info[4], int infoType);
|
||||
;; #else
|
||||
;; Note: clang from LLVM 2.9 should be used if this is updated, for maximum
|
||||
;; backwards compatibility for anyone building ispc with LLVM 2.9.
|
||||
;;
|
||||
;; #include <stdint.h>
|
||||
;; #include <stdlib.h>
|
||||
;;
|
||||
;; static void __cpuid(int info[4], int infoType) {
|
||||
;; __asm__ __volatile__ ("cpuid"
|
||||
;; : "=a" (info[0]), "=b" (info[1]), "=c" (info[2]), "=d" (info[3])
|
||||
;; : "0" (infoType));
|
||||
;; }
|
||||
;; #endif
|
||||
;;
|
||||
;; /* Save %ebx in case it's the PIC register */
|
||||
;; static void __cpuid_count(int info[4], int level, int count) {
|
||||
;; __asm__ __volatile__ ("xchg{l}\t{%%}ebx, %1\n\t"
|
||||
;; "cpuid\n\t"
|
||||
;; "xchg{l}\t{%%}ebx, %1\n\t"
|
||||
;; : "=a" (info[0]), "=r" (info[1]), "=c" (info[2]), "=d" (info[3])
|
||||
;; : "0" (level), "2" (count));
|
||||
;; }
|
||||
;;
|
||||
;; int32_t __get_system_isa() {
|
||||
;; int info[4];
|
||||
;; __cpuid(info, 1);
|
||||
;;
|
||||
;; /* NOTE: the values returned below must be the same as the
|
||||
;; corresponding enumerant values in Target::ISA. */
|
||||
;; if ((info[2] & (1 << 28)) != 0)
|
||||
;; return 2; // AVX
|
||||
;; if ((info[2] & (1 << 28)) != 0) {
|
||||
;; // AVX1 for sure. Do we have AVX2?
|
||||
;; // Call cpuid with eax=7, ecx=0
|
||||
;; __cpuid_count(info, 7, 0);
|
||||
;; if ((info[1] & (1 << 5)) != 0)
|
||||
;; return 3; // AVX2
|
||||
;; else
|
||||
;; return 2; // AVX1
|
||||
;; }
|
||||
;; else if ((info[2] & (1 << 19)) != 0)
|
||||
;; return 1; // SSE4
|
||||
;; else if ((info[3] & (1 << 26)) != 0)
|
||||
@@ -76,33 +95,42 @@ declare void @abort() noreturn
|
||||
%0 = type { i32, i32, i32, i32 }
|
||||
|
||||
define i32 @__get_system_isa() nounwind ssp {
|
||||
%1 = tail call %0 asm sideeffect "cpuid", "={ax},={bx},={cx},={dx},0,~{dirflag},~{fpsr},~{flags}"(i32 1) nounwind
|
||||
%2 = extractvalue %0 %1, 2
|
||||
%3 = extractvalue %0 %1, 3
|
||||
%4 = and i32 %2, 268435456
|
||||
%5 = icmp eq i32 %4, 0
|
||||
br i1 %5, label %6, label %13
|
||||
entry:
|
||||
%0 = tail call %0 asm sideeffect "cpuid", "={ax},={bx},={cx},={dx},0,~{dirflag},~{fpsr},~{flags}"(i32 1) nounwind
|
||||
%asmresult9.i = extractvalue %0 %0, 2
|
||||
%asmresult10.i = extractvalue %0 %0, 3
|
||||
%and = and i32 %asmresult9.i, 268435456
|
||||
%cmp = icmp eq i32 %and, 0
|
||||
br i1 %cmp, label %if.else7, label %if.then
|
||||
|
||||
; <label>:6 ; preds = %0
|
||||
%7 = and i32 %2, 524288
|
||||
%8 = icmp eq i32 %7, 0
|
||||
br i1 %8, label %9, label %13
|
||||
if.then: ; preds = %entry
|
||||
%1 = tail call %0 asm sideeffect "xchg$(l$)\09$(%$)ebx, $1\0A\09cpuid\0A\09xchg$(l$)\09$(%$)ebx, $1\0A\09", "={ax},=r,={cx},={dx},0,2,~{dirflag},~{fpsr},~{flags}"(i32 7, i32 0) nounwind
|
||||
%asmresult9.i24 = extractvalue %0 %1, 1
|
||||
%and4 = lshr i32 %asmresult9.i24, 5
|
||||
%2 = and i32 %and4, 1
|
||||
%3 = or i32 %2, 2
|
||||
br label %return
|
||||
|
||||
; <label>:9 ; preds = %6
|
||||
%10 = and i32 %3, 67108864
|
||||
%11 = icmp eq i32 %10, 0
|
||||
br i1 %11, label %12, label %13
|
||||
if.else7: ; preds = %entry
|
||||
%and10 = and i32 %asmresult9.i, 524288
|
||||
%cmp11 = icmp eq i32 %and10, 0
|
||||
br i1 %cmp11, label %if.else13, label %return
|
||||
|
||||
; <label>:12 ; preds = %9
|
||||
if.else13: ; preds = %if.else7
|
||||
%and16 = and i32 %asmresult10.i, 67108864
|
||||
%cmp17 = icmp eq i32 %and16, 0
|
||||
br i1 %cmp17, label %if.else19, label %return
|
||||
|
||||
if.else19: ; preds = %if.else13
|
||||
tail call void @abort() noreturn nounwind
|
||||
unreachable
|
||||
|
||||
; <label>:13 ; preds = %9, %6, %0
|
||||
%.0 = phi i32 [ 2, %0 ], [ 1, %6 ], [ 0, %9 ]
|
||||
ret i32 %.0
|
||||
return: ; preds = %if.else13, %if.else7, %if.then
|
||||
%retval.0 = phi i32 [ %3, %if.then ], [ 1, %if.else7 ], [ 0, %if.else13 ]
|
||||
ret i32 %retval.0
|
||||
}
|
||||
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; This function is called by each of the dispatch functions we generate;
|
||||
;; it sets @__system_best_isa if it is unset.
|
||||
|
||||
@@ -32,6 +32,11 @@
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; AVX target implementation.
|
||||
|
||||
ctlztz()
|
||||
define_prefetches()
|
||||
define_shuffles()
|
||||
aossoa()
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; rcp
|
||||
|
||||
@@ -32,12 +32,16 @@
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; Basic 16-wide definitions
|
||||
|
||||
stdlib_core(16)
|
||||
packed_load_and_store(16)
|
||||
scans(16)
|
||||
int64minmax(16)
|
||||
define(`WIDTH',`16')
|
||||
define(`MASK',`i32')
|
||||
include(`util.m4')
|
||||
|
||||
include(`builtins-avx-common.ll')
|
||||
stdlib_core()
|
||||
packed_load_and_store()
|
||||
scans()
|
||||
int64minmax()
|
||||
|
||||
include(`target-avx-common.ll')
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; rcp
|
||||
@@ -166,33 +170,6 @@ define <16 x float> @__min_varying_float(<16 x float>,
|
||||
}
|
||||
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; int min/max
|
||||
|
||||
define <16 x i32> @__min_varying_int32(<16 x i32>, <16 x i32>) nounwind readonly alwaysinline {
|
||||
binary4to16(ret, i32, @llvm.x86.sse41.pminsd, %0, %1)
|
||||
ret <16 x i32> %ret
|
||||
}
|
||||
|
||||
define <16 x i32> @__max_varying_int32(<16 x i32>, <16 x i32>) nounwind readonly alwaysinline {
|
||||
binary4to16(ret, i32, @llvm.x86.sse41.pmaxsd, %0, %1)
|
||||
ret <16 x i32> %ret
|
||||
}
|
||||
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; unsigned int min/max
|
||||
|
||||
define <16 x i32> @__min_varying_uint32(<16 x i32>, <16 x i32>) nounwind readonly alwaysinline {
|
||||
binary4to16(ret, i32, @llvm.x86.sse41.pminud, %0, %1)
|
||||
ret <16 x i32> %ret
|
||||
}
|
||||
|
||||
define <16 x i32> @__max_varying_uint32(<16 x i32>, <16 x i32>) nounwind readonly alwaysinline {
|
||||
binary4to16(ret, i32, @llvm.x86.sse41.pmaxud, %0, %1)
|
||||
ret <16 x i32> %ret
|
||||
}
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
; horizontal ops
|
||||
|
||||
@@ -381,13 +358,13 @@ load_and_broadcast(16, i32, 32)
|
||||
load_and_broadcast(16, i64, 64)
|
||||
|
||||
; no masked load instruction for i8 and i16 types??
|
||||
load_masked(16, i8, 8, 1)
|
||||
load_masked(16, i16, 16, 2)
|
||||
masked_load(16, i8, 8, 1)
|
||||
masked_load(16, i16, 16, 2)
|
||||
|
||||
declare <8 x float> @llvm.x86.avx.maskload.ps.256(i8 *, <8 x float> %mask)
|
||||
declare <4 x double> @llvm.x86.avx.maskload.pd.256(i8 *, <4 x double> %mask)
|
||||
|
||||
define <16 x i32> @__load_masked_32(i8 *, <16 x i32> %mask) nounwind alwaysinline {
|
||||
define <16 x i32> @__masked_load_32(i8 *, <16 x i32> %mask) nounwind alwaysinline {
|
||||
%floatmask = bitcast <16 x i32> %mask to <16 x float>
|
||||
%mask0 = shufflevector <16 x float> %floatmask, <16 x float> undef,
|
||||
<8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
|
||||
@@ -405,7 +382,7 @@ define <16 x i32> @__load_masked_32(i8 *, <16 x i32> %mask) nounwind alwaysinlin
|
||||
}
|
||||
|
||||
|
||||
define <16 x i64> @__load_masked_64(i8 *, <16 x i32> %mask) nounwind alwaysinline {
|
||||
define <16 x i64> @__masked_load_64(i8 *, <16 x i32> %mask) nounwind alwaysinline {
|
||||
; double up masks, bitcast to doubles
|
||||
%mask0 = shufflevector <16 x i32> %mask, <16 x i32> undef,
|
||||
<8 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3>
|
||||
@@ -618,12 +595,7 @@ define void @__masked_store_blend_64(<16 x i64>* nocapture %ptr, <16 x i64> %new
|
||||
}
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; gather/scatter
|
||||
|
||||
gen_gather(16, i8)
|
||||
gen_gather(16, i16)
|
||||
gen_gather(16, i32)
|
||||
gen_gather(16, i64)
|
||||
;; scatter
|
||||
|
||||
gen_scatter(16, i8)
|
||||
gen_scatter(16, i16)
|
||||
@@ -32,12 +32,16 @@
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; Basic 8-wide definitions
|
||||
|
||||
stdlib_core(8)
|
||||
packed_load_and_store(8)
|
||||
scans(8)
|
||||
int64minmax(8)
|
||||
define(`WIDTH',`8')
|
||||
define(`MASK',`i32')
|
||||
include(`util.m4')
|
||||
|
||||
include(`builtins-avx-common.ll')
|
||||
stdlib_core()
|
||||
packed_load_and_store()
|
||||
scans()
|
||||
int64minmax()
|
||||
|
||||
include(`target-avx-common.ll')
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; rcp
|
||||
@@ -166,33 +170,6 @@ define <8 x float> @__min_varying_float(<8 x float>,
|
||||
}
|
||||
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; int min/max
|
||||
|
||||
define <8 x i32> @__min_varying_int32(<8 x i32>, <8 x i32>) nounwind readonly alwaysinline {
|
||||
binary4to8(ret, i32, @llvm.x86.sse41.pminsd, %0, %1)
|
||||
ret <8 x i32> %ret
|
||||
}
|
||||
|
||||
define <8 x i32> @__max_varying_int32(<8 x i32>, <8 x i32>) nounwind readonly alwaysinline {
|
||||
binary4to8(ret, i32, @llvm.x86.sse41.pmaxsd, %0, %1)
|
||||
ret <8 x i32> %ret
|
||||
}
|
||||
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; unsigned int min/max
|
||||
|
||||
define <8 x i32> @__min_varying_uint32(<8 x i32>, <8 x i32>) nounwind readonly alwaysinline {
|
||||
binary4to8(ret, i32, @llvm.x86.sse41.pminud, %0, %1)
|
||||
ret <8 x i32> %ret
|
||||
}
|
||||
|
||||
define <8 x i32> @__max_varying_uint32(<8 x i32>, <8 x i32>) nounwind readonly alwaysinline {
|
||||
binary4to8(ret, i32, @llvm.x86.sse41.pmaxud, %0, %1)
|
||||
ret <8 x i32> %ret
|
||||
}
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
; horizontal ops
|
||||
|
||||
@@ -234,7 +211,7 @@ reduce_equal(8)
|
||||
;; horizontal int32 ops
|
||||
|
||||
define <8 x i32> @__add_varying_int32(<8 x i32>,
|
||||
<8 x i32>) nounwind readnone alwaysinline {
|
||||
<8 x i32>) nounwind readnone alwaysinline {
|
||||
%s = add <8 x i32> %0, %1
|
||||
ret <8 x i32> %s
|
||||
}
|
||||
@@ -310,7 +287,7 @@ define double @__reduce_max_double(<8 x double>) nounwind readnone alwaysinline
|
||||
;; horizontal int64 ops
|
||||
|
||||
define <8 x i64> @__add_varying_int64(<8 x i64>,
|
||||
<8 x i64>) nounwind readnone alwaysinline {
|
||||
<8 x i64>) nounwind readnone alwaysinline {
|
||||
%s = add <8 x i64> %0, %1
|
||||
ret <8 x i64> %s
|
||||
}
|
||||
@@ -362,13 +339,13 @@ load_and_broadcast(8, i32, 32)
|
||||
load_and_broadcast(8, i64, 64)
|
||||
|
||||
; no masked load instruction for i8 and i16 types??
|
||||
load_masked(8, i8, 8, 1)
|
||||
load_masked(8, i16, 16, 2)
|
||||
masked_load(8, i8, 8, 1)
|
||||
masked_load(8, i16, 16, 2)
|
||||
|
||||
declare <8 x float> @llvm.x86.avx.maskload.ps.256(i8 *, <8 x float> %mask)
|
||||
declare <4 x double> @llvm.x86.avx.maskload.pd.256(i8 *, <4 x double> %mask)
|
||||
|
||||
define <8 x i32> @__load_masked_32(i8 *, <8 x i32> %mask) nounwind alwaysinline {
|
||||
define <8 x i32> @__masked_load_32(i8 *, <8 x i32> %mask) nounwind alwaysinline {
|
||||
%floatmask = bitcast <8 x i32> %mask to <8 x float>
|
||||
%floatval = call <8 x float> @llvm.x86.avx.maskload.ps.256(i8 * %0, <8 x float> %floatmask)
|
||||
%retval = bitcast <8 x float> %floatval to <8 x i32>
|
||||
@@ -376,7 +353,7 @@ define <8 x i32> @__load_masked_32(i8 *, <8 x i32> %mask) nounwind alwaysinline
|
||||
}
|
||||
|
||||
|
||||
define <8 x i64> @__load_masked_64(i8 *, <8 x i32> %mask) nounwind alwaysinline {
|
||||
define <8 x i64> @__masked_load_64(i8 *, <8 x i32> %mask) nounwind alwaysinline {
|
||||
; double up masks, bitcast to doubles
|
||||
%mask0 = shufflevector <8 x i32> %mask, <8 x i32> undef,
|
||||
<8 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3>
|
||||
@@ -399,9 +376,6 @@ define <8 x i64> @__load_masked_64(i8 *, <8 x i32> %mask) nounwind alwaysinline
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; masked store
|
||||
|
||||
; FIXME: there is no AVX instruction for these, but we could be clever
|
||||
; by packing the bits down and setting the last 3/4 or half, respectively,
|
||||
; of the mask to zero... Not sure if this would be a win in the end
|
||||
gen_masked_store(8, i8, 8)
|
||||
gen_masked_store(8, i16, 16)
|
||||
|
||||
@@ -516,12 +490,7 @@ define void @__masked_store_blend_64(<8 x i64>* nocapture %ptr, <8 x i64> %new,
|
||||
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; gather/scatter
|
||||
|
||||
gen_gather(8, i8)
|
||||
gen_gather(8, i16)
|
||||
gen_gather(8, i32)
|
||||
gen_gather(8, i64)
|
||||
;; scatter
|
||||
|
||||
gen_scatter(8, i8)
|
||||
gen_scatter(8, i16)
|
||||
69
builtins/target-avx1-x2.ll
Normal file
69
builtins/target-avx1-x2.ll
Normal file
@@ -0,0 +1,69 @@
|
||||
;; Copyright (c) 2010-2011, Intel Corporation
|
||||
;; All rights reserved.
|
||||
;;
|
||||
;; Redistribution and use in source and binary forms, with or without
|
||||
;; modification, are permitted provided that the following conditions are
|
||||
;; met:
|
||||
;;
|
||||
;; * Redistributions of source code must retain the above copyright
|
||||
;; notice, this list of conditions and the following disclaimer.
|
||||
;;
|
||||
;; * Redistributions in binary form must reproduce the above copyright
|
||||
;; notice, this list of conditions and the following disclaimer in the
|
||||
;; documentation and/or other materials provided with the distribution.
|
||||
;;
|
||||
;; * Neither the name of Intel Corporation nor the names of its
|
||||
;; contributors may be used to endorse or promote products derived from
|
||||
;; this software without specific prior written permission.
|
||||
;;
|
||||
;;
|
||||
;; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
|
||||
;; IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
||||
;; TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
|
||||
;; PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
|
||||
;; OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
;; EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
;; PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
;; PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
;; LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
;; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
;; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
include(`target-avx-x2.ll')
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; int min/max
|
||||
|
||||
define <16 x i32> @__min_varying_int32(<16 x i32>, <16 x i32>) nounwind readonly alwaysinline {
|
||||
binary4to16(ret, i32, @llvm.x86.sse41.pminsd, %0, %1)
|
||||
ret <16 x i32> %ret
|
||||
}
|
||||
|
||||
define <16 x i32> @__max_varying_int32(<16 x i32>, <16 x i32>) nounwind readonly alwaysinline {
|
||||
binary4to16(ret, i32, @llvm.x86.sse41.pmaxsd, %0, %1)
|
||||
ret <16 x i32> %ret
|
||||
}
|
||||
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; unsigned int min/max
|
||||
|
||||
define <16 x i32> @__min_varying_uint32(<16 x i32>, <16 x i32>) nounwind readonly alwaysinline {
|
||||
binary4to16(ret, i32, @llvm.x86.sse41.pminud, %0, %1)
|
||||
ret <16 x i32> %ret
|
||||
}
|
||||
|
||||
define <16 x i32> @__max_varying_uint32(<16 x i32>, <16 x i32>) nounwind readonly alwaysinline {
|
||||
binary4to16(ret, i32, @llvm.x86.sse41.pmaxud, %0, %1)
|
||||
ret <16 x i32> %ret
|
||||
}
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; gather
|
||||
|
||||
gen_gather(16, i8)
|
||||
gen_gather(16, i16)
|
||||
gen_gather(16, i32)
|
||||
gen_gather(16, i64)
|
||||
|
||||
|
||||
70
builtins/target-avx1.ll
Normal file
70
builtins/target-avx1.ll
Normal file
@@ -0,0 +1,70 @@
|
||||
;; Copyright (c) 2010-2011, Intel Corporation
|
||||
;; All rights reserved.
|
||||
;;
|
||||
;; Redistribution and use in source and binary forms, with or without
|
||||
;; modification, are permitted provided that the following conditions are
|
||||
;; met:
|
||||
;;
|
||||
;; * Redistributions of source code must retain the above copyright
|
||||
;; notice, this list of conditions and the following disclaimer.
|
||||
;;
|
||||
;; * Redistributions in binary form must reproduce the above copyright
|
||||
;; notice, this list of conditions and the following disclaimer in the
|
||||
;; documentation and/or other materials provided with the distribution.
|
||||
;;
|
||||
;; * Neither the name of Intel Corporation nor the names of its
|
||||
;; contributors may be used to endorse or promote products derived from
|
||||
;; this software without specific prior written permission.
|
||||
;;
|
||||
;;
|
||||
;; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
|
||||
;; IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
||||
;; TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
|
||||
;; PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
|
||||
;; OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
;; EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
;; PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
;; PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
;; LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
;; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
;; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
include(`target-avx.ll')
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; int min/max
|
||||
|
||||
define <8 x i32> @__min_varying_int32(<8 x i32>, <8 x i32>) nounwind readonly alwaysinline {
|
||||
binary4to8(ret, i32, @llvm.x86.sse41.pminsd, %0, %1)
|
||||
ret <8 x i32> %ret
|
||||
}
|
||||
|
||||
define <8 x i32> @__max_varying_int32(<8 x i32>, <8 x i32>) nounwind readonly alwaysinline {
|
||||
binary4to8(ret, i32, @llvm.x86.sse41.pmaxsd, %0, %1)
|
||||
ret <8 x i32> %ret
|
||||
}
|
||||
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; unsigned int min/max
|
||||
|
||||
define <8 x i32> @__min_varying_uint32(<8 x i32>, <8 x i32>) nounwind readonly alwaysinline {
|
||||
binary4to8(ret, i32, @llvm.x86.sse41.pminud, %0, %1)
|
||||
ret <8 x i32> %ret
|
||||
}
|
||||
|
||||
define <8 x i32> @__max_varying_uint32(<8 x i32>, <8 x i32>) nounwind readonly alwaysinline {
|
||||
binary4to8(ret, i32, @llvm.x86.sse41.pmaxud, %0, %1)
|
||||
ret <8 x i32> %ret
|
||||
}
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; gather
|
||||
|
||||
gen_gather(8, i8)
|
||||
gen_gather(8, i16)
|
||||
gen_gather(8, i32)
|
||||
gen_gather(8, i64)
|
||||
|
||||
|
||||
|
||||
74
builtins/target-avx2-x2.ll
Normal file
74
builtins/target-avx2-x2.ll
Normal file
@@ -0,0 +1,74 @@
|
||||
;; Copyright (c) 2010-2011, Intel Corporation
|
||||
;; All rights reserved.
|
||||
;;
|
||||
;; Redistribution and use in source and binary forms, with or without
|
||||
;; modification, are permitted provided that the following conditions are
|
||||
;; met:
|
||||
;;
|
||||
;; * Redistributions of source code must retain the above copyright
|
||||
;; notice, this list of conditions and the following disclaimer.
|
||||
;;
|
||||
;; * Redistributions in binary form must reproduce the above copyright
|
||||
;; notice, this list of conditions and the following disclaimer in the
|
||||
;; documentation and/or other materials provided with the distribution.
|
||||
;;
|
||||
;; * Neither the name of Intel Corporation nor the names of its
|
||||
;; contributors may be used to endorse or promote products derived from
|
||||
;; this software without specific prior written permission.
|
||||
;;
|
||||
;;
|
||||
;; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
|
||||
;; IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
||||
;; TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
|
||||
;; PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
|
||||
;; OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
;; EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
;; PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
;; PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
;; LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
;; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
;; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
include(`target-avx-x2.ll')
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; int min/max
|
||||
|
||||
;; 8-wide AVX2 signed 32-bit min and max intrinsics. They are only declared
;; here; the signed min/max functions in this section pass them to a helper
;; macro.
declare <8 x i32> @llvm.x86.avx2.pmins.d(<8 x i32>, <8 x i32>) nounwind readonly
|
||||
declare <8 x i32> @llvm.x86.avx2.pmaxs.d(<8 x i32>, <8 x i32>) nounwind readonly
|
||||
|
||||
;; Lane-by-lane signed 32-bit minimum of two 16-wide vectors.
;; The body hands the 8-wide AVX2 pmins.d intrinsic to a helper macro whose
;; expansion is not shown here; presumably it applies the intrinsic to each
;; 8-lane half and joins the halves into %m -- TODO confirm in the macro file.
define <16 x i32> @__min_varying_int32(<16 x i32>, <16 x i32>) nounwind readonly alwaysinline {
|
||||
binary8to16(m, i32, @llvm.x86.avx2.pmins.d, %0, %1)
|
||||
ret <16 x i32> %m
|
||||
}
|
||||
|
||||
;; Lane-by-lane signed 32-bit maximum of two 16-wide vectors.
;; The body hands the 8-wide AVX2 pmaxs.d intrinsic to a helper macro whose
;; expansion is not shown here; presumably it applies the intrinsic to each
;; 8-lane half and joins the halves into %m -- TODO confirm in the macro file.
define <16 x i32> @__max_varying_int32(<16 x i32>, <16 x i32>) nounwind readonly alwaysinline {
|
||||
binary8to16(m, i32, @llvm.x86.avx2.pmaxs.d, %0, %1)
|
||||
ret <16 x i32> %m
|
||||
}
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; unsigned int min/max
|
||||
|
||||
;; 8-wide AVX2 unsigned 32-bit min and max intrinsics. They are only declared
;; here; the unsigned min/max functions in this section pass them to a helper
;; macro.
declare <8 x i32> @llvm.x86.avx2.pminu.d(<8 x i32>, <8 x i32>) nounwind readonly
|
||||
declare <8 x i32> @llvm.x86.avx2.pmaxu.d(<8 x i32>, <8 x i32>) nounwind readonly
|
||||
|
||||
;; Lane-by-lane unsigned 32-bit minimum of two 16-wide vectors.
;; The helper macro is given the 8-wide AVX2 pminu.d intrinsic. The macro
;; expansion is not shown here; presumably it works on each 8-lane half and
;; leaves the joined result in %m -- TODO confirm.
define <16 x i32> @__min_varying_uint32(<16 x i32>, <16 x i32>) nounwind readonly alwaysinline {
|
||||
binary8to16(m, i32, @llvm.x86.avx2.pminu.d, %0, %1)
|
||||
ret <16 x i32> %m
|
||||
}
|
||||
|
||||
;; Lane-by-lane unsigned 32-bit maximum of two 16-wide vectors.
;; The helper macro is given the 8-wide AVX2 pmaxu.d intrinsic. The macro
;; expansion is not shown here; presumably it works on each 8-lane half and
;; leaves the joined result in %m -- TODO confirm.
define <16 x i32> @__max_varying_uint32(<16 x i32>, <16 x i32>) nounwind readonly alwaysinline {
|
||||
binary8to16(m, i32, @llvm.x86.avx2.pmaxu.d, %0, %1)
|
||||
ret <16 x i32> %m
|
||||
}
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; gather
|
||||
|
||||
;; Expand the gather-generating macro once per element type (i8, i16, i32
;; and i64) for 16-wide vectors. What each expansion emits is decided by the
;; macro file, which is not visible in this section.
gen_gather(16, i8)
|
||||
gen_gather(16, i16)
|
||||
gen_gather(16, i32)
|
||||
gen_gather(16, i64)
|
||||
|
||||
|
||||
75
builtins/target-avx2.ll
Normal file
75
builtins/target-avx2.ll
Normal file
@@ -0,0 +1,75 @@
|
||||
;; Copyright (c) 2010-2011, Intel Corporation
|
||||
;; All rights reserved.
|
||||
;;
|
||||
;; Redistribution and use in source and binary forms, with or without
|
||||
;; modification, are permitted provided that the following conditions are
|
||||
;; met:
|
||||
;;
|
||||
;; * Redistributions of source code must retain the above copyright
|
||||
;; notice, this list of conditions and the following disclaimer.
|
||||
;;
|
||||
;; * Redistributions in binary form must reproduce the above copyright
|
||||
;; notice, this list of conditions and the following disclaimer in the
|
||||
;; documentation and/or other materials provided with the distribution.
|
||||
;;
|
||||
;; * Neither the name of Intel Corporation nor the names of its
|
||||
;; contributors may be used to endorse or promote products derived from
|
||||
;; this software without specific prior written permission.
|
||||
;;
|
||||
;;
|
||||
;; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
|
||||
;; IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
||||
;; TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
|
||||
;; PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
|
||||
;; OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
;; EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
;; PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
;; PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
;; LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
;; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
;; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
include(`target-avx.ll')
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; int min/max
|
||||
|
||||
;; 8-wide AVX2 signed 32-bit min and max intrinsics. Declared here so that
;; the direct calls in the two signed min/max functions of this section
;; resolve.
declare <8 x i32> @llvm.x86.avx2.pmins.d(<8 x i32>, <8 x i32>) nounwind readonly
|
||||
declare <8 x i32> @llvm.x86.avx2.pmaxs.d(<8 x i32>, <8 x i32>) nounwind readonly
|
||||
|
||||
;; Signed 32-bit minimum across eight lanes.
;; Forwards both operands unchanged to the AVX2 pmins.d intrinsic and
;; returns its result.
define <8 x i32> @__min_varying_int32(<8 x i32>, <8 x i32>) nounwind readonly alwaysinline {
  %lane_min = call <8 x i32> @llvm.x86.avx2.pmins.d(<8 x i32> %0, <8 x i32> %1)
  ret <8 x i32> %lane_min
}
|
||||
|
||||
;; Signed 32-bit maximum across eight lanes.
;; Forwards both operands unchanged to the AVX2 pmaxs.d intrinsic and
;; returns its result.
define <8 x i32> @__max_varying_int32(<8 x i32>, <8 x i32>) nounwind readonly alwaysinline {
  %lane_max = call <8 x i32> @llvm.x86.avx2.pmaxs.d(<8 x i32> %0, <8 x i32> %1)
  ret <8 x i32> %lane_max
}
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; unsigned int min/max
|
||||
|
||||
;; 8-wide AVX2 unsigned 32-bit min and max intrinsics. Declared here so that
;; the direct calls in the two unsigned min/max functions of this section
;; resolve.
declare <8 x i32> @llvm.x86.avx2.pminu.d(<8 x i32>, <8 x i32>) nounwind readonly
|
||||
declare <8 x i32> @llvm.x86.avx2.pmaxu.d(<8 x i32>, <8 x i32>) nounwind readonly
|
||||
|
||||
;; Unsigned 32-bit minimum across eight lanes.
;; Forwards both operands unchanged to the AVX2 pminu.d intrinsic and
;; returns its result.
define <8 x i32> @__min_varying_uint32(<8 x i32>, <8 x i32>) nounwind readonly alwaysinline {
  %lane_umin = call <8 x i32> @llvm.x86.avx2.pminu.d(<8 x i32> %0, <8 x i32> %1)
  ret <8 x i32> %lane_umin
}
|
||||
|
||||
;; Unsigned 32-bit maximum across eight lanes.
;; Forwards both operands unchanged to the AVX2 pmaxu.d intrinsic and
;; returns its result.
define <8 x i32> @__max_varying_uint32(<8 x i32>, <8 x i32>) nounwind readonly alwaysinline {
  %lane_umax = call <8 x i32> @llvm.x86.avx2.pmaxu.d(<8 x i32> %0, <8 x i32> %1)
  ret <8 x i32> %lane_umax
}
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; gather
|
||||
|
||||
;; Expand the gather-generating macro once per element type (i8, i16, i32
;; and i64) for 8-wide vectors. What each expansion emits is decided by the
;; macro file, which is not visible in this section.
gen_gather(8, i8)
|
||||
gen_gather(8, i16)
|
||||
gen_gather(8, i32)
|
||||
gen_gather(8, i64)
|
||||
|
||||
|
||||
|
||||
34
builtins/target-generic-16.ll
Normal file
34
builtins/target-generic-16.ll
Normal file
@@ -0,0 +1,34 @@
|
||||
;; Copyright (c) 2010-2011, Intel Corporation
|
||||
;; All rights reserved.
|
||||
;;
|
||||
;; Redistribution and use in source and binary forms, with or without
|
||||
;; modification, are permitted provided that the following conditions are
|
||||
;; met:
|
||||
;;
|
||||
;; * Redistributions of source code must retain the above copyright
|
||||
;; notice, this list of conditions and the following disclaimer.
|
||||
;;
|
||||
;; * Redistributions in binary form must reproduce the above copyright
|
||||
;; notice, this list of conditions and the following disclaimer in the
|
||||
;; documentation and/or other materials provided with the distribution.
|
||||
;;
|
||||
;; * Neither the name of Intel Corporation nor the names of its
|
||||
;; contributors may be used to endorse or promote products derived from
|
||||
;; this software without specific prior written permission.
|
||||
;;
|
||||
;;
|
||||
;; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
|
||||
;; IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
||||
;; TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
|
||||
;; PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
|
||||
;; OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
;; EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
;; PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
;; PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
;; LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
;; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
;; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
;; Fix the vector width at 16 lanes, then pull in the shared generic body,
;; which is written in terms of that width.
define(`WIDTH',`16')
|
||||
include(`target-generic-common.ll')
|
||||
|
||||
34
builtins/target-generic-4.ll
Normal file
34
builtins/target-generic-4.ll
Normal file
@@ -0,0 +1,34 @@
|
||||
;; Copyright (c) 2010-2011, Intel Corporation
|
||||
;; All rights reserved.
|
||||
;;
|
||||
;; Redistribution and use in source and binary forms, with or without
|
||||
;; modification, are permitted provided that the following conditions are
|
||||
;; met:
|
||||
;;
|
||||
;; * Redistributions of source code must retain the above copyright
|
||||
;; notice, this list of conditions and the following disclaimer.
|
||||
;;
|
||||
;; * Redistributions in binary form must reproduce the above copyright
|
||||
;; notice, this list of conditions and the following disclaimer in the
|
||||
;; documentation and/or other materials provided with the distribution.
|
||||
;;
|
||||
;; * Neither the name of Intel Corporation nor the names of its
|
||||
;; contributors may be used to endorse or promote products derived from
|
||||
;; this software without specific prior written permission.
|
||||
;;
|
||||
;;
|
||||
;; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
|
||||
;; IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
||||
;; TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
|
||||
;; PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
|
||||
;; OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
;; EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
;; PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
;; PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
;; LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
;; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
;; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
;; Fix the vector width at 4 lanes, then pull in the shared generic body,
;; which is written in terms of that width.
define(`WIDTH',`4')
|
||||
include(`target-generic-common.ll')
|
||||
|
||||
34
builtins/target-generic-8.ll
Normal file
34
builtins/target-generic-8.ll
Normal file
@@ -0,0 +1,34 @@
|
||||
;; Copyright (c) 2010-2011, Intel Corporation
|
||||
;; All rights reserved.
|
||||
;;
|
||||
;; Redistribution and use in source and binary forms, with or without
|
||||
;; modification, are permitted provided that the following conditions are
|
||||
;; met:
|
||||
;;
|
||||
;; * Redistributions of source code must retain the above copyright
|
||||
;; notice, this list of conditions and the following disclaimer.
|
||||
;;
|
||||
;; * Redistributions in binary form must reproduce the above copyright
|
||||
;; notice, this list of conditions and the following disclaimer in the
|
||||
;; documentation and/or other materials provided with the distribution.
|
||||
;;
|
||||
;; * Neither the name of Intel Corporation nor the names of its
|
||||
;; contributors may be used to endorse or promote products derived from
|
||||
;; this software without specific prior written permission.
|
||||
;;
|
||||
;;
|
||||
;; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
|
||||
;; IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
||||
;; TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
|
||||
;; PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
|
||||
;; OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
;; EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
;; PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
;; PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
;; LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
;; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
;; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
;; Fix the vector width at 8 lanes, then pull in the shared generic body,
;; which is written in terms of that width.
define(`WIDTH',`8')
|
||||
include(`target-generic-common.ll')
|
||||
|
||||
328
builtins/target-generic-common.ll
Normal file
328
builtins/target-generic-common.ll
Normal file
@@ -0,0 +1,328 @@
|
||||
;; Copyright (c) 2010-2011, Intel Corporation
|
||||
;; All rights reserved.
|
||||
;;
|
||||
;; Redistribution and use in source and binary forms, with or without
|
||||
;; modification, are permitted provided that the following conditions are
|
||||
;; met:
|
||||
;;
|
||||
;; * Redistributions of source code must retain the above copyright
|
||||
;; notice, this list of conditions and the following disclaimer.
|
||||
;;
|
||||
;; * Redistributions in binary form must reproduce the above copyright
|
||||
;; notice, this list of conditions and the following disclaimer in the
|
||||
;; documentation and/or other materials provided with the distribution.
|
||||
;;
|
||||
;; * Neither the name of Intel Corporation nor the names of its
|
||||
;; contributors may be used to endorse or promote products derived from
|
||||
;; this software without specific prior written permission.
|
||||
;;
|
||||
;;
|
||||
;; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
|
||||
;; IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
||||
;; TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
|
||||
;; PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
|
||||
;; OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
;; EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
;; PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
;; PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
;; LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
;; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
;; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
;; Lane masks are i1 for the generic targets. After the macro library is
;; loaded, three of its generators are expanded; what they emit is decided by
;; the macro library, not by this file. The vector width used throughout is
;; expected to have been set by the file that pulls this one in.
define(`MASK',`i1')
|
||||
include(`util.m4')
|
||||
|
||||
stdlib_core()
|
||||
scans()
|
||||
reduce_equal(WIDTH)
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; broadcast/rotate/shuffle
|
||||
|
||||
;; Everything in this section is a declaration only: no bodies are given
;; here, so these symbols have to be supplied from outside this file.
;; __smear_*: one scalar in, one full vector of the same element type out.
declare <WIDTH x float> @__smear_float(float) nounwind readnone
|
||||
declare <WIDTH x double> @__smear_double(double) nounwind readnone
|
||||
declare <WIDTH x i8> @__smear_i8(i8) nounwind readnone
|
||||
declare <WIDTH x i16> @__smear_i16(i16) nounwind readnone
|
||||
declare <WIDTH x i32> @__smear_i32(i32) nounwind readnone
|
||||
declare <WIDTH x i64> @__smear_i64(i64) nounwind readnone
|
||||
|
||||
;; __broadcast_*: a vector plus one i32 in, a vector out (presumably the i32
;; picks the source lane -- TODO confirm).
declare <WIDTH x float> @__broadcast_float(<WIDTH x float>, i32) nounwind readnone
|
||||
declare <WIDTH x double> @__broadcast_double(<WIDTH x double>, i32) nounwind readnone
|
||||
declare <WIDTH x i8> @__broadcast_i8(<WIDTH x i8>, i32) nounwind readnone
|
||||
declare <WIDTH x i16> @__broadcast_i16(<WIDTH x i16>, i32) nounwind readnone
|
||||
declare <WIDTH x i32> @__broadcast_i32(<WIDTH x i32>, i32) nounwind readnone
|
||||
declare <WIDTH x i64> @__broadcast_i64(<WIDTH x i64>, i32) nounwind readnone
|
||||
|
||||
;; __rotate_*: a vector plus one i32 in, a vector out (presumably the i32 is
;; the rotation amount in lanes -- TODO confirm).
declare <WIDTH x i8> @__rotate_i8(<WIDTH x i8>, i32) nounwind readnone
|
||||
declare <WIDTH x i16> @__rotate_i16(<WIDTH x i16>, i32) nounwind readnone
|
||||
declare <WIDTH x float> @__rotate_float(<WIDTH x float>, i32) nounwind readnone
|
||||
declare <WIDTH x i32> @__rotate_i32(<WIDTH x i32>, i32) nounwind readnone
|
||||
declare <WIDTH x double> @__rotate_double(<WIDTH x double>, i32) nounwind readnone
|
||||
declare <WIDTH x i64> @__rotate_i64(<WIDTH x i64>, i32) nounwind readnone
|
||||
|
||||
;; __shuffle_* takes one source vector and a vector of i32; __shuffle2_*
;; takes two source vectors and a vector of i32 (presumably the i32 vector
;; holds per-lane source positions -- TODO confirm).
declare <WIDTH x i8> @__shuffle_i8(<WIDTH x i8>, <WIDTH x i32>) nounwind readnone
|
||||
declare <WIDTH x i8> @__shuffle2_i8(<WIDTH x i8>, <WIDTH x i8>,
|
||||
<WIDTH x i32>) nounwind readnone
|
||||
declare <WIDTH x i16> @__shuffle_i16(<WIDTH x i16>, <WIDTH x i32>) nounwind readnone
|
||||
declare <WIDTH x i16> @__shuffle2_i16(<WIDTH x i16>, <WIDTH x i16>,
|
||||
<WIDTH x i32>) nounwind readnone
|
||||
declare <WIDTH x float> @__shuffle_float(<WIDTH x float>,
|
||||
<WIDTH x i32>) nounwind readnone
|
||||
declare <WIDTH x float> @__shuffle2_float(<WIDTH x float>, <WIDTH x float>,
|
||||
<WIDTH x i32>) nounwind readnone
|
||||
declare <WIDTH x i32> @__shuffle_i32(<WIDTH x i32>,
|
||||
<WIDTH x i32>) nounwind readnone
|
||||
declare <WIDTH x i32> @__shuffle2_i32(<WIDTH x i32>, <WIDTH x i32>,
|
||||
<WIDTH x i32>) nounwind readnone
|
||||
declare <WIDTH x double> @__shuffle_double(<WIDTH x double>,
|
||||
<WIDTH x i32>) nounwind readnone
|
||||
declare <WIDTH x double> @__shuffle2_double(<WIDTH x double>,
|
||||
<WIDTH x double>, <WIDTH x i32>) nounwind readnone
|
||||
declare <WIDTH x i64> @__shuffle_i64(<WIDTH x i64>,
|
||||
<WIDTH x i32>) nounwind readnone
|
||||
declare <WIDTH x i64> @__shuffle2_i64(<WIDTH x i64>, <WIDTH x i64>,
|
||||
<WIDTH x i32>) nounwind readnone
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; aos/soa
|
||||
|
||||
;; Declarations only. The __soa_to_aos* forms take three or four float
;; vectors plus a float pointer %p; the __aos_to_soa* forms take a float
;; pointer %p plus three or four pointers to float vectors. Presumably they
;; convert between the two layouts through %p -- TODO confirm.
;; NOTE(review): the output pointers of the 3-vector __aos_to_soa form are
;; not marked noalias, while those of the 4-vector form are.
declare void @__soa_to_aos3_float(<WIDTH x float> %v0, <WIDTH x float> %v1,
|
||||
<WIDTH x float> %v2, float * noalias %p) nounwind
|
||||
declare void @__aos_to_soa3_float(float * noalias %p, <WIDTH x float> * %out0,
|
||||
<WIDTH x float> * %out1, <WIDTH x float> * %out2) nounwind
|
||||
declare void @__soa_to_aos4_float(<WIDTH x float> %v0, <WIDTH x float> %v1,
|
||||
<WIDTH x float> %v2, <WIDTH x float> %v3,
|
||||
float * noalias %p) nounwind
|
||||
declare void @__aos_to_soa4_float(float * noalias %p, <WIDTH x float> * noalias %out0,
|
||||
<WIDTH x float> * noalias %out1,
|
||||
<WIDTH x float> * noalias %out2,
|
||||
<WIDTH x float> * noalias %out3) nounwind
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; math
|
||||
|
||||
;; Declaration only; takes no arguments and returns nothing. Unlike the
;; rounding entry points below it is not marked readnone.
declare void @__fastmath() nounwind
|
||||
|
||||
;; round/floor/ceil
|
||||
|
||||
;; Declarations only. Each operation comes in four flavours: scalar float,
;; scalar double, float vector and double vector; every one takes a single
;; argument and returns a value of the same type.
declare float @__round_uniform_float(float) nounwind readnone
|
||||
declare float @__floor_uniform_float(float) nounwind readnone
|
||||
declare float @__ceil_uniform_float(float) nounwind readnone
|
||||
|
||||
declare double @__round_uniform_double(double) nounwind readnone
|
||||
declare double @__floor_uniform_double(double) nounwind readnone
|
||||
declare double @__ceil_uniform_double(double) nounwind readnone
|
||||
|
||||
declare <WIDTH x float> @__round_varying_float(<WIDTH x float>) nounwind readnone
|
||||
declare <WIDTH x float> @__floor_varying_float(<WIDTH x float>) nounwind readnone
|
||||
declare <WIDTH x float> @__ceil_varying_float(<WIDTH x float>) nounwind readnone
|
||||
declare <WIDTH x double> @__round_varying_double(<WIDTH x double>) nounwind readnone
|
||||
declare <WIDTH x double> @__floor_varying_double(<WIDTH x double>) nounwind readnone
|
||||
declare <WIDTH x double> @__ceil_varying_double(<WIDTH x double>) nounwind readnone
|
||||
|
||||
;; min/max
|
||||
|
||||
;; Declarations only. Scalar two-operand min and max for float, double and
;; 32/64-bit integers. Signed and unsigned integer variants share the same IR
;; types (i32 or i64) and are told apart by name only.
declare float @__max_uniform_float(float, float) nounwind readnone
|
||||
declare float @__min_uniform_float(float, float) nounwind readnone
|
||||
declare i32 @__min_uniform_int32(i32, i32) nounwind readnone
|
||||
declare i32 @__max_uniform_int32(i32, i32) nounwind readnone
|
||||
declare i32 @__min_uniform_uint32(i32, i32) nounwind readnone
|
||||
declare i32 @__max_uniform_uint32(i32, i32) nounwind readnone
|
||||
declare i64 @__min_uniform_int64(i64, i64) nounwind readnone
|
||||
declare i64 @__max_uniform_int64(i64, i64) nounwind readnone
|
||||
declare i64 @__min_uniform_uint64(i64, i64) nounwind readnone
|
||||
declare i64 @__max_uniform_uint64(i64, i64) nounwind readnone
|
||||
declare double @__min_uniform_double(double, double) nounwind readnone
|
||||
declare double @__max_uniform_double(double, double) nounwind readnone
|
||||
|
||||
;; The same set again, taking and returning full vectors.
declare <WIDTH x float> @__max_varying_float(<WIDTH x float>,
|
||||
<WIDTH x float>) nounwind readnone
|
||||
declare <WIDTH x float> @__min_varying_float(<WIDTH x float>,
|
||||
<WIDTH x float>) nounwind readnone
|
||||
declare <WIDTH x i32> @__min_varying_int32(<WIDTH x i32>, <WIDTH x i32>) nounwind readnone
|
||||
declare <WIDTH x i32> @__max_varying_int32(<WIDTH x i32>, <WIDTH x i32>) nounwind readnone
|
||||
declare <WIDTH x i32> @__min_varying_uint32(<WIDTH x i32>, <WIDTH x i32>) nounwind readnone
|
||||
declare <WIDTH x i32> @__max_varying_uint32(<WIDTH x i32>, <WIDTH x i32>) nounwind readnone
|
||||
declare <WIDTH x i64> @__min_varying_int64(<WIDTH x i64>, <WIDTH x i64>) nounwind readnone
|
||||
declare <WIDTH x i64> @__max_varying_int64(<WIDTH x i64>, <WIDTH x i64>) nounwind readnone
|
||||
declare <WIDTH x i64> @__min_varying_uint64(<WIDTH x i64>, <WIDTH x i64>) nounwind readnone
|
||||
declare <WIDTH x i64> @__max_varying_uint64(<WIDTH x i64>, <WIDTH x i64>) nounwind readnone
|
||||
declare <WIDTH x double> @__min_varying_double(<WIDTH x double>,
|
||||
<WIDTH x double>) nounwind readnone
|
||||
declare <WIDTH x double> @__max_varying_double(<WIDTH x double>,
|
||||
<WIDTH x double>) nounwind readnone
|
||||
|
||||
;; sqrt/rsqrt/rcp
|
||||
|
||||
;; Declarations only. For float there are scalar and vector forms of rsqrt,
;; rcp and sqrt; every one takes a single argument and returns a value of
;; the same type.
declare float @__rsqrt_uniform_float(float) nounwind readnone
|
||||
declare float @__rcp_uniform_float(float) nounwind readnone
|
||||
declare float @__sqrt_uniform_float(float) nounwind readnone
|
||||
declare <WIDTH x float> @__rcp_varying_float(<WIDTH x float>) nounwind readnone
|
||||
declare <WIDTH x float> @__rsqrt_varying_float(<WIDTH x float>) nounwind readnone
|
||||
declare <WIDTH x float> @__sqrt_varying_float(<WIDTH x float>) nounwind readnone
|
||||
|
||||
;; For double only sqrt is declared; there is no rsqrt or rcp here.
declare double @__sqrt_uniform_double(double) nounwind readnone
|
||||
declare <WIDTH x double> @__sqrt_varying_double(<WIDTH x double>) nounwind readnone
|
||||
|
||||
;; bit ops
|
||||
|
||||
;; Declarations only; all six are scalar, one integer in and one integer of
;; the same size out. Population count for i32 and i64.
declare i32 @__popcnt_int32(i32) nounwind readnone
|
||||
declare i64 @__popcnt_int64(i64) nounwind readnone
|
||||
|
||||
;; Trailing and leading zero counts for i32 and i64.
declare i32 @__count_trailing_zeros_i32(i32) nounwind readnone
|
||||
declare i64 @__count_trailing_zeros_i64(i64) nounwind readnone
|
||||
declare i32 @__count_leading_zeros_i32(i32) nounwind readnone
|
||||
declare i64 @__count_leading_zeros_i64(i64) nounwind readnone
|
||||
|
||||
;; svml
|
||||
|
||||
; FIXME: need either to wire these up to the 8-wide SVML entrypoints,
|
||||
; or, use the macro to call the 4-wide ones twice with our 8-wide
|
||||
; vectors...
; NOTE(review): the wording above assumes an 8-wide target, but this file is
; shared by the 4-, 8- and 16-wide generic targets.
|
||||
|
||||
;; Declarations only, float vectors throughout, and none of them carries
;; nounwind or readnone. The sincos form returns nothing and takes two
;; vector pointers in addition to its input (presumably the two outputs --
;; TODO confirm).
declare <WIDTH x float> @__svml_sin(<WIDTH x float>)
|
||||
declare <WIDTH x float> @__svml_cos(<WIDTH x float>)
|
||||
declare void @__svml_sincos(<WIDTH x float>, <WIDTH x float> *, <WIDTH x float> *)
|
||||
declare <WIDTH x float> @__svml_tan(<WIDTH x float>)
|
||||
declare <WIDTH x float> @__svml_atan(<WIDTH x float>)
|
||||
declare <WIDTH x float> @__svml_atan2(<WIDTH x float>, <WIDTH x float>)
|
||||
declare <WIDTH x float> @__svml_exp(<WIDTH x float>)
|
||||
declare <WIDTH x float> @__svml_log(<WIDTH x float>)
|
||||
declare <WIDTH x float> @__svml_pow(<WIDTH x float>, <WIDTH x float>)
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; reductions
|
||||
|
||||
;; Declaration only: takes a vector of i1 lanes and returns a single i32
;; (presumably one bit per lane -- TODO confirm).
declare i32 @__movmsk(<WIDTH x i1>) nounwind readnone
|
||||
|
||||
;; Declarations only. Each entry point below takes one full vector and
;; returns one scalar of the element type; add, min and max are provided for
;; float, double and 32/64-bit integers. Signed and unsigned integer
;; variants share IR types and differ by name only.
declare float @__reduce_add_float(<WIDTH x float>) nounwind readnone
|
||||
declare float @__reduce_min_float(<WIDTH x float>) nounwind readnone
|
||||
declare float @__reduce_max_float(<WIDTH x float>) nounwind readnone
|
||||
|
||||
declare i32 @__reduce_add_int32(<WIDTH x i32>) nounwind readnone
|
||||
declare i32 @__reduce_min_int32(<WIDTH x i32>) nounwind readnone
|
||||
declare i32 @__reduce_max_int32(<WIDTH x i32>) nounwind readnone
|
||||
|
||||
declare i32 @__reduce_add_uint32(<WIDTH x i32>) nounwind readnone
|
||||
declare i32 @__reduce_min_uint32(<WIDTH x i32>) nounwind readnone
|
||||
declare i32 @__reduce_max_uint32(<WIDTH x i32>) nounwind readnone
|
||||
|
||||
declare double @__reduce_add_double(<WIDTH x double>) nounwind readnone
|
||||
declare double @__reduce_min_double(<WIDTH x double>) nounwind readnone
|
||||
declare double @__reduce_max_double(<WIDTH x double>) nounwind readnone
|
||||
|
||||
declare i64 @__reduce_add_int64(<WIDTH x i64>) nounwind readnone
|
||||
declare i64 @__reduce_min_int64(<WIDTH x i64>) nounwind readnone
|
||||
declare i64 @__reduce_max_int64(<WIDTH x i64>) nounwind readnone
|
||||
|
||||
declare i64 @__reduce_add_uint64(<WIDTH x i64>) nounwind readnone
|
||||
declare i64 @__reduce_min_uint64(<WIDTH x i64>) nounwind readnone
|
||||
declare i64 @__reduce_max_uint64(<WIDTH x i64>) nounwind readnone
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; unaligned loads/loads+broadcasts
|
||||
|
||||
;; Four macro expansions, one per element type, each given the vector width,
;; the element type and its size in bits. What they emit comes from the
;; macro library and is not visible here.
load_and_broadcast(WIDTH, i8, 8)
|
||||
load_and_broadcast(WIDTH, i16, 16)
|
||||
load_and_broadcast(WIDTH, i32, 32)
|
||||
load_and_broadcast(WIDTH, i64, 64)
|
||||
|
||||
;; Declarations only. Masked loads take an i8 pointer and an i1 lane mask
;; and return a full vector; the numeric suffix matches the element size in
;; bits.
declare <WIDTH x i8> @__masked_load_8(i8 * nocapture, <WIDTH x i1> %mask) nounwind readonly
|
||||
declare <WIDTH x i16> @__masked_load_16(i8 * nocapture, <WIDTH x i1> %mask) nounwind readonly
|
||||
declare <WIDTH x i32> @__masked_load_32(i8 * nocapture, <WIDTH x i1> %mask) nounwind readonly
|
||||
declare <WIDTH x i64> @__masked_load_64(i8 * nocapture, <WIDTH x i1> %mask) nounwind readonly
|
||||
|
||||
;; Declarations only. Masked stores take a pointer to a full vector, the
;; vector to store and an i1 lane mask. Only the 64-bit form names its mask
;; parameter; the others leave it unnamed.
declare void @__masked_store_8(<WIDTH x i8>* nocapture, <WIDTH x i8>,
|
||||
<WIDTH x i1>) nounwind
|
||||
declare void @__masked_store_16(<WIDTH x i16>* nocapture, <WIDTH x i16>,
|
||||
<WIDTH x i1>) nounwind
|
||||
declare void @__masked_store_32(<WIDTH x i32>* nocapture, <WIDTH x i32>,
|
||||
<WIDTH x i1>) nounwind
|
||||
declare void @__masked_store_64(<WIDTH x i64>* nocapture, <WIDTH x i64>,
|
||||
<WIDTH x i1> %mask) nounwind
|
||||
|
||||
;; Blend-style masked stores for 8, 16, 32 and 64-bit elements.
;; When the configured LLVM version string is the 3.1 development one, each
;; store is written out in IR as three steps: read the whole vector currently
;; at the pointer, pick the new value in lanes whose i1 mask is set and the
;; old value elsewhere, then write the whole vector back. Every lane is
;; therefore read and rewritten, including lanes that are masked off.
;; For any other version the four entry points are only declared.
ifelse(LLVM_VERSION, `LLVM_3_1svn',`
|
||||
define void @__masked_store_blend_8(<WIDTH x i8>* nocapture, <WIDTH x i8>,
|
||||
<WIDTH x i1>) nounwind alwaysinline {
|
||||
%v = load <WIDTH x i8> * %0
|
||||
%v1 = select <WIDTH x i1> %2, <WIDTH x i8> %1, <WIDTH x i8> %v
|
||||
store <WIDTH x i8> %v1, <WIDTH x i8> * %0
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @__masked_store_blend_16(<WIDTH x i16>* nocapture, <WIDTH x i16>,
|
||||
<WIDTH x i1>) nounwind alwaysinline {
|
||||
%v = load <WIDTH x i16> * %0
|
||||
%v1 = select <WIDTH x i1> %2, <WIDTH x i16> %1, <WIDTH x i16> %v
|
||||
store <WIDTH x i16> %v1, <WIDTH x i16> * %0
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @__masked_store_blend_32(<WIDTH x i32>* nocapture, <WIDTH x i32>,
|
||||
<WIDTH x i1>) nounwind alwaysinline {
|
||||
%v = load <WIDTH x i32> * %0
|
||||
%v1 = select <WIDTH x i1> %2, <WIDTH x i32> %1, <WIDTH x i32> %v
|
||||
store <WIDTH x i32> %v1, <WIDTH x i32> * %0
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @__masked_store_blend_64(<WIDTH x i64>* nocapture,
|
||||
<WIDTH x i64>, <WIDTH x i1>) nounwind alwaysinline {
|
||||
%v = load <WIDTH x i64> * %0
|
||||
%v1 = select <WIDTH x i1> %2, <WIDTH x i64> %1, <WIDTH x i64> %v
|
||||
store <WIDTH x i64> %v1, <WIDTH x i64> * %0
|
||||
ret void
|
||||
}
|
||||
',`
|
||||
declare void @__masked_store_blend_8(<WIDTH x i8>* nocapture, <WIDTH x i8>,
|
||||
<WIDTH x i1>) nounwind
|
||||
declare void @__masked_store_blend_16(<WIDTH x i16>* nocapture, <WIDTH x i16>,
|
||||
<WIDTH x i1>) nounwind
|
||||
declare void @__masked_store_blend_32(<WIDTH x i32>* nocapture, <WIDTH x i32>,
|
||||
<WIDTH x i1>) nounwind
|
||||
declare void @__masked_store_blend_64(<WIDTH x i64>* nocapture, <WIDTH x i64>,
|
||||
<WIDTH x i1> %mask) nounwind
|
||||
')
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; gather/scatter
|
||||
|
||||
;; Local macro taking one element type. Each expansion declares eight entry
;; points for that type, all with an i1 lane mask as the last parameter:
;;  - gathers and scatters in base-plus-offsets form, taking an i8 base
;;    pointer, a vector of 32- or 64-bit offsets and one extra i32
;;    (presumably an offset scale -- TODO confirm);
;;  - gathers and scatters taking only a vector of 32- or 64-bit integers in
;;    place of the base and offsets (presumably full addresses -- TODO
;;    confirm).
;; The scatter forms additionally take the vector of values to write.
;; The macro is expanded right after its definition for i8, i16, i32 and i64.
define(`gather_scatter', `
|
||||
declare <WIDTH x $1> @__gather_base_offsets32_$1(i8 * nocapture, <WIDTH x i32>,
|
||||
i32, <WIDTH x i1>) nounwind readonly
|
||||
declare <WIDTH x $1> @__gather_base_offsets64_$1(i8 * nocapture, <WIDTH x i64>,
|
||||
i32, <WIDTH x i1>) nounwind readonly
|
||||
declare <WIDTH x $1> @__gather32_$1(<WIDTH x i32>,
|
||||
<WIDTH x i1>) nounwind readonly
|
||||
declare <WIDTH x $1> @__gather64_$1(<WIDTH x i64>,
|
||||
<WIDTH x i1>) nounwind readonly
|
||||
|
||||
declare void @__scatter_base_offsets32_$1(i8* nocapture, <WIDTH x i32>,
|
||||
i32, <WIDTH x $1>, <WIDTH x i1>) nounwind
|
||||
declare void @__scatter_base_offsets64_$1(i8* nocapture, <WIDTH x i64>,
|
||||
i32, <WIDTH x $1>, <WIDTH x i1>) nounwind
|
||||
declare void @__scatter32_$1(<WIDTH x i32>, <WIDTH x $1>,
|
||||
<WIDTH x i1>) nounwind
|
||||
declare void @__scatter64_$1(<WIDTH x i64>, <WIDTH x $1>,
|
||||
<WIDTH x i1>) nounwind
|
||||
')
|
||||
|
||||
gather_scatter(i8)
|
||||
gather_scatter(i16)
|
||||
gather_scatter(i32)
|
||||
gather_scatter(i64)
|
||||
|
||||
;; Declarations only. Both return an i32 and take an i32 pointer first and
;; an i1 lane mask last. The load form takes a pointer to an i32 vector in
;; between, the store form takes an i32 vector of values. Presumably the
;; returned i32 is the number of lanes processed -- TODO confirm.
declare i32 @__packed_load_active(i32 * nocapture, <WIDTH x i32> * nocapture,
|
||||
<WIDTH x i1>) nounwind
|
||||
declare i32 @__packed_store_active(i32 * nocapture, <WIDTH x i32> %vals,
|
||||
<WIDTH x i1>) nounwind
|
||||
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; prefetch
|
||||
|
||||
;; Declarations only. Four prefetch entry points, each taking a single i8
;; pointer and returning nothing. The suffixes 1, 2, 3 and nt presumably
;; name the cache level or non-temporal hint -- TODO confirm.
declare void @__prefetch_read_uniform_1(i8 * nocapture) nounwind
|
||||
declare void @__prefetch_read_uniform_2(i8 * nocapture) nounwind
|
||||
declare void @__prefetch_read_uniform_3(i8 * nocapture) nounwind
|
||||
declare void @__prefetch_read_uniform_nt(i8 * nocapture) nounwind
|
||||
|
||||
@@ -29,6 +29,11 @@
|
||||
;; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
;; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
ctlztz()
|
||||
define_prefetches()
|
||||
define_shuffles()
|
||||
aossoa()
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; rcp
|
||||
|
||||
@@ -36,12 +36,16 @@
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; standard 8-wide definitions from m4 macros
|
||||
|
||||
stdlib_core(8)
|
||||
packed_load_and_store(8)
|
||||
scans(8)
|
||||
int64minmax(8)
|
||||
define(`WIDTH',`8')
|
||||
define(`MASK',`i32')
|
||||
include(`util.m4')
|
||||
|
||||
include(`builtins-sse2-common.ll')
|
||||
stdlib_core()
|
||||
packed_load_and_store()
|
||||
scans()
|
||||
int64minmax()
|
||||
|
||||
include(`target-sse2-common.ll')
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; rcp
|
||||
@@ -301,7 +305,7 @@ define i32 @__movmsk(<8 x i32>) nounwind readnone alwaysinline {
|
||||
}
|
||||
|
||||
define <4 x float> @__vec4_add_float(<4 x float> %v0,
|
||||
<4 x float> %v1) nounwind readnone alwaysinline {
|
||||
<4 x float> %v1) nounwind readnone alwaysinline {
|
||||
%v = fadd <4 x float> %v0, %v1
|
||||
ret <4 x float> %v
|
||||
}
|
||||
@@ -325,7 +329,7 @@ define float @__reduce_max_float(<8 x float>) nounwind readnone alwaysinline {
|
||||
|
||||
; helper function for reduce_add_int32
|
||||
define <4 x i32> @__vec4_add_int32(<4 x i32> %v0,
|
||||
<4 x i32> %v1) nounwind readnone alwaysinline {
|
||||
<4 x i32> %v1) nounwind readnone alwaysinline {
|
||||
%v = add <4 x i32> %v0, %v1
|
||||
ret <4 x i32> %v
|
||||
}
|
||||
@@ -425,10 +429,10 @@ load_and_broadcast(8, i16, 16)
|
||||
load_and_broadcast(8, i32, 32)
|
||||
load_and_broadcast(8, i64, 64)
|
||||
|
||||
load_masked(8, i8, 8, 1)
|
||||
load_masked(8, i16, 16, 2)
|
||||
load_masked(8, i32, 32, 4)
|
||||
load_masked(8, i64, 64, 8)
|
||||
masked_load(8, i8, 8, 1)
|
||||
masked_load(8, i16, 16, 2)
|
||||
masked_load(8, i32, 32, 4)
|
||||
masked_load(8, i64, 64, 8)
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; gather/scatter
|
||||
@@ -33,12 +33,16 @@
|
||||
;; Define the standard library builtins for the SSE2 target
|
||||
|
||||
; Define some basics for a 4-wide target
|
||||
stdlib_core(4)
|
||||
packed_load_and_store(4)
|
||||
scans(4)
|
||||
int64minmax(4)
|
||||
define(`WIDTH',`4')
|
||||
define(`MASK',`i32')
|
||||
include(`util.m4')
|
||||
|
||||
include(`builtins-sse2-common.ll')
|
||||
stdlib_core()
|
||||
packed_load_and_store()
|
||||
scans()
|
||||
int64minmax()
|
||||
|
||||
include(`target-sse2-common.ll')
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; rounding
|
||||
@@ -144,7 +148,7 @@ define <4 x double> @__ceil_varying_double(<4 x double>) nounwind readonly alway
|
||||
; from %1, and otherwise return the value from %0.
|
||||
|
||||
define <4 x i32> @__vselect_i32(<4 x i32>, <4 x i32> ,
|
||||
<4 x i32> %mask) nounwind readnone alwaysinline {
|
||||
<4 x i32> %mask) nounwind readnone alwaysinline {
|
||||
%notmask = xor <4 x i32> %mask, <i32 -1, i32 -1, i32 -1, i32 -1>
|
||||
%cleared_old = and <4 x i32> %0, %notmask
|
||||
%masked_new = and <4 x i32> %1, %mask
|
||||
@@ -153,7 +157,7 @@ define <4 x i32> @__vselect_i32(<4 x i32>, <4 x i32> ,
|
||||
}
|
||||
|
||||
define <4 x float> @__vselect_float(<4 x float>, <4 x float>,
|
||||
<4 x i32> %mask) nounwind readnone alwaysinline {
|
||||
<4 x i32> %mask) nounwind readnone alwaysinline {
|
||||
%v0 = bitcast <4 x float> %0 to <4 x i32>
|
||||
%v1 = bitcast <4 x float> %1 to <4 x i32>
|
||||
%r = call <4 x i32> @__vselect_i32(<4 x i32> %v0, <4 x i32> %v1, <4 x i32> %mask)
|
||||
@@ -552,10 +556,10 @@ load_and_broadcast(4, i16, 16)
|
||||
load_and_broadcast(4, i32, 32)
|
||||
load_and_broadcast(4, i64, 64)
|
||||
|
||||
load_masked(4, i8, 8, 1)
|
||||
load_masked(4, i16, 16, 2)
|
||||
load_masked(4, i32, 32, 4)
|
||||
load_masked(4, i64, 64, 8)
|
||||
masked_load(4, i8, 8, 1)
|
||||
masked_load(4, i16, 16, 2)
|
||||
masked_load(4, i32, 32, 4)
|
||||
masked_load(4, i64, 64, 8)
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; gather/scatter
|
||||
@@ -29,6 +29,11 @@
|
||||
;; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
;; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
ctlztz()
|
||||
define_prefetches()
|
||||
define_shuffles()
|
||||
aossoa()
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; rounding floats
|
||||
|
||||
@@ -36,12 +36,16 @@
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; standard 8-wide definitions from m4 macros
|
||||
|
||||
stdlib_core(8)
|
||||
packed_load_and_store(8)
|
||||
scans(8)
|
||||
int64minmax(8)
|
||||
define(`WIDTH',`8')
|
||||
define(`MASK',`i32')
|
||||
include(`util.m4')
|
||||
|
||||
include(`builtins-sse4-common.ll')
|
||||
stdlib_core()
|
||||
packed_load_and_store()
|
||||
scans()
|
||||
int64minmax()
|
||||
|
||||
include(`target-sse4-common.ll')
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; rcp
|
||||
@@ -252,7 +256,7 @@ define float @__reduce_max_float(<8 x float>) nounwind readnone alwaysinline {
|
||||
|
||||
; helper function for reduce_add_int32
|
||||
define <4 x i32> @__vec4_add_int32(<4 x i32> %v0,
|
||||
<4 x i32> %v1) nounwind readnone alwaysinline {
|
||||
<4 x i32> %v1) nounwind readnone alwaysinline {
|
||||
%v = add <4 x i32> %v0, %v1
|
||||
ret <4 x i32> %v
|
||||
}
|
||||
@@ -352,10 +356,10 @@ load_and_broadcast(8, i16, 16)
|
||||
load_and_broadcast(8, i32, 32)
|
||||
load_and_broadcast(8, i64, 64)
|
||||
|
||||
load_masked(8, i8, 8, 1)
|
||||
load_masked(8, i16, 16, 2)
|
||||
load_masked(8, i32, 32, 4)
|
||||
load_masked(8, i64, 64, 8)
|
||||
masked_load(8, i8, 8, 1)
|
||||
masked_load(8, i16, 16, 2)
|
||||
masked_load(8, i32, 32, 4)
|
||||
masked_load(8, i64, 64, 8)
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; gather/scatter
|
||||
@@ -33,12 +33,16 @@
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
; Define common 4-wide stuff
|
||||
stdlib_core(4)
|
||||
packed_load_and_store(4)
|
||||
scans(4)
|
||||
int64minmax(4)
|
||||
define(`WIDTH',`4')
|
||||
define(`MASK',`i32')
|
||||
include(`util.m4')
|
||||
|
||||
include(`builtins-sse4-common.ll')
|
||||
stdlib_core()
|
||||
packed_load_and_store()
|
||||
scans()
|
||||
int64minmax()
|
||||
|
||||
include(`target-sse4-common.ll')
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; rcp
|
||||
@@ -451,10 +455,10 @@ load_and_broadcast(4, i16, 16)
|
||||
load_and_broadcast(4, i32, 32)
|
||||
load_and_broadcast(4, i64, 64)
|
||||
|
||||
load_masked(4, i8, 8, 1)
|
||||
load_masked(4, i16, 16, 2)
|
||||
load_masked(4, i32, 32, 4)
|
||||
load_masked(4, i64, 64, 8)
|
||||
masked_load(4, i8, 8, 1)
|
||||
masked_load(4, i16, 16, 2)
|
||||
masked_load(4, i32, 32, 4)
|
||||
masked_load(4, i64, 64, 8)
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; gather/scatter
|
||||
File diff suppressed because it is too large
Load Diff
4378
cbackend.cpp
Normal file
4378
cbackend.cpp
Normal file
File diff suppressed because it is too large
Load Diff
130
ctx.h
130
ctx.h
@@ -39,6 +39,7 @@
|
||||
#define ISPC_CTX_H 1
|
||||
|
||||
#include "ispc.h"
|
||||
#include <map>
|
||||
#include <llvm/InstrTypes.h>
|
||||
#include <llvm/Instructions.h>
|
||||
#include <llvm/Analysis/DIBuilder.h>
|
||||
@@ -98,9 +99,9 @@ public:
|
||||
the function entry mask and the internal mask. */
|
||||
llvm::Value *GetFullMask();
|
||||
|
||||
/** Provides the alloca'd pointer to memory to store the full function
|
||||
mask. This is only used to wire up the __mask builtin variable. */
|
||||
void SetMaskPointer(llvm::Value *p);
|
||||
/** Returns a pointer to storage in memory that stores the current full
|
||||
mask. */
|
||||
llvm::Value *GetFullMaskPointer();
|
||||
|
||||
/** Provides the value of the mask at function entry */
|
||||
void SetFunctionMask(llvm::Value *val);
|
||||
@@ -160,10 +161,8 @@ public:
|
||||
void EndLoop();
|
||||
|
||||
/** Indicates that code generation for a 'foreach' or 'foreach_tiled'
|
||||
loop is about to start. The provided basic block pointer indicates
|
||||
where control flow should go if a 'continue' statement is executed
|
||||
in the loop. */
|
||||
void StartForeach(llvm::BasicBlock *continueTarget);
|
||||
loop is about to start. */
|
||||
void StartForeach();
|
||||
void EndForeach();
|
||||
|
||||
/** Emit code for a 'break' statement in a loop. If doCoherenceCheck
|
||||
@@ -186,12 +185,62 @@ public:
|
||||
previous iteration. */
|
||||
void RestoreContinuedLanes();
|
||||
|
||||
/** Indicates that code generation for a "switch" statement is about to
|
||||
start. isUniform indicates whether the "switch" value is uniform,
|
||||
and bbAfterSwitch gives the basic block immediately following the
|
||||
"switch" statement. (For example, if the switch condition is
|
||||
uniform, we jump here upon executing a "break" statement.) */
|
||||
void StartSwitch(bool isUniform, llvm::BasicBlock *bbAfterSwitch);
|
||||
/** Indicates the end of code generation for a "switch" statement. */
|
||||
void EndSwitch();
|
||||
|
||||
/** Emits code for a "switch" statement in the program.
|
||||
@param expr Gives the value of the expression after the "switch"
|
||||
@param defaultBlock Basic block to execute for the "default" case. This
|
||||
should be NULL if there is no "default" label inside
|
||||
the switch.
|
||||
@param caseBlocks vector that stores the mapping from label values
|
||||
after "case" statements to basic blocks corresponding
|
||||
to the "case" labels.
|
||||
@param nextBlocks For each basic block for a "case" or "default"
|
||||
label, this gives the basic block for the
|
||||
immediately-following "case" or "default" label (or
|
||||
the basic block after the "switch" statement for the
|
||||
last label.)
|
||||
*/
|
||||
void SwitchInst(llvm::Value *expr, llvm::BasicBlock *defaultBlock,
|
||||
const std::vector<std::pair<int, llvm::BasicBlock *> > &caseBlocks,
|
||||
const std::map<llvm::BasicBlock *, llvm::BasicBlock *> &nextBlocks);
|
||||
|
||||
/** Generates code for a "default" label after a "switch" statement.
|
||||
The checkMask parameter indicates whether additional code should be
|
||||
generated to check to see if the execution mask is all off after
|
||||
the default label (in which case a jump to the following label will
|
||||
be issued.) */
|
||||
void EmitDefaultLabel(bool checkMask, SourcePos pos);
|
||||
|
||||
/** Generates code for a "case" label after a "switch" statement. See
|
||||
the documentation for EmitDefaultLabel() for discussion of the
|
||||
checkMask parameter. */
|
||||
void EmitCaseLabel(int value, bool checkMask, SourcePos pos);
|
||||
|
||||
/** Returns the current number of nested levels of 'varying' control
|
||||
flow */
|
||||
int VaryingCFDepth() const;
|
||||
|
||||
bool InForeachLoop() const;
|
||||
|
||||
void SetContinueTarget(llvm::BasicBlock *bb) { continueTarget = bb; }
|
||||
|
||||
/** Step through the code and find label statements; create a basic
|
||||
block for each one, so that subsequent calls to
|
||||
GetLabeledBasicBlock() return the corresponding basic block. */
|
||||
void InitializeLabelMap(Stmt *code);
|
||||
|
||||
/** If there is a label in the function with the given name, return the
|
||||
new basic block that it starts. */
|
||||
llvm::BasicBlock *GetLabeledBasicBlock(const std::string &label);
|
||||
|
||||
/** Called to generate code for 'return' statement; value is the
|
||||
expression in the return statement (if non-NULL), and
|
||||
doCoherenceCheck indicates whether instructions should be generated
|
||||
@@ -211,6 +260,10 @@ public:
|
||||
i1 value that indicates if all of the mask lanes are on. */
|
||||
llvm::Value *All(llvm::Value *mask);
|
||||
|
||||
/** Given a boolean mask value of type LLVMTypes::MaskType, return an
|
||||
i1 value that indicates if all of the mask lanes are off. */
|
||||
llvm::Value *None(llvm::Value *mask);
|
||||
|
||||
/** Given a boolean mask value of type LLVMTypes::MaskType, return an
|
||||
i32 value wherein the i'th bit is on if and only if the i'th lane
|
||||
of the mask is on. */
|
||||
@@ -446,6 +499,9 @@ private:
|
||||
/** Pointer to the Function for which we're currently generating code. */
|
||||
Function *function;
|
||||
|
||||
/** LLVM function representation for the current function. */
|
||||
llvm::Function *llvmFunction;
|
||||
|
||||
/** The basic block into which we add any alloca instructions that need
|
||||
to go at the very start of the function. */
|
||||
llvm::BasicBlock *allocaBlock;
|
||||
@@ -479,10 +535,10 @@ private:
|
||||
the loop. */
|
||||
llvm::Value *loopMask;
|
||||
|
||||
/** If currently in a loop body, this is a pointer to memory to store a
|
||||
mask value that represents which of the lanes have executed a
|
||||
'break' statement. If we're not in a loop body, this should be
|
||||
NULL. */
|
||||
/** If currently in a loop body or switch statement, this is a pointer
|
||||
to memory to store a mask value that represents which of the lanes
|
||||
have executed a 'break' statement. If we're not in a loop body or
|
||||
switch, this should be NULL. */
|
||||
llvm::Value *breakLanesPtr;
|
||||
|
||||
/** Similar to breakLanesPtr, if we're inside a loop, this is a pointer
|
||||
@@ -490,16 +546,49 @@ private:
|
||||
'continue' statement. */
|
||||
llvm::Value *continueLanesPtr;
|
||||
|
||||
/** If we're inside a loop, this gives the basic block immediately
|
||||
after the current loop, which we will jump to if all of the lanes
|
||||
have executed a break statement or are otherwise done with the
|
||||
loop. */
|
||||
/** If we're inside a loop or switch statement, this gives the basic
|
||||
block immediately after the current loop or switch, which we will
|
||||
jump to if all of the lanes have executed a break statement or are
|
||||
otherwise done with it. */
|
||||
llvm::BasicBlock *breakTarget;
|
||||
|
||||
/** If we're inside a loop, this gives the block to jump to if all of
|
||||
the running lanes have executed a 'continue' statement. */
|
||||
llvm::BasicBlock *continueTarget;
|
||||
|
||||
/** @name Switch statement state
|
||||
|
||||
These variables store various state that's active when we're
|
||||
generating code for a switch statement. They should all be NULL
|
||||
outside of a switch.
|
||||
@{
|
||||
*/
|
||||
|
||||
/** The value of the expression used to determine which case in the
|
||||
statements after the switch to execute. */
|
||||
llvm::Value *switchExpr;
|
||||
|
||||
/** Map from case label numbers to the basic block that will hold code
|
||||
for that case. */
|
||||
const std::vector<std::pair<int, llvm::BasicBlock *> > *caseBlocks;
|
||||
|
||||
/** The basic block of code to run for the "default" label in the
|
||||
switch statement. */
|
||||
llvm::BasicBlock *defaultBlock;
|
||||
|
||||
/** For each basic block for the code for cases (and the default label,
|
||||
if present), this map gives the basic block for the immediately
|
||||
following case/default label. */
|
||||
const std::map<llvm::BasicBlock *, llvm::BasicBlock *> *nextBlocks;
|
||||
|
||||
/** Records whether the switch condition was uniform; this is a
|
||||
distinct notion from whether the switch represents uniform or
|
||||
varying control flow; we may have varying control flow from a
|
||||
uniform switch condition if there is a 'break' inside the switch
|
||||
that's under varying control flow. */
|
||||
bool switchConditionWasUniform;
|
||||
/** @} */
|
||||
|
||||
/** A pointer to memory that records which of the program instances
|
||||
have executed a 'return' statement (and are thus really truly done
|
||||
running any more instructions in this function.) */
|
||||
@@ -537,9 +626,13 @@ private:
|
||||
tasks launched from the current function. */
|
||||
llvm::Value *launchGroupHandlePtr;
|
||||
|
||||
std::map<std::string, llvm::BasicBlock *> labelMap;
|
||||
|
||||
static bool initLabelBBlocks(ASTNode *node, void *data);
|
||||
|
||||
llvm::Value *pointerVectorToVoidPointers(llvm::Value *value);
|
||||
static void addGSMetadata(llvm::Value *inst, SourcePos pos);
|
||||
bool ifsInLoopAllUniform() const;
|
||||
bool ifsInCFAllUniform(int cfType) const;
|
||||
void jumpIfAllLoopLanesAreDone(llvm::BasicBlock *target);
|
||||
llvm::Value *emitGatherCallback(llvm::Value *lvalue, llvm::Value *retPtr);
|
||||
|
||||
@@ -547,6 +640,11 @@ private:
|
||||
const Type *ptrType);
|
||||
|
||||
void restoreMaskGivenReturns(llvm::Value *oldMask);
|
||||
void addSwitchMaskCheck(llvm::Value *mask);
|
||||
bool inSwitchStatement() const;
|
||||
llvm::Value *getMaskAtSwitchEntry();
|
||||
|
||||
CFInfo *popCFState();
|
||||
|
||||
void scatter(llvm::Value *value, llvm::Value *ptr, const Type *ptrType,
|
||||
llvm::Value *mask);
|
||||
|
||||
224
decl.cpp
224
decl.cpp
@@ -46,6 +46,18 @@
|
||||
#include <stdio.h>
|
||||
#include <set>
|
||||
|
||||
static void
|
||||
lPrintTypeQualifiers(int typeQualifiers) {
|
||||
if (typeQualifiers & TYPEQUAL_INLINE) printf("inline ");
|
||||
if (typeQualifiers & TYPEQUAL_CONST) printf("const ");
|
||||
if (typeQualifiers & TYPEQUAL_UNIFORM) printf("uniform ");
|
||||
if (typeQualifiers & TYPEQUAL_VARYING) printf("varying ");
|
||||
if (typeQualifiers & TYPEQUAL_TASK) printf("task ");
|
||||
if (typeQualifiers & TYPEQUAL_SIGNED) printf("signed ");
|
||||
if (typeQualifiers & TYPEQUAL_UNSIGNED) printf("unsigned ");
|
||||
}
|
||||
|
||||
|
||||
/** Given a Type and a set of type qualifiers, apply the type qualifiers to
|
||||
the type, returning the type that is the result.
|
||||
*/
|
||||
@@ -54,6 +66,16 @@ lApplyTypeQualifiers(int typeQualifiers, const Type *type, SourcePos pos) {
|
||||
if (type == NULL)
|
||||
return NULL;
|
||||
|
||||
if ((typeQualifiers & TYPEQUAL_CONST) != 0)
|
||||
type = type->GetAsConstType();
|
||||
|
||||
if ((typeQualifiers & TYPEQUAL_UNIFORM) != 0)
|
||||
type = type->GetAsUniformType();
|
||||
else if ((typeQualifiers & TYPEQUAL_VARYING) != 0)
|
||||
type = type->GetAsVaryingType();
|
||||
else
|
||||
type = type->GetAsUnboundVariabilityType();
|
||||
|
||||
if ((typeQualifiers & TYPEQUAL_UNSIGNED) != 0) {
|
||||
if ((typeQualifiers & TYPEQUAL_SIGNED) != 0)
|
||||
Error(pos, "Illegal to apply both \"signed\" and \"unsigned\" "
|
||||
@@ -64,29 +86,13 @@ lApplyTypeQualifiers(int typeQualifiers, const Type *type, SourcePos pos) {
|
||||
type = unsignedType;
|
||||
else
|
||||
Error(pos, "\"unsigned\" qualifier is illegal with \"%s\" type.",
|
||||
type->GetString().c_str());
|
||||
|
||||
type->ResolveUnboundVariability(Type::Varying)->GetString().c_str());
|
||||
}
|
||||
|
||||
if ((typeQualifiers & TYPEQUAL_SIGNED) != 0 && type->IsIntType() == false)
|
||||
Error(pos, "\"signed\" qualifier is illegal with non-integer type "
|
||||
"\"%s\".", type->GetString().c_str());
|
||||
|
||||
if ((typeQualifiers & TYPEQUAL_CONST) != 0)
|
||||
type = type->GetAsConstType();
|
||||
|
||||
if ((typeQualifiers & TYPEQUAL_UNIFORM) != 0)
|
||||
type = type->GetAsUniformType();
|
||||
else if ((typeQualifiers & TYPEQUAL_VARYING) != 0)
|
||||
type = type->GetAsVaryingType();
|
||||
else {
|
||||
// otherwise, structs are uniform by default and everything
|
||||
// else is varying by default
|
||||
if (dynamic_cast<const StructType *>(type->GetBaseType()) != NULL)
|
||||
type = type->GetAsUniformType();
|
||||
else
|
||||
type = type->GetAsVaryingType();
|
||||
}
|
||||
"\"%s\".",
|
||||
type->ResolveUnboundVariability(Type::Varying)->GetString().c_str());
|
||||
|
||||
return type;
|
||||
}
|
||||
@@ -138,21 +144,14 @@ lGetStorageClassName(StorageClass storageClass) {
|
||||
|
||||
void
|
||||
DeclSpecs::Print() const {
|
||||
printf("%s ", lGetStorageClassName(storageClass));
|
||||
printf("Declspecs: [%s ", lGetStorageClassName(storageClass));
|
||||
|
||||
if (soaWidth > 0) printf("soa<%d> ", soaWidth);
|
||||
|
||||
if (typeQualifiers & TYPEQUAL_INLINE) printf("inline ");
|
||||
if (typeQualifiers & TYPEQUAL_CONST) printf("const ");
|
||||
if (typeQualifiers & TYPEQUAL_UNIFORM) printf("uniform ");
|
||||
if (typeQualifiers & TYPEQUAL_VARYING) printf("varying ");
|
||||
if (typeQualifiers & TYPEQUAL_TASK) printf("task ");
|
||||
if (typeQualifiers & TYPEQUAL_SIGNED) printf("signed ");
|
||||
if (typeQualifiers & TYPEQUAL_UNSIGNED) printf("unsigned ");
|
||||
|
||||
printf("%s", baseType->GetString().c_str());
|
||||
lPrintTypeQualifiers(typeQualifiers);
|
||||
printf("base type: %s", baseType->GetString().c_str());
|
||||
|
||||
if (vectorSize > 0) printf("<%d>", vectorSize);
|
||||
printf("]");
|
||||
}
|
||||
|
||||
|
||||
@@ -192,19 +191,46 @@ Declarator::GetSymbol() const {
|
||||
|
||||
|
||||
void
|
||||
Declarator::Print() const {
|
||||
Declarator::Print(int indent) const {
|
||||
printf("%*cdeclarator: [", indent, ' ');
|
||||
pos.Print();
|
||||
|
||||
lPrintTypeQualifiers(typeQualifiers);
|
||||
Symbol *sym = GetSymbol();
|
||||
if (sym != NULL)
|
||||
printf("%s", sym->name.c_str());
|
||||
else
|
||||
printf("(null symbol)");
|
||||
|
||||
printf(", array size = %d", arraySize);
|
||||
|
||||
printf(", kind = ");
|
||||
switch (kind) {
|
||||
case DK_BASE: printf("base"); break;
|
||||
case DK_POINTER: printf("pointer"); break;
|
||||
case DK_REFERENCE: printf("reference"); break;
|
||||
case DK_ARRAY: printf("array"); break;
|
||||
case DK_FUNCTION: printf("function"); break;
|
||||
default: FATAL("Unhandled declarator kind");
|
||||
}
|
||||
|
||||
if (initExpr != NULL) {
|
||||
printf(" = (");
|
||||
initExpr->Print();
|
||||
printf(")");
|
||||
}
|
||||
pos.Print();
|
||||
|
||||
if (functionParams.size() > 0) {
|
||||
for (unsigned int i = 0; i < functionParams.size(); ++i) {
|
||||
printf("\n%*cfunc param %d:\n", indent, ' ', i);
|
||||
functionParams[i]->Print(indent+4);
|
||||
}
|
||||
}
|
||||
|
||||
if (child != NULL)
|
||||
child->Print(indent + 4);
|
||||
|
||||
printf("]\n");
|
||||
}
|
||||
|
||||
|
||||
@@ -216,7 +242,7 @@ Declarator::GetFunctionInfo(DeclSpecs *ds, std::vector<Symbol *> *funArgs) {
|
||||
return NULL;
|
||||
|
||||
Symbol *declSym = GetSymbol();
|
||||
assert(declSym != NULL);
|
||||
Assert(declSym != NULL);
|
||||
|
||||
// Get the symbol for the function from the symbol table. (It should
|
||||
// already have been added to the symbol table by AddGlobal() by the
|
||||
@@ -232,14 +258,16 @@ Declarator::GetFunctionInfo(DeclSpecs *ds, std::vector<Symbol *> *funArgs) {
|
||||
Declarator *d = this;
|
||||
while (d != NULL && d->kind != DK_FUNCTION)
|
||||
d = d->child;
|
||||
assert(d != NULL);
|
||||
Assert(d != NULL);
|
||||
|
||||
for (unsigned int i = 0; i < d->functionParams.size(); ++i) {
|
||||
Declaration *pdecl = d->functionParams[i];
|
||||
assert(pdecl->declarators.size() == 1);
|
||||
funArgs->push_back(pdecl->declarators[0]->GetSymbol());
|
||||
Symbol *sym = d->GetSymbolForFunctionParameter(i);
|
||||
sym->type = sym->type->ResolveUnboundVariability(Type::Varying);
|
||||
funArgs->push_back(sym);
|
||||
}
|
||||
|
||||
funSym->type = funSym->type->ResolveUnboundVariability(Type::Varying);
|
||||
|
||||
return funSym;
|
||||
}
|
||||
|
||||
@@ -258,17 +286,23 @@ Declarator::GetType(const Type *base, DeclSpecs *ds) const {
|
||||
if (kind != DK_FUNCTION && isTask)
|
||||
Error(pos, "\"task\" qualifier illegal in variable declaration.");
|
||||
|
||||
Type::Variability variability = Type::Unbound;
|
||||
if (hasUniformQual)
|
||||
variability = Type::Uniform;
|
||||
else if (hasVaryingQual)
|
||||
variability = Type::Varying;
|
||||
|
||||
const Type *type = base;
|
||||
switch (kind) {
|
||||
case DK_BASE:
|
||||
// All of the type qualifiers should be in the DeclSpecs for the
|
||||
// base declarator
|
||||
assert(typeQualifiers == 0);
|
||||
assert(child == NULL);
|
||||
Assert(typeQualifiers == 0);
|
||||
Assert(child == NULL);
|
||||
return type;
|
||||
|
||||
case DK_POINTER:
|
||||
type = new PointerType(type, hasUniformQual, isConst);
|
||||
type = new PointerType(type, variability, isConst);
|
||||
if (child != NULL)
|
||||
return child->GetType(type, ds);
|
||||
else
|
||||
@@ -316,25 +350,7 @@ Declarator::GetType(const Type *base, DeclSpecs *ds) const {
|
||||
for (unsigned int i = 0; i < functionParams.size(); ++i) {
|
||||
Declaration *d = functionParams[i];
|
||||
|
||||
char buf[32];
|
||||
Symbol *sym;
|
||||
if (d->declarators.size() == 0) {
|
||||
// function declaration like foo(float), w/o a name for
|
||||
// the parameter
|
||||
sprintf(buf, "__anon_parameter_%d", i);
|
||||
sym = new Symbol(buf, pos);
|
||||
sym->type = d->declSpecs->GetBaseType(pos);
|
||||
}
|
||||
else {
|
||||
sym = d->declarators[0]->GetSymbol();
|
||||
if (sym == NULL) {
|
||||
// Handle more complex anonymous declarations like
|
||||
// float (float **).
|
||||
sprintf(buf, "__anon_parameter_%d", i);
|
||||
sym = new Symbol(buf, d->declarators[0]->pos);
|
||||
sym->type = d->declarators[0]->GetType(d->declSpecs);
|
||||
}
|
||||
}
|
||||
Symbol *sym = GetSymbolForFunctionParameter(i);
|
||||
|
||||
if (d->declSpecs->storageClass != SC_NONE)
|
||||
Error(sym->pos, "Storage class \"%s\" is illegal in "
|
||||
@@ -376,13 +392,13 @@ Declarator::GetType(const Type *base, DeclSpecs *ds) const {
|
||||
// it lives down to the base declarator.
|
||||
Declarator *decl = d->declarators[0];
|
||||
while (decl->child != NULL) {
|
||||
assert(decl->initExpr == NULL);
|
||||
Assert(decl->initExpr == NULL);
|
||||
decl = decl->child;
|
||||
}
|
||||
|
||||
if (decl->initExpr != NULL &&
|
||||
(decl->initExpr = decl->initExpr->TypeCheck()) != NULL &&
|
||||
(decl->initExpr = decl->initExpr->Optimize()) != NULL &&
|
||||
(decl->initExpr = TypeCheck(decl->initExpr)) != NULL &&
|
||||
(decl->initExpr = Optimize(decl->initExpr)) != NULL &&
|
||||
(init = dynamic_cast<ConstExpr *>(decl->initExpr)) == NULL) {
|
||||
Error(decl->initExpr->pos, "Default value for parameter "
|
||||
"\"%s\" must be a compile-time constant.",
|
||||
@@ -397,7 +413,7 @@ Declarator::GetType(const Type *base, DeclSpecs *ds) const {
|
||||
Error(pos, "No return type provided in function declaration.");
|
||||
return NULL;
|
||||
}
|
||||
|
||||
|
||||
bool isExported = ds && (ds->storageClass == SC_EXPORT);
|
||||
bool isExternC = ds && (ds->storageClass == SC_EXTERN_C);
|
||||
bool isTask = ds && ((ds->typeQualifiers & TYPEQUAL_TASK) != 0);
|
||||
@@ -418,9 +434,10 @@ Declarator::GetType(const Type *base, DeclSpecs *ds) const {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
Type *functionType =
|
||||
new FunctionType(returnType, args, pos, argNames, argDefaults,
|
||||
const Type *functionType =
|
||||
new FunctionType(returnType, args, argNames, argDefaults,
|
||||
argPos, isTask, isExported, isExternC);
|
||||
functionType = functionType->ResolveUnboundVariability(Type::Varying);
|
||||
return child->GetType(functionType, ds);
|
||||
}
|
||||
default:
|
||||
@@ -461,6 +478,35 @@ Declarator::GetType(DeclSpecs *ds) const {
|
||||
}
|
||||
|
||||
|
||||
Symbol *
|
||||
Declarator::GetSymbolForFunctionParameter(int paramNum) const {
|
||||
Assert(paramNum < (int)functionParams.size());
|
||||
Declaration *d = functionParams[paramNum];
|
||||
|
||||
char buf[32];
|
||||
Symbol *sym;
|
||||
if (d->declarators.size() == 0) {
|
||||
// function declaration like foo(float), w/o a name for
|
||||
// the parameter
|
||||
sprintf(buf, "__anon_parameter_%d", paramNum);
|
||||
sym = new Symbol(buf, pos);
|
||||
sym->type = d->declSpecs->GetBaseType(pos);
|
||||
}
|
||||
else {
|
||||
Assert(d->declarators.size() == 1);
|
||||
sym = d->declarators[0]->GetSymbol();
|
||||
if (sym == NULL) {
|
||||
// Handle more complex anonymous declarations like
|
||||
// float (float **).
|
||||
sprintf(buf, "__anon_parameter_%d", paramNum);
|
||||
sym = new Symbol(buf, d->declarators[0]->pos);
|
||||
sym->type = d->declarators[0]->GetType(d->declSpecs);
|
||||
}
|
||||
}
|
||||
return sym;
|
||||
}
|
||||
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
// Declaration
|
||||
|
||||
@@ -485,23 +531,19 @@ Declaration::Declaration(DeclSpecs *ds, Declarator *d) {
|
||||
|
||||
std::vector<VariableDeclaration>
|
||||
Declaration::GetVariableDeclarations() const {
|
||||
assert(declSpecs->storageClass != SC_TYPEDEF);
|
||||
Assert(declSpecs->storageClass != SC_TYPEDEF);
|
||||
std::vector<VariableDeclaration> vars;
|
||||
|
||||
for (unsigned int i = 0; i < declarators.size(); ++i) {
|
||||
if (declarators[i] == NULL)
|
||||
continue;
|
||||
Declarator *decl = declarators[i];
|
||||
if (decl == NULL)
|
||||
// Ignore earlier errors
|
||||
continue;
|
||||
|
||||
Symbol *sym = decl->GetSymbol();
|
||||
if (dynamic_cast<const FunctionType *>(sym->type) != NULL) {
|
||||
// function declaration
|
||||
m->symbolTable->AddFunction(sym);
|
||||
}
|
||||
else {
|
||||
sym->type = sym->type->ResolveUnboundVariability(Type::Varying);
|
||||
|
||||
if (dynamic_cast<const FunctionType *>(sym->type) == NULL) {
|
||||
m->symbolTable->AddVariable(sym);
|
||||
vars.push_back(VariableDeclaration(sym, decl->initExpr));
|
||||
}
|
||||
@@ -511,16 +553,36 @@ Declaration::GetVariableDeclarations() const {
|
||||
|
||||
|
||||
void
|
||||
Declaration::Print() const {
|
||||
printf("Declaration: specs [");
|
||||
declSpecs->Print();
|
||||
printf("], declarators [");
|
||||
for (unsigned int i = 0 ; i < declarators.size(); ++i) {
|
||||
declarators[i]->Print();
|
||||
printf("%s", (i == declarators.size() - 1) ? "]" : ", ");
|
||||
Declaration::DeclareFunctions() {
|
||||
Assert(declSpecs->storageClass != SC_TYPEDEF);
|
||||
|
||||
for (unsigned int i = 0; i < declarators.size(); ++i) {
|
||||
Declarator *decl = declarators[i];
|
||||
if (decl == NULL)
|
||||
// Ignore earlier errors
|
||||
continue;
|
||||
|
||||
Symbol *sym = decl->GetSymbol();
|
||||
sym->type = sym->type->ResolveUnboundVariability(Type::Varying);
|
||||
|
||||
if (dynamic_cast<const FunctionType *>(sym->type) == NULL)
|
||||
continue;
|
||||
|
||||
bool isInline = (declSpecs->typeQualifiers & TYPEQUAL_INLINE);
|
||||
m->AddFunctionDeclaration(sym, isInline);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void
|
||||
Declaration::Print(int indent) const {
|
||||
printf("%*cDeclaration: specs [", indent, ' ');
|
||||
declSpecs->Print();
|
||||
printf("], declarators:\n");
|
||||
for (unsigned int i = 0 ; i < declarators.size(); ++i)
|
||||
declarators[i]->Print(indent+4);
|
||||
}
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
|
||||
void
|
||||
@@ -539,7 +601,7 @@ GetStructTypesNamesPositions(const std::vector<StructDeclaration *> &sd,
|
||||
DeclSpecs ds(type);
|
||||
if (type->IsUniformType())
|
||||
ds.typeQualifiers |= TYPEQUAL_UNIFORM;
|
||||
else
|
||||
else if (type->IsVaryingType())
|
||||
ds.typeQualifiers |= TYPEQUAL_VARYING;
|
||||
|
||||
for (unsigned int j = 0; j < sd[i]->declarators->size(); ++j) {
|
||||
|
||||
10
decl.h
10
decl.h
@@ -153,10 +153,12 @@ public:
|
||||
declarator and symbols for its arguments in *args. */
|
||||
Symbol *GetFunctionInfo(DeclSpecs *ds, std::vector<Symbol *> *args);
|
||||
|
||||
Symbol *GetSymbolForFunctionParameter(int paramNum) const;
|
||||
|
||||
/** Returns the symbol associated with the declarator. */
|
||||
Symbol *GetSymbol() const;
|
||||
|
||||
void Print() const;
|
||||
void Print(int indent) const;
|
||||
|
||||
/** Position of the declarator in the source program. */
|
||||
const SourcePos pos;
|
||||
@@ -199,7 +201,7 @@ public:
|
||||
Declaration(DeclSpecs *ds, std::vector<Declarator *> *dlist = NULL);
|
||||
Declaration(DeclSpecs *ds, Declarator *d);
|
||||
|
||||
void Print() const;
|
||||
void Print(int indent) const;
|
||||
|
||||
/** This method walks through all of the Declarators in a declaration
|
||||
and returns a fully-initialized Symbol and (possibly) and
|
||||
@@ -208,6 +210,10 @@ public:
|
||||
Declarator representation.) */
|
||||
std::vector<VariableDeclaration> GetVariableDeclarations() const;
|
||||
|
||||
/** For any function declarations in the Declaration, add the
|
||||
declaration to the module. */
|
||||
void DeclareFunctions();
|
||||
|
||||
DeclSpecs *declSpecs;
|
||||
std::vector<Declarator *> declarators;
|
||||
};
|
||||
|
||||
@@ -1,3 +1,73 @@
|
||||
=== v1.1.3 === (20 January 2012)
|
||||
|
||||
With this release, the language now supports "switch" statements, with the
|
||||
same semantics and syntax as in C.
|
||||
|
||||
This release includes fixes for two important performance related issues:
|
||||
the quality of code generated for "foreach" statements has been
|
||||
substantially improved (https://github.com/ispc/ispc/issues/151), and a
|
||||
performance regression with code for "gathers" that was introduced in
|
||||
v1.1.2 has been fixed in this release.
|
||||
|
||||
A number of other small bugs were fixed in this release as well, including
|
||||
one where invalid memory would sometimes be incorrectly accessed
|
||||
(https://github.com/ispc/ispc/issues/160).
|
||||
|
||||
Thanks to Jean-Luc Duprat for a number of patches that improve support for
|
||||
building on various platforms, and to Pierre-Antoine Lacaze for patches so
|
||||
that ispc builds under MinGW.
|
||||
|
||||
=== v1.1.2 === (9 January 2012)
|
||||
|
||||
The major new feature in this release is support for "generic" C++
|
||||
vectorized output; in other words, ispc can emit C++ code that corresponds
|
||||
to the vectorized computation that the ispc program represents. See the
|
||||
examples/intrinsics directory in the ispc distribution for two example
|
||||
implementations of the set of functions that must be provided to map the
|
||||
vector calls generated by ispc to target specific functions.
|
||||
|
||||
ispc now has partial support for 'goto' statements; specifically, goto is
|
||||
allowed if any enclosing control flow statements (if/for/while/do) have
|
||||
'uniform' test expressions, but not if they have 'varying' tests.
|
||||
|
||||
A number of improvements have been made to the code generated for gathers
|
||||
and scatters--one of them (better matching x86's "free" scale by 2/4/8 for
|
||||
addressing calculations) improved the performance of the noise example by
|
||||
14%.
|
||||
|
||||
Many small bugs have been fixed in this release as well, including issue
|
||||
numbers 138, 129, 135, 127, 149, and 142.
|
||||
|
||||
=== v1.1.1 === (15 December 2011)
|
||||
|
||||
This release doesn't include any significant new functionality, but does
|
||||
include small improvements in generated code and a number of bug fixes.
|
||||
|
||||
The one user-visible language change is that integer constants may be
|
||||
specified with 'u' and 'l' suffixes, like in C. For example, "1024llu"
|
||||
defines the constant with unsigned 64-bit type.
|
||||
|
||||
More informative and useful error messages are printed when function
|
||||
overload resolution fails.
|
||||
|
||||
Masking is avoided in additional cases when the mask can be
|
||||
statically-determined to be all on.
|
||||
|
||||
A number of small bugs have been fixed:
|
||||
- Under some circumstances, incorrect masks were used when assigning a
|
||||
value to a reference and when doing gathers/scatters.
|
||||
- Incorrect code could be generated in some cases when some instances
|
||||
returned part way through a function but others continued executing.
|
||||
- Type checking wasn't being performed for calls through function pointers;
|
||||
now an error is issued if the arguments don't match up, etc.
|
||||
- Incorrect code was being generated for gather/scatter to structs that had
|
||||
elements with varying short-vector types.
|
||||
- Typechecking wasn't being performed for "foreach" statements; this led to
|
||||
problems like function overload resolution not being performed if an
|
||||
overloaded function call was used to determine the iteration range.
|
||||
- A number of symbols would be multiply-defined when compiling to multiple
|
||||
targets and using the sse2-x2 target as one of them (issue #131).
|
||||
|
||||
=== v1.1.0 === (5 December 2011)
|
||||
|
||||
This is a major new release of the compiler, with significant additions to
|
||||
|
||||
@@ -2,11 +2,11 @@
|
||||
|
||||
for i in ispc perfguide faq; do
|
||||
rst2html.py --template=template.txt --link-stylesheet \
|
||||
--stylesheet-path=css/style.css $i.txt > $i.html
|
||||
--stylesheet-path=css/style.css $i.rst > $i.html
|
||||
done
|
||||
|
||||
rst2html.py --template=template-perf.txt --link-stylesheet \
|
||||
--stylesheet-path=css/style.css perf.txt > perf.html
|
||||
--stylesheet-path=css/style.css perf.rst > perf.html
|
||||
|
||||
#rst2latex --section-numbering --documentclass=article --documentoptions=DIV=9,10pt,letterpaper ispc.txt > ispc.tex
|
||||
#pdflatex ispc.tex
|
||||
|
||||
@@ -1,10 +1,10 @@
|
||||
=============================================================
|
||||
Intel® SPMD Program Compiler Frequently Asked Questions (FAQ)
|
||||
=============================================================
|
||||
=====================================
|
||||
Frequently Asked Questions About ispc
|
||||
=====================================
|
||||
|
||||
This document includes a number of frequently (and not frequently) asked
|
||||
questions about ispc, the Intel® SPMD Program Compiler. The source to this
|
||||
document is in the file ``docs/faq.txt`` in the ``ispc`` source
|
||||
document is in the file ``docs/faq.rst`` in the ``ispc`` source
|
||||
distribution.
|
||||
|
||||
* Understanding ispc's Output
|
||||
@@ -273,10 +273,10 @@ Then four object files will be generated: ``foo_sse2.o``, ``foo_sse4.o``,
|
||||
``foo_avx.o``, and ``foo.o``.[#]_ Link all of these into your executable, and
|
||||
when you call a function in ``foo.ispc`` from your application code,
|
||||
``ispc`` will determine which instruction sets are supported by the CPU the
|
||||
code is running on and will call the most appropraite version of the
|
||||
code is running on and will call the most appropriate version of the
|
||||
function available.
|
||||
|
||||
.. [#] Similarly, if you choose to generate assembly langauage output or
|
||||
.. [#] Similarly, if you choose to generate assembly language output or
|
||||
LLVM bitcode output, multiple versions of those files will be created.
|
||||
|
||||
In general, the version of the function that runs will be the one in the
|
||||
@@ -26,9 +26,9 @@ The main goals behind ``ispc`` are to:
|
||||
units without the extremely low-programmer-productivity activity of directly
|
||||
writing intrinsics.
|
||||
* Explore opportunities from close-coupling between C/C++ application code
|
||||
and SPMD ``ispc`` code running on the same processor--lightweight funcion
|
||||
calls betwen the two languages, sharing data directly via pointers without
|
||||
copying or reformating, etc.
|
||||
and SPMD ``ispc`` code running on the same processor--lightweight function
|
||||
calls between the two languages, sharing data directly via pointers without
|
||||
copying or reformatting, etc.
|
||||
|
||||
**We are very interested in your feedback and comments about ispc and
|
||||
in hearing your experiences using the system. We are especially interested
|
||||
@@ -56,6 +56,7 @@ Contents:
|
||||
|
||||
+ `Basic Command-line Options`_
|
||||
+ `Selecting The Compilation Target`_
|
||||
+ `Generating Generic C++ Output`_
|
||||
+ `Selecting 32 or 64 Bit Addressing`_
|
||||
+ `The Preprocessor`_
|
||||
+ `Debugging`_
|
||||
@@ -98,7 +99,9 @@ Contents:
|
||||
+ `Control Flow`_
|
||||
|
||||
* `Conditional Statements: "if"`_
|
||||
* `Conditional Statements: "switch"`_
|
||||
* `Basic Iteration Statements: "for", "while", and "do"`_
|
||||
* `Unstructured Control Flow: "goto"`_
|
||||
* `"Coherent" Control Flow Statements: "cif" and Friends`_
|
||||
* `Parallel Iteration Statements: "foreach" and "foreach_tiled"`_
|
||||
* `Parallel Iteration with "programIndex" and "programCount"`_
|
||||
@@ -146,8 +149,6 @@ Contents:
|
||||
+ `Restructuring Existing Programs to Use ISPC`_
|
||||
+ `Understanding How to Interoperate With the Application's Data`_
|
||||
|
||||
* `Related Languages`_
|
||||
|
||||
* `Disclaimer and Legal Information`_
|
||||
|
||||
* `Optimization Notice`_
|
||||
@@ -251,7 +252,7 @@ of the value.
|
||||
The first thing to notice in this program is the presence of the ``export``
|
||||
keyword in the function definition; this indicates that the function should
|
||||
be made available to be called from application code. The ``uniform``
|
||||
qualifiers on the parameters to ``simple`` indicate that the correpsonding
|
||||
qualifiers on the parameters to ``simple`` indicate that the corresponding
|
||||
variables are non-vector quantities--this concept is discussed in detail in the
|
||||
`"uniform" and "varying" Qualifiers`_ section.
|
||||
|
||||
@@ -323,7 +324,7 @@ When the executable ``simple`` runs, it generates the expected output:
|
||||
...
|
||||
|
||||
For a slightly more complex example of using ``ispc``, see the `Mandelbrot
|
||||
set example`_ page on the ``ispc`` website for a walkthrough of an ``ispc``
|
||||
set example`_ page on the ``ispc`` website for a walk-through of an ``ispc``
|
||||
implementation of that algorithm. After reading through that example, you
|
||||
may want to examine the source code of the various examples in the
|
||||
``examples/`` directory of the ``ispc`` distribution.
|
||||
@@ -374,7 +375,7 @@ Optimizations are on by default; they can be turned off with ``-O0``:
|
||||
On Mac\* and Linux\*, there is basic support for generating debugging
|
||||
symbols; this is enabled with the ``-g`` command-line flag. Using ``-g``
|
||||
causes optimizations to be disabled; to compile with debugging symbols and
|
||||
optimizaion, ``-O1`` should be provided as well as the ``-g`` flag.
|
||||
optimization, ``-O1`` should be provided as well as the ``-g`` flag.
|
||||
|
||||
The ``-h`` flag can also be used to direct ``ispc`` to generate a C/C++
|
||||
header file that includes C/C++ declarations of the C-callable ``ispc``
|
||||
@@ -402,7 +403,7 @@ which sets the target architecture, ``--cpu``, which sets the target CPU,
|
||||
and ``--target``, which sets the target instruction set.
|
||||
|
||||
By default, the ``ispc`` compiler generates code for the 64-bit x86-64
|
||||
architecture (i.e. ``--arch=x86-64`.) To compile to a 32-bit x86 target,
|
||||
architecture (i.e. ``--arch=x86-64``.) To compile to a 32-bit x86 target,
|
||||
supply ``--arch=x86`` on the command line:
|
||||
|
||||
::
|
||||
@@ -434,6 +435,65 @@ Intel® SSE2, use ``--target=sse2``. (As with the other options in this
|
||||
section, see the output of ``ispc --help`` for a full list of supported
|
||||
targets.)
|
||||
|
||||
Generating Generic C++ Output
|
||||
-----------------------------
|
||||
|
||||
In addition to generating object files or assembly output for specific
|
||||
targets like SSE2, SSE4, and AVX, ``ispc`` provides an option to generate
|
||||
"generic" C++ output. This
|
||||
|
||||
As an example, consider the following simple ``ispc`` program:
|
||||
|
||||
::
|
||||
|
||||
int foo(int i, int j) {
|
||||
return (i < 0) ? 0 : i + j;
|
||||
}
|
||||
|
||||
If this program is compiled with the following command:
|
||||
|
||||
::
|
||||
|
||||
ispc foo.ispc --emit-c++ --target=generic-4 -o foo.cpp
|
||||
|
||||
Then ``foo()`` is compiled to the following C++ code (after various
|
||||
automatically-generated boilerplate code):
|
||||
|
||||
::
|
||||
|
||||
__vec4_i32 foo(__vec4_i32 i_llvm_cbe, __vec4_i32 j_llvm_cbe,
|
||||
__vec4_i1 __mask_llvm_cbe) {
|
||||
return (__select((__signed_less_than(i_llvm_cbe,
|
||||
__vec4_i32 (0u, 0u, 0u, 0u))),
|
||||
__vec4_i32 (0u, 0u, 0u, 0u),
|
||||
(__add(i_llvm_cbe, j_llvm_cbe))));
|
||||
}
|
||||
|
||||
Note that the original computation has been expressed in terms of a number
|
||||
of vector types (e.g. ``__vec4_i32`` for a 4-wide vector of 32-bit integers
|
||||
and ``__vec4_i1`` for a 4-wide vector of boolean values) and in terms of
|
||||
vector operations on these types like ``__add()`` and ``__select()``.
|
||||
|
||||
You are then free to provide your own implementations of these types and
|
||||
functions. For example, you might want to target a specific vector ISA, or
|
||||
you might want to instrument these functions for performance measurements.
|
||||
|
||||
There is an example implementation of 4-wide variants of the required
|
||||
functions, suitable for use with the ``generic-4`` target in the file
|
||||
``examples/intrinsics/sse4.h``, and there is an example straightforward C
|
||||
implementation of the 16-wide variants for the ``generic-16`` target in the
|
||||
file ``examples/intrinsics/generic-16.h``. There is not yet comprehensive
|
||||
documentation of these types and the functions that must be provided for
|
||||
them when the C++ target is used, but a review of those two files should
|
||||
provide the basic context.
|
||||
|
||||
If you are using C++ source emission, you may also find the
|
||||
``--c++-include-file=<filename>`` command line argument useful; it adds an
|
||||
``#include`` statement with the given filename at the top of the emitted
|
||||
C++ file; this can be used to easily include specific implementations of
|
||||
the vector types and functions.
|
||||
|
||||
|
||||
Selecting 32 or 64 Bit Addressing
|
||||
---------------------------------
|
||||
|
||||
@@ -473,6 +533,9 @@ preprocessor runs:
|
||||
* - ISPC_TARGET_{SSE2,SSE4,AVX}
|
||||
- 1
|
||||
- One of these will be set, depending on the compilation target.
|
||||
* - ISPC_POINTER_SIZE
|
||||
- 32 or 64
|
||||
- Number of bits used to represent a pointer for the target architecture.
|
||||
* - ISPC_MAJOR_VERSION
|
||||
- 1
|
||||
- Major version of the ``ispc`` compiler/language
|
||||
@@ -525,8 +588,8 @@ Basic Concepts: Program Instances and Gangs of Program Instances
|
||||
Upon entry to an ``ispc`` function called from C/C++ code, the execution
|
||||
model switches from the application's serial model to ``ispc``'s execution
|
||||
model. Conceptually, a number of ``ispc`` *program instances* start
|
||||
running in concurrently. The group of running program instances is a
|
||||
called *gang* (harkening to "gang scheduling", since ``ispc`` provides
|
||||
running concurrently. The group of running program instances is
|
||||
called a *gang* (harkening to "gang scheduling", since ``ispc`` provides
|
||||
certain guarantees about the control flow coherence of program instances
|
||||
running in a gang, detailed in `Gang Convergence Guarantees`_.) An
|
||||
``ispc`` program instance is thus similar to a CUDA* "thread" or an OpenCL*
|
||||
@@ -609,7 +672,7 @@ side-effects.
|
||||
|
||||
Upon entry to an ``ispc`` function called by the application, the execution
|
||||
mask is "all on" and the program counter points at the first statement in
|
||||
the function. The following two statments describe the required behavior
|
||||
the function. The following two statements describe the required behavior
|
||||
of the program counter and the execution mask over the course of execution
|
||||
of an ``ispc`` function.
|
||||
|
||||
@@ -730,7 +793,7 @@ program instances is *maximally converged*. Maximal convergence means that
|
||||
if two program instances follow the same control path, they are guaranteed
|
||||
to execute each program statement concurrently. If two program instances
|
||||
follow diverging control paths, it is guaranteed that they will reconverge
|
||||
as soon as possible (if they do later reconverge). [#]_
|
||||
as soon as possible in the function (if they do later reconverge). [#]_
|
||||
|
||||
.. [#] This is another significant difference between the ``ispc``
|
||||
execution model and the one implemented by OpenCL* and CUDA*, which
|
||||
@@ -754,9 +817,25 @@ It is guaranteed that all program instances that were running before the
|
||||
for the gang of program instances, rather than the concept of a unique
|
||||
program counter for each program instance.)
|
||||
|
||||
Another implication of this property is that it is illegal to execute a
|
||||
function with an 8-wide gang by running it two times, with a 4-wide gang
|
||||
representing half of the original 8-wide gang each time.
|
||||
Another implication of this property is that it would be illegal for the
|
||||
``ispc`` implementation to execute a function with an 8-wide gang by
|
||||
running it two times, with a 4-wide gang representing half of the original
|
||||
8-wide gang each time.
|
||||
|
||||
It also follows that given the following program:
|
||||
|
||||
::
|
||||
|
||||
if (programIndex == 0) {
|
||||
while (true) // infinite loop
|
||||
;
|
||||
}
|
||||
print("hello, world\n");
|
||||
|
||||
the program will loop infinitely and the ``print`` statement will never be
|
||||
executed. (A different execution model that allowed gang divergence might
|
||||
execute the ``print`` statement since not all program instances were caught
|
||||
in the infinite loop in the example above.)
|
||||
|
||||
The way that "varying" function pointers are handled in ``ispc`` is also
|
||||
affected by this guarantee: if a function pointer is ``varying``, then it
|
||||
@@ -802,7 +881,7 @@ of control flow, will say that control flow based on ``varying``
|
||||
expressions is "varying" control flow.)
|
||||
|
||||
Consider for example an image filtering operation where the program loops
|
||||
over pixels adjacent to the given (x,y) coordiantes:
|
||||
over pixels adjacent to the given (x,y) coordinates:
|
||||
|
||||
::
|
||||
|
||||
@@ -902,7 +981,7 @@ for all program instances in the gang, it's possible that the "true" clause
|
||||
executed with an "all off" mask and ``b`` was modified there.
|
||||
|
||||
If it is important that code never be executed with an "all off" execution
|
||||
mask, then the ``cif`` statment (documented in the `"Coherent" Control Flow
|
||||
mask, then the ``cif`` statement (documented in the `"Coherent" Control Flow
|
||||
Statements: "cif" and Friends`_ section) can be used in place of a regular
|
||||
``if``, as it guarantees this property.
|
||||
|
||||
@@ -973,6 +1052,20 @@ which of them will write their value of ``value`` to ``array[index]``.
|
||||
array[index] = value;
|
||||
}
|
||||
|
||||
As another example, if the values of the array indices ``i`` and ``j`` have
|
||||
the same values for some of the program instances, and an assignment like
|
||||
the following is performed:
|
||||
|
||||
::
|
||||
|
||||
int i = ..., j = ...;
|
||||
uniform int array[...] = { ... };
|
||||
array[i] = array[j];
|
||||
|
||||
|
||||
then the program's behavior is undefined, since there is no sequence point
|
||||
between the reads and writes to the same location.
|
||||
|
||||
While this rule that says that program instances can safely depend on
|
||||
side-effects from other program instances in their gang eliminates a
|
||||
class of synchronization requirements imposed by some other SPMD languages,
|
||||
@@ -1014,7 +1107,7 @@ completed.
|
||||
The ISPC Language
|
||||
=================
|
||||
|
||||
``ispc`` is an extended verion of the C programming language, providing a
|
||||
``ispc`` is an extended version of the C programming language, providing a
|
||||
number of new features that make it easy to write high-performance SPMD
|
||||
programs for the CPU. Note that between not only the few small syntactic
|
||||
differences between ``ispc`` and C code but more importantly ``ispc``'s
|
||||
@@ -1035,12 +1128,12 @@ This subsection summarizes the differences between ``ispc`` and C; if you
|
||||
are already familiar with C, you may find it most effective to focus on
|
||||
this subsection and just focus on the topics in the remainder of section
|
||||
that introduce new language features. You may also find it helpful to
|
||||
comapre the ``ispc`` and C++ implementations of various algorithms in the
|
||||
compare the ``ispc`` and C++ implementations of various algorithms in the
|
||||
``ispc`` ``examples/`` directory to get a sense of the close relationship
|
||||
between ``ispc`` and C.
|
||||
|
||||
Specifically, C89 is used as the baseline for comparison in this subsection
|
||||
(this is also the verion of C described in the Second Edition of Kernighan
|
||||
(this is also the version of C described in the Second Edition of Kernighan
|
||||
and Ritchie's book). (``ispc`` adopts some features from C99 and from C++,
|
||||
which will be highlighted below.)
|
||||
|
||||
@@ -1049,7 +1142,7 @@ in C:
|
||||
|
||||
* Expression syntax and basic types
|
||||
* Syntax for variable declarations
|
||||
* Control flow structures: if, for, while, do
|
||||
* Control flow structures: ``if``, ``for``, ``while``, ``do``, and ``switch``.
|
||||
* Pointers, including function pointers, ``void *``, and C's array/pointer
|
||||
duality (arrays are converted to pointers when passed to functions, etc.)
|
||||
* Structs and arrays
|
||||
@@ -1068,7 +1161,7 @@ in C:
|
||||
statement itself (e.g. ``for (int i = 0; ...``)
|
||||
* The ``inline`` qualifier to indicate that a function should be inlined
|
||||
* Function overloading by parameter type
|
||||
* Hexidecimal floating-point constants
|
||||
* Hexadecimal floating-point constants
|
||||
|
||||
``ispc`` also adds a number of new features that aren't in C89, C99, or
|
||||
C++:
|
||||
@@ -1093,7 +1186,7 @@ but are likely to be supported in future releases:
|
||||
``int64`` types
|
||||
* Character constants
|
||||
* String constants and arrays of characters as strings
|
||||
* ``switch`` and ``goto`` statements
|
||||
* ``goto`` statements are partially supported (see `Unstructured Control Flow: "goto"`_)
|
||||
* ``union`` types
|
||||
* Bitfield members of ``struct`` types
|
||||
* Variable numbers of arguments to functions
|
||||
@@ -1127,11 +1220,11 @@ The following reserved words from C89 are also reserved in ``ispc``:
|
||||
Lexical Structure
|
||||
-----------------
|
||||
|
||||
Tokens in ``ispc`` are delimted by white-space and comments. The
|
||||
Tokens in ``ispc`` are delimited by white-space and comments. The
|
||||
white-space characters are the usual set of spaces, tabs, and carriage
|
||||
returns/line feeds. Comments can be delinated with ``//``, which starts a
|
||||
returns/line feeds. Comments can be delineated with ``//``, which starts a
|
||||
comment that continues to the end of the line, or the start of a comment
|
||||
can be delinated with ``/*`` and the end with ``*/``. Like C/C++,
|
||||
can be delineated with ``/*`` and the end with ``*/``. Like C/C++,
|
||||
comments can't be nested.
|
||||
|
||||
Identifiers in ``ispc`` are sequences of characters that start with an
|
||||
@@ -1139,9 +1232,9 @@ underscore or an upper-case or lower-case letter, and then followed by
|
||||
zero or more letters, numbers, or underscores. Identifiers that start with
|
||||
two underscores are reserved for use by the compiler.
|
||||
|
||||
Integer numeric constants can be specified in base 10, hexidecimal, or
|
||||
Integer numeric constants can be specified in base 10, hexadecimal, or
|
||||
binary. (Octal integer constants aren't supported). Base 10 constants are
|
||||
given by a sequence of one or more digits from 0 to 9. Hexidecimal
|
||||
given by a sequence of one or more digits from 0 to 9. Hexadecimal
|
||||
constants are denoted by a leading ``0x`` and then one or more digits from
|
||||
0-9, a-f, or A-F. Finally, binary constants are denoted by a leading
|
||||
``0b`` and then a sequence of 1s and 0s.
|
||||
@@ -1154,6 +1247,18 @@ Here are three ways of specifying the integer value "15":
|
||||
int fifteen_hex = 0xf;
|
||||
int fifteen_binary = 0b1111;
|
||||
|
||||
A number of suffixes can be provided with integer numeric constants.
|
||||
First, "u" denotes that the constant is unsigned, and "ll" denotes a 64-bit
|
||||
integer constant (while "l" denotes a 32-bit integer constant). It is also
|
||||
possible to denote units of 1024, 1024*1024, or 1024*1024*1024 with the
|
||||
SI-inspired suffixes "k", "M", and "G" respectively:
|
||||
|
||||
::
|
||||
|
||||
int two_kb = 2k; // 2048
|
||||
int two_megs = 2M; // 2 * 1024 * 1024
|
||||
int one_gig = 1G; // 1024 * 1024 * 1024
|
||||
|
||||
Floating-point constants can be specified in one of three ways. First,
|
||||
they may be a sequence of zero or more digits from 0 to 9, followed by a
|
||||
period, followed by zero or more digits from 0 to 9. (There must be at
|
||||
@@ -1163,11 +1268,11 @@ The second option is scientific notation, where a base value is specified
|
||||
as the first form of a floating-point constant but is then followed by an
|
||||
"e" or "E", then a plus sign or a minus sign, and then an exponent.
|
||||
|
||||
Finally, floating-point constants may be specified as hexidecimal
|
||||
Finally, floating-point constants may be specified as hexadecimal
|
||||
constants; this form can ensure a perfectly bit-accurate representation of
|
||||
a particular floating-point number. These are specified with an "0x"
|
||||
prefix, followed by a zero or a one, a period, and then the remainder of
|
||||
the mantissa in hexidecimal form, with digits from 0-9, a-f, or A-F. The
|
||||
the mantissa in hexadecimal form, with digits from 0-9, a-f, or A-F. The
|
||||
start of the exponent is denoted by a "p", which is then followed by an
|
||||
optional plus or minus sign and then digits from 0 to 9. For example:
|
||||
|
||||
@@ -1204,7 +1309,7 @@ to specify special characters. These sequences all start with an initial
|
||||
* - ``\n``
|
||||
- newline
|
||||
* - ``\r``
|
||||
- carriabe return
|
||||
- carriage return
|
||||
* - ``\t``
|
||||
- horizontal tab
|
||||
* - ``\v``
|
||||
@@ -1212,7 +1317,7 @@ to specify special characters. These sequences all start with an initial
|
||||
* - ``\`` followed by one or more digits from 0-7
|
||||
- ASCII character in octal notation
|
||||
* - ``\x``, followed by one or more digits from 0-9, a-f, A-F
|
||||
- ASCII character in hexidecimal notation
|
||||
- ASCII character in hexadecimal notation
|
||||
|
||||
``ispc`` doesn't support a string data type; string constants can be passed
|
||||
as the first argument to the ``print()`` statement, however. ``ispc`` also
|
||||
@@ -1367,7 +1472,7 @@ store are:
|
||||
uniform float bar[10];
|
||||
|
||||
The first declaration corresponds to 10 gang-wide ``float`` values in
|
||||
memory, while the second declaration corresonds to 10 ``float`` values.
|
||||
memory, while the second declaration corresponds to 10 ``float`` values.
|
||||
|
||||
|
||||
Defining New Names For Types
|
||||
@@ -1531,7 +1636,7 @@ instance in the gang has its own unique pointer value)
|
||||
|
||||
(The rationale for this limitation is that references must be represented
|
||||
as either a uniform pointer or a varying pointer internally. While
|
||||
choosing a varying pointer would provide maximum flexibilty and eliminate
|
||||
choosing a varying pointer would provide maximum flexibility and eliminate
|
||||
this restriction, it would reduce performance in the common case where a
|
||||
uniform pointer is all that's needed. As a work-around, a varying pointer
|
||||
can be used in cases where a varying lvalue reference would be desired.)
|
||||
@@ -1554,7 +1659,7 @@ and then a brace-delimited list of enumerators with optional values:
|
||||
|
||||
Each ``enum`` declaration defines a new type; an attempt to implicitly
|
||||
convert between enumerations of different types gives a compile-time error,
|
||||
but enuemrations of different types can be explicitly cast to one other.
|
||||
but enumerations of different types can be explicitly cast to one another.
|
||||
|
||||
::
|
||||
|
||||
@@ -1564,7 +1669,7 @@ Enumerators are implicitly converted to integer types, however, so they can
|
||||
be directly passed to routines that take integer parameters and can be used
|
||||
in expressions including integers, for example. However, the integer
|
||||
result of such an expression must be explicitly cast back to the enumerant
|
||||
type if it to be assigned to a variable with the enuemrant type.
|
||||
type if it is to be assigned to a variable with the enumerant type.
|
||||
|
||||
::
|
||||
|
||||
@@ -1815,7 +1920,7 @@ Structures can also be initialized by providing element values in braces:
|
||||
....
|
||||
Color d = { 0.5, .75, 1.0 }; // r = 0.5, ...
|
||||
|
||||
Arrays of structures and arrays inside structures can be initialzed with
|
||||
Arrays of structures and arrays inside structures can be initialized with
|
||||
the expected syntax:
|
||||
|
||||
::
|
||||
@@ -1849,7 +1954,7 @@ Structure member access and array indexing also work as in C.
|
||||
return foo.f[4] - foo.i;
|
||||
|
||||
|
||||
The address-of operator, pointer derefernce operator, and pointer member
|
||||
The address-of operator, pointer dereference operator, and pointer member
|
||||
operator also work as expected.
|
||||
|
||||
::
|
||||
@@ -1889,12 +1994,37 @@ executes if the condition is false.
|
||||
else
|
||||
x *= 2.;
|
||||
|
||||
Conditional Statements: "switch"
|
||||
--------------------------------
|
||||
|
||||
The ``switch`` conditional statement is also available, again with the same
|
||||
behavior as in C; the expression used in the ``switch`` must be of integer
|
||||
type (but it can be uniform or varying). As in C, if there is no ``break``
|
||||
statement at the end of the code for a given case, execution "falls
|
||||
through" to the following case. These features are demonstrated in the
|
||||
code below.
|
||||
|
||||
::
|
||||
|
||||
int x = ...;
|
||||
switch (x) {
|
||||
case 0:
|
||||
case 1:
|
||||
foo(x);
|
||||
/* fall through */
|
||||
case 5:
|
||||
x = 0;
|
||||
break;
|
||||
default:
|
||||
x *= x;
|
||||
}
|
||||
|
||||
Basic Iteration Statements: "for", "while", and "do"
|
||||
----------------------------------------------------
|
||||
|
||||
``ispc`` supports ``for``, ``while``, and ``do`` loops, with the same
|
||||
specification as in C. Like C++, variables can be declared in the ``for``
|
||||
statment itself:
|
||||
statement itself:
|
||||
|
||||
::
|
||||
|
||||
@@ -1914,6 +2044,37 @@ one of them executes a ``continue`` statement, other program instances
|
||||
executing code in the loop body that didn't execute the ``continue`` will
|
||||
be unaffected by it.
|
||||
|
||||
Unstructured Control Flow: "goto"
|
||||
---------------------------------
|
||||
|
||||
``goto`` statements are allowed in ``ispc`` programs under limited
|
||||
circumstances; specifically, only when the compiler can determine that if
|
||||
any program instance executes a ``goto`` statement, then all of the program
|
||||
instances will be running at that statement, such that all will follow the
|
||||
``goto``.
|
||||
|
||||
Put another way: it's illegal for there to be "varying" control flow
|
||||
statements in scopes that enclose a ``goto`` statement. An error is issued
|
||||
if a ``goto`` is used in this situation.
|
||||
|
||||
The syntax for adding labels to ``ispc`` programs and jumping to them with
|
||||
``goto`` is the same as in C. The following code shows a ``goto`` based
|
||||
equivalent of a ``for`` loop where the induction variable ``i`` goes from
|
||||
zero to ten.
|
||||
|
||||
::
|
||||
|
||||
uniform int i = 0;
|
||||
check:
|
||||
if (i > 10)
|
||||
goto done;
|
||||
// loop body
|
||||
++i;
|
||||
goto check;
|
||||
done:
|
||||
// ...
|
||||
|
||||
|
||||
"Coherent" Control Flow Statements: "cif" and Friends
|
||||
-----------------------------------------------------
|
||||
|
||||
@@ -1978,7 +2139,7 @@ nested inside a ``foreach`` loop.) ``continue`` statements are legal in
|
||||
a program instance that executes a ``continue`` statement effectively
|
||||
skips over the rest of the loop body for the current iteration.
|
||||
|
||||
As a specific example, consdier the following ``foreach`` statement:
|
||||
As a specific example, consider the following ``foreach`` statement:
|
||||
|
||||
::
|
||||
|
||||
@@ -2076,7 +2237,7 @@ some computation on an array of data.
|
||||
}
|
||||
|
||||
Here, we've written a loop that explicitly loops over the data in chunks of
|
||||
``programCount`` elements. In each loop iteraton, the running program
|
||||
``programCount`` elements. In each loop iteration, the running program
|
||||
instances effectively collude amongst themselves using ``programIndex`` to
|
||||
determine which elements to work on in a way that ensures that all of the
|
||||
data elements will be processed. In this particular case, a ``foreach``
|
||||
@@ -2282,7 +2443,7 @@ distributions.
|
||||
If you are implementing your own task system, the remainder of this section
|
||||
discusses the requirements for these calls. You will also likely want to
|
||||
review the example task systems in ``examples/tasksys.cpp`` for reference.
|
||||
If you are not implmenting your own task system, you can skip reading the
|
||||
If you are not implementing your own task system, you can skip reading the
|
||||
remainder of this section.
|
||||
|
||||
Here are the declarations of the three functions that must be provided to
|
||||
@@ -2302,7 +2463,7 @@ implementation can efficiently wait for completion on just the tasks
|
||||
launched from a single function.
|
||||
|
||||
The first time one of ``ISPCLaunch()`` or ``ISPCAlloc()`` is called in an
|
||||
``ispc`` functon, the ``void *`` pointed to by the ``handlePtr`` parameter
|
||||
``ispc`` function, the ``void *`` pointed to by the ``handlePtr`` parameter
|
||||
will be ``NULL``. The implementations of these functions should then
|
||||
initialize ``*handlePtr`` to a unique handle value of some sort. (For
|
||||
example, it might allocate a small structure to record which tasks were
|
||||
@@ -2318,14 +2479,14 @@ than a pointer to it, as in the other functions.
|
||||
|
||||
The ``ISPCAlloc()`` function is used to allocate small blocks of memory to
|
||||
store parameters passed to tasks. It should return a pointer to memory
|
||||
with the given aize and alignment. Note that there is no explicit
|
||||
with the given size and alignment. Note that there is no explicit
|
||||
``ISPCFree()`` call; instead, all memory allocated within an ``ispc``
|
||||
function should be freed when ``ISPCSync()`` is called.
|
||||
|
||||
``ISPCLaunch()`` is called to launch one or more asynchronous
|
||||
tasks. Each ``launch`` statement in ``ispc`` code causes a call to
|
||||
``ISPCLaunch()`` to be emitted in the generated code. The three parameters
|
||||
after the handle pointer to thie function are relatively straightforward;
|
||||
after the handle pointer to the function are relatively straightforward;
|
||||
the ``void *f`` parameter holds a pointer to a function to call to run the
|
||||
work for this task, ``data`` holds a pointer to data to pass to this
|
||||
function, and ``count`` is the number of instances of this function to
|
||||
@@ -2340,7 +2501,7 @@ The signature of the provided function pointer ``f`` is
|
||||
int taskIndex, int taskCount)
|
||||
|
||||
When this function pointer is called by one of the hardware threads managed
|
||||
bythe task system, the ``data`` pointer passed to ``ISPCLaunch()`` should
|
||||
by the task system, the ``data`` pointer passed to ``ISPCLaunch()`` should
|
||||
be passed to it for its first parameter; ``threadCount`` gives the total
|
||||
number of hardware threads that have been spawned to run tasks and
|
||||
``threadIndex`` should be an integer index between zero and ``threadCount``
|
||||
@@ -2659,7 +2820,7 @@ generates the following output on a four-wide compilation target:
|
||||
When a varying variable is printed, the values for program instances that
|
||||
aren't currently executing are printed inside double parentheses,
|
||||
indicating inactive program instances. The elements for inactive program
|
||||
instances may have garabge values, though in some circumstances it can be
|
||||
instances may have garbage values, though in some circumstances it can be
|
||||
useful to see their values.
|
||||
|
||||
Assertions
|
||||
@@ -2879,7 +3040,7 @@ If called when none of the program instances are running,
|
||||
There are also a number of functions to compute "scan"s of values across
|
||||
the program instances.  For example, the ``exclusive_scan_add()`` function
|
||||
computes, for each program instance, the sum of the given value over all of
|
||||
the preceeding program instances. (The scans currently available in
|
||||
the preceding program instances. (The scans currently available in
|
||||
``ispc`` are all so-called "exclusive" scans, meaning that the value
|
||||
computed for a given element does not include the value provided for that
|
||||
element.) In C code, an exclusive add scan over an array might be
|
||||
@@ -3175,7 +3336,7 @@ rather than one per program instance.
|
||||
uniform int32 newval)
|
||||
|
||||
Be careful that you use the atomic function that you mean to; consider the
|
||||
folloiwng code:
|
||||
following code:
|
||||
|
||||
::
|
||||
|
||||
@@ -3532,7 +3693,7 @@ Restructuring Existing Programs to Use ISPC
|
||||
|
||||
``ispc`` is designed to enable you to incorporate
|
||||
SPMD parallelism into existing code with minimal modification; features
|
||||
like the ability to share memory and data structures betwen C/C++ and
|
||||
like the ability to share memory and data structures between C/C++ and
|
||||
``ispc`` code and the ability to directly call back and forth between
|
||||
``ispc`` and C/C++ are motivated by this. These features also make it
|
||||
easy to incrementally transform a program to use ``ispc``; the most
|
||||
@@ -3708,12 +3869,6 @@ elements to work with and then proceeds with the computation.
|
||||
}
|
||||
|
||||
|
||||
Related Languages
|
||||
=================
|
||||
|
||||
TODO: rsl, C*, IVL
|
||||
|
||||
|
||||
Disclaimer and Legal Information
|
||||
================================
|
||||
|
||||
@@ -22,8 +22,8 @@ also included in the ``examples/`` directory.)
|
||||
- ``ispc``, 1 core
|
||||
- ``ispc``, 4 cores
|
||||
* - `AOBench`_ (512 x 512 resolution)
|
||||
- 3.99x
|
||||
- 19.32x
|
||||
- 6.19x
|
||||
- 28.06x
|
||||
* - `Binomial Options`_ (128k options)
|
||||
- 7.94x
|
||||
- 33.43x
|
||||
@@ -31,23 +31,23 @@ also included in the ``examples/`` directory.)
|
||||
- 8.45x
|
||||
- 32.48x
|
||||
* - `Deferred Shading`_ (1280p)
|
||||
- n/a
|
||||
- 5.02x
|
||||
- 23.06x
|
||||
* - `Mandelbrot Set`_
|
||||
- 6.21x
|
||||
- 19.90x
|
||||
- 20.28x
|
||||
* - `Perlin Noise Function`_
|
||||
- 5.37x
|
||||
- n/a
|
||||
* - `Ray Tracer`_ (Sponza dataset)
|
||||
- 3.99x
|
||||
- 19.32x
|
||||
- 4.31x
|
||||
- 20.29x
|
||||
* - `3D Stencil`_
|
||||
- 3.76x
|
||||
- 13.79x
|
||||
- 4.05x
|
||||
- 15.53x
|
||||
* - `Volume Rendering`_
|
||||
- 3.11x
|
||||
- 15.80x
|
||||
- 3.60x
|
||||
- 17.53x
|
||||
|
||||
|
||||
.. _AOBench: https://github.com/ispc/ispc/tree/master/examples/aobench
|
||||
@@ -64,7 +64,7 @@ on each one:
|
||||
Depending on the specifics of the computation being performed, the code
|
||||
generated for this function could likely be improved by modifying the code
|
||||
so that the loop only goes as far through the data as is possible to pack
|
||||
an entire gang of program instances with computation each time thorugh the
|
||||
an entire gang of program instances with computation each time through the
|
||||
loop. Doing so enables the ``ispc`` compiler to generate more efficient
|
||||
code for cases where it knows that the execution mask is "all on". Then,
|
||||
an ``if`` statement at the end handles processing the ragged extra bits of
|
||||
@@ -153,7 +153,7 @@ processed, and so forth.
|
||||
|
||||
Performance benefit can come from using ``foreach_tiled`` in that it
|
||||
essentially optimizes for the benefit of iterating over *compact* regions
|
||||
of the domian (while ``foreach`` iterates over the domain in a way that
|
||||
of the domain (while ``foreach`` iterates over the domain in a way that
|
||||
generally allows linear memory access.) There are two benefits from
|
||||
processing compact regions of the domain.
|
||||
|
||||
@@ -215,7 +215,7 @@ Use "uniform" Whenever Appropriate
|
||||
----------------------------------
|
||||
|
||||
For any variable that will always have the same value across all of the
|
||||
program instances in a gang, declare the variable with the ``unfiorm``
|
||||
program instances in a gang, declare the variable with the ``uniform``
|
||||
qualifier. Doing so enables the ``ispc`` compiler to emit better code in
|
||||
many different ways.
|
||||
|
||||
@@ -229,7 +229,7 @@ number of iterations:
|
||||
|
||||
If this is written with ``i`` as a ``varying`` variable, as above, there's
|
||||
additional overhead in the code generated for the loop as the compiler
|
||||
emits instructions to handle the possibilty of not all program instances
|
||||
emits instructions to handle the possibility of not all program instances
|
||||
following the same control flow path (as might be the case if the loop
|
||||
limit, 10, was itself a ``varying`` value.)
|
||||
|
||||
@@ -568,7 +568,7 @@ mask of all lanes currently executing (assuming a four-wide gang size
|
||||
target machine).
|
||||
|
||||
For a fuller example of the utility of this functionality, see
|
||||
``examples/aobench_instrumented`` in the ``ispc`` distribution. Ths
|
||||
``examples/aobench_instrumented`` in the ``ispc`` distribution. This
|
||||
example includes an implementation of the ``ISPCInstrument()`` function
|
||||
that collects aggregate data about the program's execution behavior.
|
||||
|
||||
@@ -45,8 +45,7 @@
|
||||
developers mailing list</a></li>
|
||||
<li><a href="http://github.com/ispc/ispc/wiki/">Wiki</a></li>
|
||||
<li><a href="http://github.com/ispc/ispc/issues/">Bug tracking</a></li>
|
||||
<li><a href="doxygen/index.html">Doxygen documentation of
|
||||
<tt>ispc</tt> source code</a></li>
|
||||
<li><a href="doxygen/index.html">Doxygen</a></li>
|
||||
</ul>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
@@ -45,8 +45,7 @@
|
||||
developers mailing list</a></li>
|
||||
<li><a href="http://github.com/ispc/ispc/wiki/">Wiki</a></li>
|
||||
<li><a href="http://github.com/ispc/ispc/issues/">Bug tracking</a></li>
|
||||
<li><a href="doxygen/index.html">Doxygen documentation of
|
||||
<tt>ispc</tt> source code</a></li>
|
||||
<li><a href="doxygen/index.html">Doxygen</a></li>
|
||||
</ul>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
@@ -31,7 +31,7 @@ PROJECT_NAME = "Intel SPMD Program Compiler"
|
||||
# This could be handy for archiving the generated documentation or
|
||||
# if some version control system is used.
|
||||
|
||||
PROJECT_NUMBER = 1.1.0
|
||||
PROJECT_NUMBER = 1.1.3
|
||||
|
||||
# The OUTPUT_DIRECTORY tag is used to specify the (relative or absolute)
|
||||
# base path where the generated documentation will be put.
|
||||
|
||||
@@ -1,39 +1,7 @@
|
||||
|
||||
ARCH = $(shell uname)
|
||||
EXAMPLE=ao
|
||||
CPP_SRC=ao.cpp ao_serial.cpp
|
||||
ISPC_SRC=ao.ispc
|
||||
ISPC_TARGETS=sse2,sse4,avx
|
||||
|
||||
TASK_CXX=../tasksys.cpp
|
||||
TASK_LIB=-lpthread
|
||||
TASK_OBJ=$(addprefix objs/, $(subst ../,, $(TASK_CXX:.cpp=.o)))
|
||||
|
||||
CXX=g++
|
||||
CXXFLAGS=-Iobjs/ -O3 -Wall -m64
|
||||
ISPC=ispc
|
||||
ISPCFLAGS=-O2 --target=sse2,sse4,avx --arch=x86-64
|
||||
|
||||
ISPC_OBJS=objs/ao_ispc.o objs/ao_ispc_sse2.o objs/ao_ispc_sse4.o \
|
||||
objs/ao_ispc_avx.o
|
||||
OBJS=objs/ao.o objs/ao_serial.o $(ISPC_OBJS) $(TASK_OBJ)
|
||||
|
||||
default: ao
|
||||
|
||||
.PHONY: dirs clean
|
||||
|
||||
dirs:
|
||||
/bin/mkdir -p objs/
|
||||
|
||||
clean:
|
||||
/bin/rm -rf objs *~ ao
|
||||
|
||||
ao: dirs $(OBJS) $(TASK_OBJ)
|
||||
$(CXX) $(CXXFLAGS) -o $@ $(OBJS) -lm $(TASK_LIB)
|
||||
|
||||
objs/%.o: %.cpp
|
||||
$(CXX) $< $(CXXFLAGS) -c -o $@
|
||||
|
||||
objs/%.o: ../%.cpp
|
||||
$(CXX) $< $(CXXFLAGS) -c -o $@
|
||||
|
||||
objs/ao.o: objs/ao_ispc.h
|
||||
|
||||
objs/%_ispc.h objs/%_ispc.o objs/%_ispc_sse2.o objs/%_ispc_sse4.o objs/%_ispc_avx.o: %.ispc
|
||||
$(ISPC) $(ISPCFLAGS) $< -o objs/$*_ispc.o -h objs/$*_ispc.h
|
||||
include ../common.mk
|
||||
|
||||
@@ -82,7 +82,7 @@ static inline void vnormalize(vec &v) {
|
||||
}
|
||||
|
||||
|
||||
static inline void
|
||||
static void
|
||||
ray_plane_intersect(Isect &isect, Ray &ray, Plane &plane) {
|
||||
float d = -dot(plane.p, plane.n);
|
||||
float v = dot(ray.dir, plane.n);
|
||||
@@ -124,7 +124,7 @@ ray_sphere_intersect(Isect &isect, Ray &ray, Sphere &sphere) {
|
||||
}
|
||||
|
||||
|
||||
static inline void
|
||||
static void
|
||||
orthoBasis(vec basis[3], vec n) {
|
||||
basis[2] = n;
|
||||
basis[1].x = 0.0; basis[1].y = 0.0; basis[1].z = 0.0;
|
||||
@@ -147,7 +147,7 @@ orthoBasis(vec basis[3], vec n) {
|
||||
}
|
||||
|
||||
|
||||
static inline float
|
||||
static float
|
||||
ambient_occlusion(Isect &isect, Plane &plane, Sphere spheres[3],
|
||||
RNGState &rngstate) {
|
||||
float eps = 0.0001f;
|
||||
|
||||
@@ -14,13 +14,13 @@ dirs:
|
||||
clean:
|
||||
/bin/rm -rf objs *~ ao
|
||||
|
||||
ao: dirs objs/ao.o objs/instrument.o objs/ao_ispc.o
|
||||
$(CXX) $(CXXFLAGS) -o $@ objs/ao.o objs/ao_ispc.o objs/instrument.o -lm -lpthread
|
||||
ao: objs/ao.o objs/instrument.o objs/ao_ispc.o ../tasksys.cpp
|
||||
$(CXX) $(CXXFLAGS) -o $@ $^ -lm -lpthread
|
||||
|
||||
objs/%.o: %.cpp
|
||||
objs/%.o: %.cpp dirs
|
||||
$(CXX) $< $(CXXFLAGS) -c -o $@
|
||||
|
||||
objs/ao.o: objs/ao_ispc.h
|
||||
|
||||
objs/%_ispc.h objs/%_ispc.o: %.ispc
|
||||
$(ISPC) $(ISPCFLAGS) $< -o objs/$*_ispc.o -h objs/$*_ispc.h
|
||||
objs/%_ispc.h objs/%_ispc.o: %.ispc dirs
|
||||
$(ISPC) $(ISPCFLAGS) $< -o objs/$*_ispc.o -h objs/$*_instrumented_ispc.h
|
||||
|
||||
59
examples/common.mk
Normal file
59
examples/common.mk
Normal file
@@ -0,0 +1,59 @@
|
||||
|
||||
TASK_CXX=../tasksys.cpp
|
||||
TASK_LIB=-lpthread
|
||||
TASK_OBJ=tasksys.o
|
||||
|
||||
CXX=g++
|
||||
CXXFLAGS=-Iobjs/ -O2 -m64
|
||||
LIBS=-lm $(TASK_LIB) -lstdc++
|
||||
ISPC=ispc -O2 --arch=x86-64 $(ISPC_FLAGS)
|
||||
ISPC_OBJS=$(addprefix objs/, $(ISPC_SRC:.ispc=)_ispc.o $(ISPC_SRC:.ispc=)_ispc_sse2.o \
|
||||
$(ISPC_SRC:.ispc=)_ispc_sse4.o $(ISPC_SRC:.ispc=)_ispc_avx.o)
|
||||
ISPC_HEADER=objs/$(ISPC_SRC:.ispc=_ispc.h)
|
||||
CPP_OBJS=$(addprefix objs/, $(CPP_SRC:.cpp=.o) $(TASK_OBJ))
|
||||
|
||||
default: $(EXAMPLE)
|
||||
|
||||
all: $(EXAMPLE) $(EXAMPLE)-sse4 $(EXAMPLE)-generic16
|
||||
|
||||
.PHONY: dirs clean
|
||||
|
||||
dirs:
|
||||
/bin/mkdir -p objs/
|
||||
|
||||
objs/%.cpp objs/%.o objs/%.h: dirs
|
||||
|
||||
clean:
|
||||
/bin/rm -rf objs *~ $(EXAMPLE) $(EXAMPLE)-sse4 $(EXAMPLE)-generic16
|
||||
|
||||
$(EXAMPLE): $(CPP_OBJS) $(ISPC_OBJS)
|
||||
$(CXX) $(CXXFLAGS) -o $@ $^ $(LIBS)
|
||||
|
||||
objs/%.o: %.cpp dirs $(ISPC_HEADER)
|
||||
$(CXX) $< $(CXXFLAGS) -c -o $@
|
||||
|
||||
objs/%.o: ../%.cpp dirs
|
||||
$(CXX) $< $(CXXFLAGS) -c -o $@
|
||||
|
||||
objs/$(EXAMPLE).o: objs/$(EXAMPLE)_ispc.h
|
||||
|
||||
objs/%_ispc.h objs/%_ispc.o objs/%_ispc_sse2.o objs/%_ispc_sse4.o objs/%_ispc_avx.o: %.ispc
|
||||
$(ISPC) --target=$(ISPC_TARGETS) $< -o objs/$*_ispc.o -h objs/$*_ispc.h
|
||||
|
||||
objs/$(ISPC_SRC:.ispc=)_sse4.cpp: $(ISPC_SRC)
|
||||
$(ISPC) $< -o $@ --target=generic-4 --emit-c++ --c++-include-file=sse4.h
|
||||
|
||||
objs/$(ISPC_SRC:.ispc=)_sse4.o: objs/$(ISPC_SRC:.ispc=)_sse4.cpp
|
||||
$(CXX) -I../intrinsics -msse4.2 $< $(CXXFLAGS) -c -o $@
|
||||
|
||||
$(EXAMPLE)-sse4: $(CPP_OBJS) objs/$(ISPC_SRC:.ispc=)_sse4.o
|
||||
$(CXX) $(CXXFLAGS) -o $@ $^ $(LIBS)
|
||||
|
||||
objs/$(ISPC_SRC:.ispc=)_generic16.cpp: $(ISPC_SRC)
|
||||
$(ISPC) $< -o $@ --target=generic-16 --emit-c++ --c++-include-file=generic-16.h
|
||||
|
||||
objs/$(ISPC_SRC:.ispc=)_generic16.o: objs/$(ISPC_SRC:.ispc=)_generic16.cpp
|
||||
$(CXX) -I../intrinsics $< $(CXXFLAGS) -c -o $@
|
||||
|
||||
$(EXAMPLE)-generic16: $(CPP_OBJS) objs/$(ISPC_SRC:.ispc=)_generic16.o
|
||||
$(CXX) $(CXXFLAGS) -o $@ $^ $(LIBS)
|
||||
@@ -1,38 +1,8 @@
|
||||
|
||||
ARCH = $(shell uname)
|
||||
EXAMPLE=deferred_shading
|
||||
CPP_SRC=common.cpp main.cpp dynamic_c.cpp dynamic_cilk.cpp
|
||||
ISPC_SRC=kernels.ispc
|
||||
ISPC_TARGETS=sse2,sse4-x2,avx-x2
|
||||
ISPC_FLAGS=--opt=fast-math
|
||||
|
||||
TASK_CXX=../tasksys.cpp
|
||||
TASK_LIB=-lpthread
|
||||
TASK_OBJ=$(addprefix objs/, $(subst ../,, $(TASK_CXX:.cpp=.o)))
|
||||
|
||||
CXX=g++
|
||||
CXXFLAGS=-Iobjs/ -O3 -Wall -m64
|
||||
ISPC=ispc
|
||||
ISPCFLAGS=-O2 --target=sse2,sse4-x2,avx-x2 --arch=x86-64 --math-lib=fast
|
||||
|
||||
OBJS=objs/main.o objs/common.o objs/kernels_ispc.o objs/kernels_ispc_sse2.o \
|
||||
objs/kernels_ispc_sse4.o objs/kernels_ispc_avx.o \
|
||||
objs/dynamic_c.o objs/dynamic_cilk.o
|
||||
|
||||
default: deferred_shading
|
||||
|
||||
.PHONY: dirs clean
|
||||
.PRECIOUS: objs/kernels_ispc.h
|
||||
|
||||
dirs:
|
||||
/bin/mkdir -p objs/
|
||||
|
||||
clean:
|
||||
/bin/rm -rf objs *~ deferred_shading
|
||||
|
||||
deferred_shading: dirs $(OBJS) $(TASK_OBJ)
|
||||
$(CXX) $(CXXFLAGS) -o $@ $(OBJS) $(TASK_OBJ) -lm $(TASK_LIB)
|
||||
|
||||
objs/%.o: %.cpp objs/kernels_ispc.h deferred.h
|
||||
$(CXX) $< $(CXXFLAGS) -c -o $@
|
||||
|
||||
objs/%.o: ../%.cpp
|
||||
$(CXX) $< $(CXXFLAGS) -c -o $@
|
||||
|
||||
objs/%_ispc.h objs/%_ispc.o objs/%_ispc_sse2.o objs/%_ispc_sse4.o objs/%_ispc_avx.o: %.ispc
|
||||
$(ISPC) $(ISPCFLAGS) $< -o objs/$*_ispc.o -h objs/$*_ispc.h
|
||||
include ../common.mk
|
||||
|
||||
1478
examples/intrinsics/generic-16.h
Normal file
1478
examples/intrinsics/generic-16.h
Normal file
File diff suppressed because it is too large
Load Diff
3776
examples/intrinsics/sse4.h
Normal file
3776
examples/intrinsics/sse4.h
Normal file
File diff suppressed because it is too large
Load Diff
@@ -1,30 +1,7 @@
|
||||
|
||||
CXX=g++ -m64
|
||||
CXXFLAGS=-Iobjs/ -O3 -Wall
|
||||
ISPC=ispc
|
||||
ISPCFLAGS=-O2 --target=sse2,sse4-x2,avx-x2 --arch=x86-64
|
||||
EXAMPLE=mandelbrot
|
||||
CPP_SRC=mandelbrot.cpp mandelbrot_serial.cpp
|
||||
ISPC_SRC=mandelbrot.ispc
|
||||
ISPC_TARGETS=sse2,sse4-x2,avx-x2
|
||||
|
||||
default: mandelbrot
|
||||
|
||||
.PHONY: dirs clean
|
||||
|
||||
dirs:
|
||||
/bin/mkdir -p objs/
|
||||
|
||||
clean:
|
||||
/bin/rm -rf objs *~ mandelbrot
|
||||
|
||||
OBJS=objs/mandelbrot.o objs/mandelbrot_serial.o objs/mandelbrot_ispc_sse2.o \
|
||||
objs/mandelbrot_ispc_sse4.o objs/mandelbrot_ispc_avx.o \
|
||||
objs/mandelbrot_ispc.o
|
||||
|
||||
mandelbrot: dirs $(OBJS)
|
||||
$(CXX) $(CXXFLAGS) -o $@ $(OBJS) -lm
|
||||
|
||||
objs/%.o: %.cpp
|
||||
$(CXX) $< $(CXXFLAGS) -c -o $@
|
||||
|
||||
objs/mandelbrot.o: objs/mandelbrot_ispc.h
|
||||
|
||||
objs/%_ispc.h objs/%_ispc.o objs/%_ispc_sse2.o objs/%_ispc_sse4.o objs/%_ispc_avx.o: %.ispc
|
||||
$(ISPC) $(ISPCFLAGS) $< -o objs/$*_ispc.o -h objs/$*_ispc.h
|
||||
include ../common.mk
|
||||
|
||||
@@ -1,39 +1,7 @@
|
||||
|
||||
ARCH = $(shell uname)
|
||||
EXAMPLE=mandelbrot
|
||||
CPP_SRC=mandelbrot.cpp mandelbrot_serial.cpp
|
||||
ISPC_SRC=mandelbrot.ispc
|
||||
ISPC_TARGETS=sse2,sse4-x2,avx-x2
|
||||
|
||||
TASK_CXX=../tasksys.cpp
|
||||
TASK_LIB=-lpthread
|
||||
TASK_OBJ=$(addprefix objs/, $(subst ../,, $(TASK_CXX:.cpp=.o)))
|
||||
|
||||
CXX=g++
|
||||
CXXFLAGS=-Iobjs/ -O3 -Wall -m64
|
||||
ISPC=ispc
|
||||
ISPCFLAGS=-O2 --target=sse2,sse4-x2,avx-x2 --arch=x86-64
|
||||
|
||||
OBJS=objs/mandelbrot.o objs/mandelbrot_serial.o $(TASK_OBJ) \
|
||||
objs/mandelbrot_ispc.o objs/mandelbrot_ispc_sse2.o \
|
||||
objs/mandelbrot_ispc_sse4.o objs/mandelbrot_ispc_avx.o
|
||||
|
||||
default: mandelbrot
|
||||
|
||||
.PHONY: dirs clean
|
||||
|
||||
dirs:
|
||||
/bin/mkdir -p objs/
|
||||
|
||||
clean:
|
||||
/bin/rm -rf objs *~ mandelbrot
|
||||
|
||||
mandelbrot: dirs $(OBJS)
|
||||
$(CXX) $(CXXFLAGS) -o $@ $(OBJS) -lm $(TASK_LIB)
|
||||
|
||||
objs/%.o: %.cpp
|
||||
$(CXX) $< $(CXXFLAGS) -c -o $@
|
||||
|
||||
objs/%.o: ../%.cpp
|
||||
$(CXX) $< $(CXXFLAGS) -c -o $@
|
||||
|
||||
objs/mandelbrot.o: objs/mandelbrot_ispc.h
|
||||
|
||||
objs/%_ispc.h objs/%_ispc.o objs/%_ispc_sse2.o objs/%_ispc_sse4.o objs/%_ispc_avx.o: %.ispc
|
||||
$(ISPC) $(ISPCFLAGS) $< -o objs/$*_ispc.o -h objs/$*_ispc.h
|
||||
include ../common.mk
|
||||
|
||||
@@ -49,17 +49,16 @@ mandel(float c_re, float c_im, int count) {
|
||||
}
|
||||
|
||||
|
||||
/* Task to compute the Mandelbrot iterations for a span of scanlines from
|
||||
[ystart,yend).
|
||||
/* Task to compute the Mandelbrot iterations for a single scanline.
|
||||
*/
|
||||
task void
|
||||
mandelbrot_scanlines(uniform int ybase, uniform int span,
|
||||
uniform float x0, uniform float dx,
|
||||
uniform float y0, uniform float dy,
|
||||
uniform int width, uniform int maxIterations,
|
||||
uniform int output[]) {
|
||||
uniform int ystart = ybase + taskIndex * span;
|
||||
uniform int yend = ystart + span;
|
||||
mandelbrot_scanline(uniform float x0, uniform float dx,
|
||||
uniform float y0, uniform float dy,
|
||||
uniform int width, uniform int height,
|
||||
uniform int span,
|
||||
uniform int maxIterations, uniform int output[]) {
|
||||
uniform int ystart = taskIndex * span;
|
||||
uniform int yend = min((taskIndex+1) * span, (unsigned int)height);
|
||||
|
||||
foreach (yi = ystart ... yend, xi = 0 ... width) {
|
||||
float x = x0 + xi * dx;
|
||||
@@ -71,20 +70,6 @@ mandelbrot_scanlines(uniform int ybase, uniform int span,
|
||||
}
|
||||
|
||||
|
||||
task void
|
||||
mandelbrot_chunk(uniform float x0, uniform float dx,
|
||||
uniform float y0, uniform float dy,
|
||||
uniform int width, uniform int height,
|
||||
uniform int maxIterations, uniform int output[]) {
|
||||
uniform int ystart = taskIndex * (height/taskCount);
|
||||
uniform int yend = (taskIndex+1) * (height/taskCount);
|
||||
uniform int span = 1;
|
||||
|
||||
launch[(yend-ystart)/span] < mandelbrot_scanlines(ystart, span, x0, dx, y0, dy,
|
||||
width, maxIterations, output) >;
|
||||
}
|
||||
|
||||
|
||||
export void
|
||||
mandelbrot_ispc(uniform float x0, uniform float y0,
|
||||
uniform float x1, uniform float y1,
|
||||
@@ -92,7 +77,8 @@ mandelbrot_ispc(uniform float x0, uniform float y0,
|
||||
uniform int maxIterations, uniform int output[]) {
|
||||
uniform float dx = (x1 - x0) / width;
|
||||
uniform float dy = (y1 - y0) / height;
|
||||
uniform int span = 4;
|
||||
|
||||
launch[32] < mandelbrot_chunk(x0, dx, y0, dy, width, height,
|
||||
maxIterations, output) >;
|
||||
launch[height/span] < mandelbrot_scanline(x0, dx, y0, dy, width, height, span,
|
||||
maxIterations, output) >;
|
||||
}
|
||||
|
||||
@@ -1,29 +1,7 @@
|
||||
|
||||
CXX=g++ -m64
|
||||
CXXFLAGS=-Iobjs/ -O3 -Wall
|
||||
ISPC=ispc
|
||||
ISPCFLAGS=-O2 --target=sse2,sse4,avx-x2 --arch=x86-64
|
||||
EXAMPLE=noise
|
||||
CPP_SRC=$(EXAMPLE).cpp $(EXAMPLE)_serial.cpp
|
||||
ISPC_SRC=noise.ispc
|
||||
ISPC_TARGETS=sse2,sse4,avx-x2
|
||||
|
||||
OBJS=objs/noise.o objs/noise_serial.o objs/noise_ispc.o objs/noise_ispc_sse2.o \
|
||||
objs/noise_ispc_sse4.o objs/noise_ispc_avx.o
|
||||
|
||||
default: noise
|
||||
|
||||
.PHONY: dirs clean
|
||||
|
||||
dirs:
|
||||
/bin/mkdir -p objs/
|
||||
|
||||
clean:
|
||||
/bin/rm -rf objs *~ noise
|
||||
|
||||
noise: dirs $(OBJS)
|
||||
$(CXX) $(CXXFLAGS) -o $@ $(OBJS) -lm
|
||||
|
||||
objs/%.o: %.cpp
|
||||
$(CXX) $< $(CXXFLAGS) -c -o $@
|
||||
|
||||
objs/noise.o: objs/noise_ispc.h
|
||||
|
||||
objs/%_ispc.h objs/%_ispc.o objs/%_ispc_sse2.o objs/%_ispc_sse4.o objs/%_ispc_avx.o: %.ispc
|
||||
$(ISPC) $(ISPCFLAGS) $< -o objs/$*_ispc.o -h objs/$*_ispc.h
|
||||
include ../common.mk
|
||||
|
||||
@@ -1,38 +1,7 @@
|
||||
|
||||
TASK_CXX=../tasksys.cpp
|
||||
TASK_LIB=-lpthread
|
||||
TASK_OBJ=$(addprefix objs/, $(subst ../,, $(TASK_CXX:.cpp=.o)))
|
||||
EXAMPLE=options
|
||||
CPP_SRC=options.cpp options_serial.cpp
|
||||
ISPC_SRC=options.ispc
|
||||
ISPC_TARGETS=sse2,sse4-x2,avx-x2
|
||||
|
||||
|
||||
CXX=g++ -m64
|
||||
CXXFLAGS=-Iobjs/ -g -Wall
|
||||
ISPC=ispc
|
||||
ISPCFLAGS=-O2 --target=sse2,sse4-x2,avx-x2 --arch=x86-64
|
||||
|
||||
OBJS=objs/options.o objs/options_serial.o objs/options_ispc.o \
|
||||
objs/options_ispc_sse2.o objs/options_ispc_sse4.o \
|
||||
objs/options_ispc_avx.o $(TASK_OBJ)
|
||||
|
||||
default: options
|
||||
|
||||
.PHONY: dirs clean
|
||||
|
||||
dirs:
|
||||
/bin/mkdir -p objs/
|
||||
|
||||
clean:
|
||||
/bin/rm -rf objs *~ options
|
||||
|
||||
options: dirs $(OBJS)
|
||||
$(CXX) $(CXXFLAGS) -o $@ $(OBJS) -lm $(TASK_LIB)
|
||||
|
||||
objs/%.o: %.cpp
|
||||
$(CXX) $< $(CXXFLAGS) -c -o $@
|
||||
|
||||
objs/%.o: ../%.cpp
|
||||
$(CXX) $< $(CXXFLAGS) -c -o $@
|
||||
|
||||
objs/options.o: objs/options_ispc.h options_defs.h
|
||||
|
||||
objs/%_ispc.h objs/%_ispc.o objs/%_ispc_sse2.o objs/%_ispc_sse4.o objs/%_ispc_avx.o: %.ispc options_defs.h
|
||||
$(ISPC) $(ISPCFLAGS) $< -o objs/$*_ispc.o -h objs/$*_ispc.h
|
||||
include ../common.mk
|
||||
|
||||
@@ -1,38 +1,7 @@
|
||||
|
||||
ARCH = $(shell uname)
|
||||
EXAMPLE=rt
|
||||
CPP_SRC=rt.cpp rt_serial.cpp
|
||||
ISPC_SRC=rt.ispc
|
||||
ISPC_TARGETS=sse2,sse4-x2,avx
|
||||
|
||||
TASK_CXX=../tasksys.cpp
|
||||
TASK_LIB=-lpthread
|
||||
TASK_OBJ=$(addprefix objs/, $(subst ../,, $(TASK_CXX:.cpp=.o)))
|
||||
|
||||
CXX=g++
|
||||
CXXFLAGS=-Iobjs/ -O3 -Wall -m64
|
||||
ISPC=ispc
|
||||
ISPCFLAGS=-O2 --target=sse2,sse4-x2,avx --arch=x86-64
|
||||
|
||||
OBJS=objs/rt.o objs/rt_serial.o $(TASK_OBJ) objs/rt_ispc.o objs/rt_ispc_sse2.o \
|
||||
objs/rt_ispc_sse4.o objs/rt_ispc_avx.o
|
||||
|
||||
default: rt
|
||||
|
||||
.PHONY: dirs clean
|
||||
|
||||
dirs:
|
||||
/bin/mkdir -p objs/
|
||||
|
||||
clean:
|
||||
/bin/rm -rf objs *~ rt
|
||||
|
||||
rt: dirs $(OBJS)
|
||||
$(CXX) $(CXXFLAGS) -o $@ $(OBJS) -lm $(TASK_LIB)
|
||||
|
||||
objs/%.o: %.cpp
|
||||
$(CXX) $< $(CXXFLAGS) -c -o $@
|
||||
|
||||
objs/%.o: ../%.cpp
|
||||
$(CXX) $< $(CXXFLAGS) -c -o $@
|
||||
|
||||
objs/rt.o: objs/rt_ispc.h
|
||||
|
||||
objs/%_ispc.h objs/%_ispc.o objs/%_ispc_sse2.o objs/%_ispc_sse4.o objs/%_ispc_avx.o: %.ispc
|
||||
$(ISPC) $(ISPCFLAGS) $< -o objs/$*_ispc.o -h objs/$*_ispc.h
|
||||
include ../common.mk
|
||||
|
||||
@@ -104,8 +104,8 @@ static void generateRay(uniform const float raster2camera[4][4],
|
||||
}
|
||||
|
||||
|
||||
static inline bool BBoxIntersect(const uniform float bounds[2][3],
|
||||
const Ray &ray) {
|
||||
static bool BBoxIntersect(const uniform float bounds[2][3],
|
||||
const Ray &ray) {
|
||||
uniform float3 bounds0 = { bounds[0][0], bounds[0][1], bounds[0][2] };
|
||||
uniform float3 bounds1 = { bounds[1][0], bounds[1][1], bounds[1][2] };
|
||||
float t0 = ray.mint, t1 = ray.maxt;
|
||||
@@ -143,7 +143,7 @@ static inline bool BBoxIntersect(const uniform float bounds[2][3],
|
||||
|
||||
|
||||
|
||||
static inline bool TriIntersect(const Triangle &tri, Ray &ray) {
|
||||
static bool TriIntersect(const Triangle &tri, Ray &ray) {
|
||||
uniform float3 p0 = { tri.p[0][0], tri.p[0][1], tri.p[0][2] };
|
||||
uniform float3 p1 = { tri.p[1][0], tri.p[1][1], tri.p[1][2] };
|
||||
uniform float3 p2 = { tri.p[2][0], tri.p[2][1], tri.p[2][2] };
|
||||
|
||||
@@ -1,39 +1,7 @@
|
||||
|
||||
ARCH = $(shell uname)
|
||||
EXAMPLE=stencil
|
||||
CPP_SRC=stencil.cpp stencil_serial.cpp
|
||||
ISPC_SRC=stencil.ispc
|
||||
ISPC_TARGETS=sse2,sse4-x2,avx-x2
|
||||
|
||||
TASK_CXX=../tasksys.cpp
|
||||
TASK_LIB=-lpthread
|
||||
TASK_OBJ=$(addprefix objs/, $(subst ../,, $(TASK_CXX:.cpp=.o)))
|
||||
|
||||
CXX=g++
|
||||
CXXFLAGS=-Iobjs/ -O3 -Wall -m64
|
||||
ISPC=ispc
|
||||
ISPCFLAGS=-O2 --target=sse2,sse4-x2,avx --arch=x86-64
|
||||
|
||||
OBJS=objs/stencil.o objs/stencil_serial.o $(TASK_OBJ) objs/stencil_ispc.o \
|
||||
objs/stencil_ispc_sse2.o objs/stencil_ispc_sse4.o \
|
||||
objs/stencil_ispc_avx.o
|
||||
|
||||
default: stencil
|
||||
|
||||
.PHONY: dirs clean
|
||||
|
||||
dirs:
|
||||
/bin/mkdir -p objs/
|
||||
|
||||
clean:
|
||||
/bin/rm -rf objs *~ stencil
|
||||
|
||||
stencil: dirs $(OBJS)
|
||||
$(CXX) $(CXXFLAGS) -o $@ $(OBJS) -lm $(TASK_LIB)
|
||||
|
||||
objs/%.o: %.cpp
|
||||
$(CXX) $< $(CXXFLAGS) -c -o $@
|
||||
|
||||
objs/%.o: ../%.cpp
|
||||
$(CXX) $< $(CXXFLAGS) -c -o $@
|
||||
|
||||
objs/stencil.o: objs/stencil_ispc.h
|
||||
|
||||
objs/%_ispc.h objs/%_ispc.o objs/%_ispc_sse2.o objs/%_ispc_sse4.o objs/%_ispc_avx.o: %.ispc
|
||||
$(ISPC) $(ISPCFLAGS) $< -o objs/$*_ispc.o -h objs/$*_ispc.h
|
||||
include ../common.mk
|
||||
|
||||
@@ -41,27 +41,23 @@ stencil_step(uniform int x0, uniform int x1,
|
||||
uniform const float Ain[], uniform float Aout[]) {
|
||||
const uniform int Nxy = Nx * Ny;
|
||||
|
||||
for (uniform int z = z0; z < z1; ++z) {
|
||||
for (uniform int y = y0; y < y1; ++y) {
|
||||
foreach (x = x0 ... x1) {
|
||||
int index = (z * Nxy) + (y * Nx) + x;
|
||||
foreach (z = z0 ... z1, y = y0 ... y1, x = x0 ... x1) {
|
||||
int index = (z * Nxy) + (y * Nx) + x;
|
||||
#define A_cur(x, y, z) Ain[index + (x) + ((y) * Nx) + ((z) * Nxy)]
|
||||
#define A_next(x, y, z) Aout[index + (x) + ((y) * Nx) + ((z) * Nxy)]
|
||||
float div = coef[0] * A_cur(0, 0, 0) +
|
||||
coef[1] * (A_cur(+1, 0, 0) + A_cur(-1, 0, 0) +
|
||||
A_cur(0, +1, 0) + A_cur(0, -1, 0) +
|
||||
A_cur(0, 0, +1) + A_cur(0, 0, -1)) +
|
||||
coef[2] * (A_cur(+2, 0, 0) + A_cur(-2, 0, 0) +
|
||||
A_cur(0, +2, 0) + A_cur(0, -2, 0) +
|
||||
A_cur(0, 0, +2) + A_cur(0, 0, -2)) +
|
||||
coef[3] * (A_cur(+3, 0, 0) + A_cur(-3, 0, 0) +
|
||||
A_cur(0, +3, 0) + A_cur(0, -3, 0) +
|
||||
A_cur(0, 0, +3) + A_cur(0, 0, -3));
|
||||
float div = coef[0] * A_cur(0, 0, 0) +
|
||||
coef[1] * (A_cur(+1, 0, 0) + A_cur(-1, 0, 0) +
|
||||
A_cur(0, +1, 0) + A_cur(0, -1, 0) +
|
||||
A_cur(0, 0, +1) + A_cur(0, 0, -1)) +
|
||||
coef[2] * (A_cur(+2, 0, 0) + A_cur(-2, 0, 0) +
|
||||
A_cur(0, +2, 0) + A_cur(0, -2, 0) +
|
||||
A_cur(0, 0, +2) + A_cur(0, 0, -2)) +
|
||||
coef[3] * (A_cur(+3, 0, 0) + A_cur(-3, 0, 0) +
|
||||
A_cur(0, +3, 0) + A_cur(0, -3, 0) +
|
||||
A_cur(0, 0, +3) + A_cur(0, 0, -3));
|
||||
|
||||
A_next(0, 0, 0) = 2 * A_cur(0, 0, 0) - A_next(0, 0, 0) +
|
||||
vsq[index] * div;
|
||||
}
|
||||
}
|
||||
A_next(0, 0, 0) = 2 * A_cur(0, 0, 0) - A_next(0, 0, 0) +
|
||||
vsq[index] * div;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -69,11 +65,12 @@ stencil_step(uniform int x0, uniform int x1,
|
||||
static task void
|
||||
stencil_step_task(uniform int x0, uniform int x1,
|
||||
uniform int y0, uniform int y1,
|
||||
uniform int z0, uniform int z1,
|
||||
uniform int z0,
|
||||
uniform int Nx, uniform int Ny, uniform int Nz,
|
||||
uniform const float coef[4], uniform const float vsq[],
|
||||
uniform const float Ain[], uniform float Aout[]) {
|
||||
stencil_step(x0, x1, y0, y1, z0, z1, Nx, Ny, Nz, coef, vsq, Ain, Aout);
|
||||
stencil_step(x0, x1, y0, y1, z0+taskIndex, z0+taskIndex+1,
|
||||
Nx, Ny, Nz, coef, vsq, Ain, Aout);
|
||||
}
|
||||
|
||||
|
||||
@@ -89,17 +86,14 @@ loop_stencil_ispc_tasks(uniform int t0, uniform int t1,
|
||||
{
|
||||
for (uniform int t = t0; t < t1; ++t) {
|
||||
// Parallelize across cores as well: each task will work on a slice
|
||||
// of "dz" in the z extent of the volume. (dz=1 seems to work
|
||||
// better than any larger values.)
|
||||
uniform int dz = 1;
|
||||
for (uniform int z = z0; z < z1; z += dz) {
|
||||
if ((t & 1) == 0)
|
||||
launch < stencil_step_task(x0, x1, y0, y1, z, z+dz, Nx, Ny, Nz,
|
||||
coef, vsq, Aeven, Aodd) >;
|
||||
else
|
||||
launch < stencil_step_task(x0, x1, y0, y1, z, z+dz, Nx, Ny, Nz,
|
||||
coef, vsq, Aodd, Aeven) >;
|
||||
}
|
||||
// of 1 in the z extent of the volume.
|
||||
if ((t & 1) == 0)
|
||||
launch[z1-z0] < stencil_step_task(x0, x1, y0, y1, z0, Nx, Ny, Nz,
|
||||
coef, vsq, Aeven, Aodd) >;
|
||||
else
|
||||
launch[z1-z0] < stencil_step_task(x0, x1, y0, y1, z0, Nx, Ny, Nz,
|
||||
coef, vsq, Aodd, Aeven) >;
|
||||
|
||||
// We need to wait for all of the launched tasks to finish before
|
||||
// starting the next iteration.
|
||||
sync;
|
||||
|
||||
@@ -1,38 +1,7 @@
|
||||
|
||||
ARCH = $(shell uname)
|
||||
EXAMPLE=volume
|
||||
CPP_SRC=volume.cpp volume_serial.cpp
|
||||
ISPC_SRC=volume.ispc
|
||||
ISPC_TARGETS=sse2,sse4-x2,avx
|
||||
|
||||
TASK_CXX=../tasksys.cpp
|
||||
TASK_LIB=-lpthread
|
||||
TASK_OBJ=$(addprefix objs/, $(subst ../,, $(TASK_CXX:.cpp=.o)))
|
||||
|
||||
CXX=g++
|
||||
CXXFLAGS=-Iobjs/ -O3 -Wall -m64
|
||||
ISPC=ispc
|
||||
ISPCFLAGS=-O2 --target=sse2,sse4-x2 --arch=x86-64
|
||||
|
||||
OBJS=objs/volume.o objs/volume_serial.o $(TASK_OBJ) objs/volume_ispc.o \
|
||||
objs/volume_ispc_sse2.o objs/volume_ispc_sse4.o
|
||||
|
||||
default: volume
|
||||
|
||||
.PHONY: dirs clean
|
||||
|
||||
dirs:
|
||||
/bin/mkdir -p objs/
|
||||
|
||||
clean:
|
||||
/bin/rm -rf objs *~ volume
|
||||
|
||||
volume: dirs $(OBJS)
|
||||
$(CXX) $(CXXFLAGS) -o $@ $(OBJS) -lm $(TASK_LIB)
|
||||
|
||||
objs/%.o: %.cpp
|
||||
$(CXX) $< $(CXXFLAGS) -c -o $@
|
||||
|
||||
objs/%.o: ../%.cpp
|
||||
$(CXX) $< $(CXXFLAGS) -c -o $@
|
||||
|
||||
objs/volume.o: objs/volume_ispc.h
|
||||
|
||||
objs/%_ispc.h objs/%_ispc.o objs/%_ispc_sse2.o objs/%_ispc_sse4.o: %.ispc
|
||||
$(ISPC) $(ISPCFLAGS) $< -o objs/$*_ispc.o -h objs/$*_ispc.h
|
||||
include ../common.mk
|
||||
|
||||
@@ -124,24 +124,13 @@ static inline float D(int x, int y, int z, uniform int nVoxels[3],
|
||||
}
|
||||
|
||||
|
||||
static inline float Du(uniform int x, uniform int y, uniform int z,
|
||||
uniform int nVoxels[3], uniform float density[]) {
|
||||
x = clamp(x, 0, nVoxels[0]-1);
|
||||
y = clamp(y, 0, nVoxels[1]-1);
|
||||
z = clamp(z, 0, nVoxels[2]-1);
|
||||
|
||||
return density[z*nVoxels[0]*nVoxels[1] + y*nVoxels[0] + x];
|
||||
}
|
||||
|
||||
|
||||
static inline float3 Offset(float3 p, float3 pMin, float3 pMax) {
|
||||
return (p - pMin) / (pMax - pMin);
|
||||
}
|
||||
|
||||
|
||||
static inline float Density(float3 Pobj, float3 pMin, float3 pMax,
|
||||
uniform float density[], uniform int nVoxels[3],
|
||||
uniform bool &checkForSameVoxel) {
|
||||
static float Density(float3 Pobj, float3 pMin, float3 pMax,
|
||||
uniform float density[], uniform int nVoxels[3]) {
|
||||
if (!Inside(Pobj, pMin, pMax))
|
||||
return 0;
|
||||
// Compute voxel coordinates and offsets for _Pobj_
|
||||
@@ -153,39 +142,14 @@ static inline float Density(float3 Pobj, float3 pMin, float3 pMax,
|
||||
float dx = vox.x - vx, dy = vox.y - vy, dz = vox.z - vz;
|
||||
|
||||
// Trilinearly interpolate density values to compute local density
|
||||
float d00, d10, d01, d11;
|
||||
uniform int uvx, uvy, uvz;
|
||||
if (checkForSameVoxel && reduce_equal(vx, &uvx) && reduce_equal(vy, &uvy) &&
|
||||
reduce_equal(vz, &uvz)) {
|
||||
// If all of the program instances are inside the same voxel, then
|
||||
// we'll call the 'uniform' variant of the voxel density lookup
|
||||
// function, thus doing a single load for each value rather than a
|
||||
// gather.
|
||||
d00 = Lerp(dx, Du(uvx, uvy, uvz, nVoxels, density),
|
||||
Du(uvx+1, uvy, uvz, nVoxels, density));
|
||||
d10 = Lerp(dx, Du(uvx, uvy+1, uvz, nVoxels, density),
|
||||
Du(uvx+1, uvy+1, uvz, nVoxels, density));
|
||||
d01 = Lerp(dx, Du(uvx, uvy, uvz+1, nVoxels, density),
|
||||
Du(uvx+1, uvy, uvz+1, nVoxels, density));
|
||||
d11 = Lerp(dx, Du(uvx, uvy+1, uvz+1, nVoxels, density),
|
||||
Du(uvx+1, uvy+1, uvz+1, nVoxels, density));
|
||||
}
|
||||
else {
|
||||
// Otherwise, we have to do an actual gather in the more general
|
||||
// D() function. Once the reduce_equal tests above fail, we stop
|
||||
// checking in subsequent steps, since it's unlikely that this will
|
||||
// be true in the future once they've diverged into different
|
||||
// voxels.
|
||||
checkForSameVoxel = false;
|
||||
d00 = Lerp(dx, D(vx, vy, vz, nVoxels, density),
|
||||
D(vx+1, vy, vz, nVoxels, density));
|
||||
d10 = Lerp(dx, D(vx, vy+1, vz, nVoxels, density),
|
||||
D(vx+1, vy+1, vz, nVoxels, density));
|
||||
d01 = Lerp(dx, D(vx, vy, vz+1, nVoxels, density),
|
||||
D(vx+1, vy, vz+1, nVoxels, density));
|
||||
d11 = Lerp(dx, D(vx, vy+1, vz+1, nVoxels, density),
|
||||
D(vx+1, vy+1, vz+1, nVoxels, density));
|
||||
}
|
||||
float d00 = Lerp(dx, D(vx, vy, vz, nVoxels, density),
|
||||
D(vx+1, vy, vz, nVoxels, density));
|
||||
float d10 = Lerp(dx, D(vx, vy+1, vz, nVoxels, density),
|
||||
D(vx+1, vy+1, vz, nVoxels, density));
|
||||
float d01 = Lerp(dx, D(vx, vy, vz+1, nVoxels, density),
|
||||
D(vx+1, vy, vz+1, nVoxels, density));
|
||||
float d11 = Lerp(dx, D(vx, vy+1, vz+1, nVoxels, density),
|
||||
D(vx+1, vy+1, vz+1, nVoxels, density));
|
||||
float d0 = Lerp(dy, d00, d10);
|
||||
float d1 = Lerp(dy, d01, d11);
|
||||
return Lerp(dz, d0, d1);
|
||||
@@ -221,10 +185,8 @@ transmittance(uniform float3 p0, float3 p1, uniform float3 pMin,
|
||||
float t = rayT0;
|
||||
float3 pos = ray.origin + ray.dir * rayT0;
|
||||
float3 dirStep = ray.dir * stepT;
|
||||
uniform bool checkForSameVoxel = true;
|
||||
while (t < rayT1) {
|
||||
tau += stepDist * sigma_t * Density(pos, pMin, pMax, density, nVoxels,
|
||||
checkForSameVoxel);
|
||||
tau += stepDist * sigma_t * Density(pos, pMin, pMax, density, nVoxels);
|
||||
pos = pos + dirStep;
|
||||
t += stepT;
|
||||
}
|
||||
@@ -268,9 +230,8 @@ raymarch(uniform float density[], uniform int nVoxels[3], Ray ray) {
|
||||
float t = rayT0;
|
||||
float3 pos = ray.origin + ray.dir * rayT0;
|
||||
float3 dirStep = ray.dir * stepT;
|
||||
uniform bool checkForSameVoxel = true;
|
||||
cwhile (t < rayT1) {
|
||||
float d = Density(pos, pMin, pMax, density, nVoxels, checkForSameVoxel);
|
||||
float d = Density(pos, pMin, pMax, density, nVoxels);
|
||||
|
||||
// terminate once attenuation is high
|
||||
float atten = exp(-tau);
|
||||
|
||||
@@ -156,18 +156,18 @@
|
||||
<ItemGroup>
|
||||
<CustomBuild Include="volume.ispc">
|
||||
<FileType>Document</FileType>
|
||||
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">ispc -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --arch=x86 --target=sse2,sse4-x2
|
||||
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">ispc -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --arch=x86 --target=sse2,sse4-x2,avx
|
||||
</Command>
|
||||
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">ispc -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --target=sse2,sse4-x2
|
||||
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">ispc -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --target=sse2,sse4-x2,avx
|
||||
</Command>
|
||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_ispc.h</Outputs>
|
||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">$(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_ispc.h</Outputs>
|
||||
<Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">ispc -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --arch=x86 --target=sse2,sse4-x2
|
||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_avx.obj;$(TargetDir)%(Filename)_ispc.h</Outputs>
|
||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">$(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_avx.obj;$(TargetDir)%(Filename)_ispc.h</Outputs>
|
||||
<Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">ispc -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --arch=x86 --target=sse2,sse4-x2,avx
|
||||
</Command>
|
||||
<Command Condition="'$(Configuration)|$(Platform)'=='Release|x64'">ispc -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --target=sse2,sse4-x2
|
||||
<Command Condition="'$(Configuration)|$(Platform)'=='Release|x64'">ispc -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --target=sse2,sse4-x2,avx
|
||||
</Command>
|
||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_ispc.h</Outputs>
|
||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_ispc.h</Outputs>
|
||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_avx.obj;$(TargetDir)%(Filename)_ispc.h</Outputs>
|
||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_avx.obj;$(TargetDir)%(Filename)_ispc.h</Outputs>
|
||||
</CustomBuild>
|
||||
</ItemGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||
|
||||
10
expr.h
10
expr.h
@@ -314,7 +314,6 @@ public:
|
||||
std::string identifier;
|
||||
const SourcePos identifierPos;
|
||||
|
||||
protected:
|
||||
MemberExpr(Expr *expr, const char *identifier, SourcePos pos,
|
||||
SourcePos identifierPos, bool derefLValue);
|
||||
|
||||
@@ -493,8 +492,7 @@ private:
|
||||
probably-different type. */
|
||||
class TypeCastExpr : public Expr {
|
||||
public:
|
||||
TypeCastExpr(const Type *t, Expr *e, bool preserveUniformity,
|
||||
SourcePos p);
|
||||
TypeCastExpr(const Type *t, Expr *e, SourcePos p);
|
||||
|
||||
llvm::Value *GetValue(FunctionEmitContext *ctx) const;
|
||||
const Type *GetType() const;
|
||||
@@ -507,7 +505,6 @@ public:
|
||||
|
||||
const Type *type;
|
||||
Expr *expr;
|
||||
bool preserveUniformity;
|
||||
};
|
||||
|
||||
|
||||
@@ -634,13 +631,14 @@ public:
|
||||
being done just given type information without the parameter
|
||||
argument expressions being available. It returns true on success.
|
||||
*/
|
||||
bool ResolveOverloads(const std::vector<const Type *> &argTypes,
|
||||
bool ResolveOverloads(SourcePos argPos,
|
||||
const std::vector<const Type *> &argTypes,
|
||||
const std::vector<bool> *argCouldBeNULL = NULL);
|
||||
Symbol *GetMatchingFunction();
|
||||
|
||||
private:
|
||||
bool tryResolve(int (*matchFunc)(const Type *, const Type *),
|
||||
const std::vector<const Type *> &argTypes,
|
||||
SourcePos argPos, const std::vector<const Type *> &argTypes,
|
||||
const std::vector<bool> *argCouldBeNULL);
|
||||
|
||||
/** Name of the function that is being called. */
|
||||
|
||||
142
func.cpp
142
func.cpp
@@ -72,17 +72,10 @@ Function::Function(Symbol *s, const std::vector<Symbol *> &a, Stmt *c) {
|
||||
code = c;
|
||||
|
||||
maskSymbol = m->symbolTable->LookupVariable("__mask");
|
||||
assert(maskSymbol != NULL);
|
||||
Assert(maskSymbol != NULL);
|
||||
|
||||
if (code != NULL) {
|
||||
if (g->debugPrint) {
|
||||
fprintf(stderr, "Creating function \"%s\". Initial code:\n",
|
||||
sym->name.c_str());
|
||||
code->Print(0);
|
||||
fprintf(stderr, "---------------------\n");
|
||||
}
|
||||
|
||||
code = code->TypeCheck();
|
||||
code = TypeCheck(code);
|
||||
|
||||
if (code != NULL && g->debugPrint) {
|
||||
fprintf(stderr, "After typechecking function \"%s\":\n",
|
||||
@@ -92,7 +85,7 @@ Function::Function(Symbol *s, const std::vector<Symbol *> &a, Stmt *c) {
|
||||
}
|
||||
|
||||
if (code != NULL) {
|
||||
code = code->Optimize();
|
||||
code = Optimize(code);
|
||||
if (g->debugPrint) {
|
||||
fprintf(stderr, "After optimizing function \"%s\":\n",
|
||||
sym->name.c_str());
|
||||
@@ -109,7 +102,7 @@ Function::Function(Symbol *s, const std::vector<Symbol *> &a, Stmt *c) {
|
||||
}
|
||||
|
||||
const FunctionType *type = dynamic_cast<const FunctionType *>(sym->type);
|
||||
assert(type != NULL);
|
||||
Assert(type != NULL);
|
||||
|
||||
for (unsigned int i = 0; i < args.size(); ++i)
|
||||
if (dynamic_cast<const ReferenceType *>(args[i]->type) == NULL)
|
||||
@@ -117,13 +110,13 @@ Function::Function(Symbol *s, const std::vector<Symbol *> &a, Stmt *c) {
|
||||
|
||||
if (type->isTask) {
|
||||
threadIndexSym = m->symbolTable->LookupVariable("threadIndex");
|
||||
assert(threadIndexSym);
|
||||
Assert(threadIndexSym);
|
||||
threadCountSym = m->symbolTable->LookupVariable("threadCount");
|
||||
assert(threadCountSym);
|
||||
Assert(threadCountSym);
|
||||
taskIndexSym = m->symbolTable->LookupVariable("taskIndex");
|
||||
assert(taskIndexSym);
|
||||
Assert(taskIndexSym);
|
||||
taskCountSym = m->symbolTable->LookupVariable("taskCount");
|
||||
assert(taskCountSym);
|
||||
Assert(taskCountSym);
|
||||
}
|
||||
else
|
||||
threadIndexSym = threadCountSym = taskIndexSym = taskCountSym = NULL;
|
||||
@@ -133,7 +126,7 @@ Function::Function(Symbol *s, const std::vector<Symbol *> &a, Stmt *c) {
|
||||
const Type *
|
||||
Function::GetReturnType() const {
|
||||
const FunctionType *type = dynamic_cast<const FunctionType *>(sym->type);
|
||||
assert(type != NULL);
|
||||
Assert(type != NULL);
|
||||
return type->GetReturnType();
|
||||
}
|
||||
|
||||
@@ -141,7 +134,7 @@ Function::GetReturnType() const {
|
||||
const FunctionType *
|
||||
Function::GetType() const {
|
||||
const FunctionType *type = dynamic_cast<const FunctionType *>(sym->type);
|
||||
assert(type != NULL);
|
||||
Assert(type != NULL);
|
||||
return type;
|
||||
}
|
||||
|
||||
@@ -157,9 +150,9 @@ lCopyInTaskParameter(int i, llvm::Value *structArgPtr, const std::vector<Symbol
|
||||
// We expect the argument structure to come in as a pointer to a
|
||||
// structure. Confirm and figure out its type here.
|
||||
const llvm::Type *structArgType = structArgPtr->getType();
|
||||
assert(llvm::isa<llvm::PointerType>(structArgType));
|
||||
Assert(llvm::isa<llvm::PointerType>(structArgType));
|
||||
const llvm::PointerType *pt = llvm::dyn_cast<const llvm::PointerType>(structArgType);
|
||||
assert(llvm::isa<llvm::StructType>(pt->getElementType()));
|
||||
Assert(llvm::isa<llvm::StructType>(pt->getElementType()));
|
||||
const llvm::StructType *argStructType =
|
||||
llvm::dyn_cast<const llvm::StructType>(pt->getElementType());
|
||||
|
||||
@@ -189,10 +182,9 @@ lCopyInTaskParameter(int i, llvm::Value *structArgPtr, const std::vector<Symbol
|
||||
void
|
||||
Function::emitCode(FunctionEmitContext *ctx, llvm::Function *function,
|
||||
SourcePos firstStmtPos) {
|
||||
llvm::Value *maskPtr = ctx->AllocaInst(LLVMTypes::MaskType, "mask_memory");
|
||||
ctx->StoreInst(LLVMMaskAllOn, maskPtr);
|
||||
maskSymbol->storagePtr = maskPtr;
|
||||
ctx->SetMaskPointer(maskPtr);
|
||||
// Connect the __mask builtin to the location in memory that stores its
|
||||
// value
|
||||
maskSymbol->storagePtr = ctx->GetFullMaskPointer();
|
||||
|
||||
// add debugging info for __mask, programIndex, ...
|
||||
maskSymbol->pos = firstStmtPos;
|
||||
@@ -202,7 +194,7 @@ Function::emitCode(FunctionEmitContext *ctx, llvm::Function *function,
|
||||
llvm::BasicBlock *entryBBlock = ctx->GetCurrentBasicBlock();
|
||||
#endif
|
||||
const FunctionType *type = dynamic_cast<const FunctionType *>(sym->type);
|
||||
assert(type != NULL);
|
||||
Assert(type != NULL);
|
||||
if (type->isTask == true) {
|
||||
// For tasks, there should always be three parameters: the
|
||||
// pointer to the structure that holds all of the arguments, the
|
||||
@@ -267,38 +259,87 @@ Function::emitCode(FunctionEmitContext *ctx, llvm::Function *function,
|
||||
else {
|
||||
// Otherwise use the mask to set the entry mask value
|
||||
argIter->setName("__mask");
|
||||
assert(argIter->getType() == LLVMTypes::MaskType);
|
||||
Assert(argIter->getType() == LLVMTypes::MaskType);
|
||||
ctx->SetFunctionMask(argIter);
|
||||
assert(++argIter == function->arg_end());
|
||||
Assert(++argIter == function->arg_end());
|
||||
}
|
||||
}
|
||||
|
||||
// Finally, we can generate code for the function
|
||||
if (code != NULL) {
|
||||
int costEstimate = code->EstimateCost();
|
||||
ctx->SetDebugPos(code->pos);
|
||||
ctx->AddInstrumentationPoint("function entry");
|
||||
|
||||
int costEstimate = EstimateCost(code);
|
||||
Debug(code->pos, "Estimated cost for function \"%s\" = %d\n",
|
||||
sym->name.c_str(), costEstimate);
|
||||
|
||||
// If the body of the function is non-trivial, then we wrap the
|
||||
// entire thing inside code that tests to see if the mask is all
|
||||
// on, all off, or mixed. If this is a simple function, then this
|
||||
// isn't worth the code bloat / overhead.
|
||||
bool checkMask = (type->isTask == true) ||
|
||||
((function->hasFnAttr(llvm::Attribute::AlwaysInline) == false) &&
|
||||
costEstimate > CHECK_MASK_AT_FUNCTION_START_COST);
|
||||
Debug(code->pos, "Estimated cost for function \"%s\" = %d\n",
|
||||
sym->name.c_str(), costEstimate);
|
||||
// If the body of the function is non-trivial, then we wrap the
|
||||
// entire thing around a varying "cif (true)" test in order to reap
|
||||
// the side-effect benefit of checking to see if the execution mask
|
||||
// is all on and thence having a specialized code path for that
|
||||
// case. If this is a simple function, then this isn't worth the
|
||||
// code bloat / overhead.
|
||||
if (checkMask) {
|
||||
bool allTrue[ISPC_MAX_NVEC];
|
||||
for (int i = 0; i < g->target.vectorWidth; ++i)
|
||||
allTrue[i] = true;
|
||||
Expr *trueExpr = new ConstExpr(AtomicType::VaryingBool, allTrue,
|
||||
code->pos);
|
||||
code = new IfStmt(trueExpr, code, NULL, true, code->pos);
|
||||
}
|
||||
checkMask &= (g->target.maskingIsFree == false);
|
||||
checkMask &= (g->opt.disableCoherentControlFlow == false);
|
||||
|
||||
ctx->SetDebugPos(code->pos);
|
||||
ctx->AddInstrumentationPoint("function entry");
|
||||
code->EmitCode(ctx);
|
||||
if (checkMask) {
|
||||
llvm::Value *mask = ctx->GetFunctionMask();
|
||||
llvm::Value *allOn = ctx->All(mask);
|
||||
llvm::BasicBlock *bbAllOn = ctx->CreateBasicBlock("all_on");
|
||||
llvm::BasicBlock *bbNotAll = ctx->CreateBasicBlock("not_all_on");
|
||||
|
||||
// Set up basic blocks for goto targets
|
||||
ctx->InitializeLabelMap(code);
|
||||
|
||||
ctx->BranchInst(bbAllOn, bbNotAll, allOn);
|
||||
// all on: we've determined dynamically that the mask is all
|
||||
// on. Set the current mask to "all on" explicitly so that
|
||||
// codegen for this path can be improved with this knowledge in
|
||||
// hand...
|
||||
ctx->SetCurrentBasicBlock(bbAllOn);
|
||||
if (!g->opt.disableMaskAllOnOptimizations)
|
||||
ctx->SetFunctionMask(LLVMMaskAllOn);
|
||||
code->EmitCode(ctx);
|
||||
if (ctx->GetCurrentBasicBlock())
|
||||
ctx->ReturnInst();
|
||||
|
||||
// not all on: figure out if no instances are running, or if
|
||||
// some of them are
|
||||
ctx->SetCurrentBasicBlock(bbNotAll);
|
||||
ctx->SetFunctionMask(mask);
|
||||
llvm::BasicBlock *bbNoneOn = ctx->CreateBasicBlock("none_on");
|
||||
llvm::BasicBlock *bbSomeOn = ctx->CreateBasicBlock("some_on");
|
||||
llvm::Value *anyOn = ctx->Any(mask);
|
||||
ctx->BranchInst(bbSomeOn, bbNoneOn, anyOn);
|
||||
|
||||
// Everyone is off; get out of here.
|
||||
ctx->SetCurrentBasicBlock(bbNoneOn);
|
||||
ctx->ReturnInst();
|
||||
|
||||
// some on: reset the mask to the value it had at function
|
||||
// entry and emit the code. Resetting the mask here is
|
||||
// important, due to the "all on" setting of it for the path
|
||||
// above
|
||||
ctx->SetCurrentBasicBlock(bbSomeOn);
|
||||
ctx->SetFunctionMask(mask);
|
||||
|
||||
// Set up basic blocks for goto targets again; we want to have
|
||||
// one set of them for gotos in the 'all on' case, and a
|
||||
// distinct set for the 'mixed mask' case.
|
||||
ctx->InitializeLabelMap(code);
|
||||
|
||||
code->EmitCode(ctx);
|
||||
if (ctx->GetCurrentBasicBlock())
|
||||
ctx->ReturnInst();
|
||||
}
|
||||
else {
|
||||
// Set up basic blocks for goto targets
|
||||
ctx->InitializeLabelMap(code);
|
||||
// No check, just emit the code
|
||||
code->EmitCode(ctx);
|
||||
}
|
||||
}
|
||||
|
||||
if (ctx->GetCurrentBasicBlock()) {
|
||||
@@ -337,7 +378,7 @@ Function::GenerateIR() {
|
||||
return;
|
||||
|
||||
llvm::Function *function = sym->function;
|
||||
assert(function != NULL);
|
||||
Assert(function != NULL);
|
||||
|
||||
// But if that function has a definition, we don't want to redefine it.
|
||||
if (function->empty() == false) {
|
||||
@@ -352,9 +393,8 @@ Function::GenerateIR() {
|
||||
SourcePos firstStmtPos = sym->pos;
|
||||
if (code) {
|
||||
StmtList *sl = dynamic_cast<StmtList *>(code);
|
||||
if (sl && sl->GetStatements().size() > 0 &&
|
||||
sl->GetStatements()[0] != NULL)
|
||||
firstStmtPos = sl->GetStatements()[0]->pos;
|
||||
if (sl && sl->stmts.size() > 0 && sl->stmts[0] != NULL)
|
||||
firstStmtPos = sl->stmts[0]->pos;
|
||||
else
|
||||
firstStmtPos = code->pos;
|
||||
}
|
||||
@@ -376,7 +416,7 @@ Function::GenerateIR() {
|
||||
// it without a mask parameter and without name mangling so that
|
||||
// the application can call it
|
||||
const FunctionType *type = dynamic_cast<const FunctionType *>(sym->type);
|
||||
assert(type != NULL);
|
||||
Assert(type != NULL);
|
||||
if (type->isExported) {
|
||||
if (!type->isTask) {
|
||||
LLVM_TYPE_CONST llvm::FunctionType *ftype =
|
||||
|
||||
204
ispc.cpp
204
ispc.cpp
@@ -50,6 +50,7 @@
|
||||
#include <llvm/Analysis/DIBuilder.h>
|
||||
#include <llvm/Analysis/DebugInfo.h>
|
||||
#include <llvm/Support/Dwarf.h>
|
||||
#include <llvm/Instructions.h>
|
||||
#include <llvm/Target/TargetMachine.h>
|
||||
#include <llvm/Target/TargetOptions.h>
|
||||
#include <llvm/Target/TargetData.h>
|
||||
@@ -129,24 +130,60 @@ Target::GetTarget(const char *arch, const char *cpu, const char *isa,
|
||||
t->nativeVectorWidth = 4;
|
||||
t->vectorWidth = 4;
|
||||
t->attributes = "+sse,+sse2,-sse3,-sse41,-sse42,-sse4a,-ssse3,-popcnt";
|
||||
t->maskingIsFree = false;
|
||||
t->allOffMaskIsSafe = false;
|
||||
t->maskBitCount = 32;
|
||||
}
|
||||
else if (!strcasecmp(isa, "sse2-x2")) {
|
||||
t->isa = Target::SSE2;
|
||||
t->nativeVectorWidth = 4;
|
||||
t->vectorWidth = 8;
|
||||
t->attributes = "+sse,+sse2,-sse3,-sse41,-sse42,-sse4a,-ssse3,-popcnt";
|
||||
t->maskingIsFree = false;
|
||||
t->allOffMaskIsSafe = false;
|
||||
t->maskBitCount = 32;
|
||||
}
|
||||
else if (!strcasecmp(isa, "sse4")) {
|
||||
t->isa = Target::SSE4;
|
||||
t->nativeVectorWidth = 4;
|
||||
t->vectorWidth = 4;
|
||||
t->attributes = "+sse,+sse2,+sse3,+sse41,-sse42,-sse4a,+ssse3,-popcnt,+cmov";
|
||||
t->maskingIsFree = false;
|
||||
t->allOffMaskIsSafe = false;
|
||||
t->maskBitCount = 32;
|
||||
}
|
||||
else if (!strcasecmp(isa, "sse4x2") || !strcasecmp(isa, "sse4-x2")) {
|
||||
t->isa = Target::SSE4;
|
||||
t->nativeVectorWidth = 4;
|
||||
t->vectorWidth = 8;
|
||||
t->attributes = "+sse,+sse2,+sse3,+sse41,-sse42,-sse4a,+ssse3,-popcnt,+cmov";
|
||||
t->maskingIsFree = false;
|
||||
t->allOffMaskIsSafe = false;
|
||||
t->maskBitCount = 32;
|
||||
}
|
||||
else if (!strcasecmp(isa, "generic-4")) {
|
||||
t->isa = Target::GENERIC;
|
||||
t->nativeVectorWidth = 4;
|
||||
t->vectorWidth = 4;
|
||||
t->maskingIsFree = true;
|
||||
t->allOffMaskIsSafe = true;
|
||||
t->maskBitCount = 1;
|
||||
}
|
||||
else if (!strcasecmp(isa, "generic-8")) {
|
||||
t->isa = Target::GENERIC;
|
||||
t->nativeVectorWidth = 8;
|
||||
t->vectorWidth = 8;
|
||||
t->maskingIsFree = true;
|
||||
t->allOffMaskIsSafe = true;
|
||||
t->maskBitCount = 1;
|
||||
}
|
||||
else if (!strcasecmp(isa, "generic-16")) {
|
||||
t->isa = Target::GENERIC;
|
||||
t->nativeVectorWidth = 16;
|
||||
t->vectorWidth = 16;
|
||||
t->maskingIsFree = true;
|
||||
t->allOffMaskIsSafe = true;
|
||||
t->maskBitCount = 1;
|
||||
}
|
||||
#if defined(LLVM_3_0) || defined(LLVM_3_0svn) || defined(LLVM_3_1svn)
|
||||
else if (!strcasecmp(isa, "avx")) {
|
||||
@@ -154,14 +191,40 @@ Target::GetTarget(const char *arch, const char *cpu, const char *isa,
|
||||
t->nativeVectorWidth = 8;
|
||||
t->vectorWidth = 8;
|
||||
t->attributes = "+avx,+popcnt,+cmov";
|
||||
t->maskingIsFree = false;
|
||||
t->allOffMaskIsSafe = false;
|
||||
t->maskBitCount = 32;
|
||||
}
|
||||
else if (!strcasecmp(isa, "avx-x2")) {
|
||||
t->isa = Target::AVX;
|
||||
t->nativeVectorWidth = 8;
|
||||
t->vectorWidth = 16;
|
||||
t->attributes = "+avx,+popcnt,+cmov";
|
||||
t->maskingIsFree = false;
|
||||
t->allOffMaskIsSafe = false;
|
||||
t->maskBitCount = 32;
|
||||
}
|
||||
#endif // LLVM 3.0
|
||||
#endif // LLVM 3.0+
|
||||
#if defined(LLVM_3_1svn)
|
||||
else if (!strcasecmp(isa, "avx2")) {
|
||||
t->isa = Target::AVX2;
|
||||
t->nativeVectorWidth = 8;
|
||||
t->vectorWidth = 8;
|
||||
t->attributes = "+avx2,+popcnt,+cmov";
|
||||
t->maskingIsFree = false;
|
||||
t->allOffMaskIsSafe = false;
|
||||
t->maskBitCount = 32;
|
||||
}
|
||||
else if (!strcasecmp(isa, "avx2-x2")) {
|
||||
t->isa = Target::AVX2;
|
||||
t->nativeVectorWidth = 16;
|
||||
t->vectorWidth = 16;
|
||||
t->attributes = "+avx2,+popcnt,+cmov";
|
||||
t->maskingIsFree = false;
|
||||
t->allOffMaskIsSafe = false;
|
||||
t->maskBitCount = 32;
|
||||
}
|
||||
#endif // LLVM 3.1
|
||||
else {
|
||||
fprintf(stderr, "Target ISA \"%s\" is unknown. Choices are: %s\n",
|
||||
isa, SupportedTargetISAs());
|
||||
@@ -201,10 +264,13 @@ Target::SupportedTargetArchs() {
|
||||
const char *
|
||||
Target::SupportedTargetISAs() {
|
||||
return "sse2, sse2-x2, sse4, sse4-x2"
|
||||
#if defined(LLVM_3_0) || defined(LLVM_3_0svn) || defined(LLVM_3_1svn)
|
||||
#ifndef LLVM_2_9
|
||||
", avx, avx-x2"
|
||||
#endif
|
||||
;
|
||||
#endif // !LLVM_2_9
|
||||
#ifdef LLVM_3_1svn
|
||||
", avx2, avx2-x2"
|
||||
#endif // LLVM_3_1svn
|
||||
", generic-4, generic-8, generic-16";
|
||||
}
|
||||
|
||||
|
||||
@@ -241,11 +307,19 @@ Target::GetTargetMachine() const {
|
||||
|
||||
llvm::Reloc::Model relocModel = generatePIC ? llvm::Reloc::PIC_ :
|
||||
llvm::Reloc::Default;
|
||||
#if defined(LLVM_3_0svn) || defined(LLVM_3_1svn) || defined(LLVM_3_0)
|
||||
#if defined(LLVM_3_1svn)
|
||||
std::string featuresString = attributes;
|
||||
llvm::TargetOptions options;
|
||||
if (g->opt.fastMath == true)
|
||||
options.UnsafeFPMath = 1;
|
||||
llvm::TargetMachine *targetMachine =
|
||||
target->createTargetMachine(triple, cpu, featuresString, options,
|
||||
relocModel);
|
||||
#elif defined(LLVM_3_0)
|
||||
std::string featuresString = attributes;
|
||||
llvm::TargetMachine *targetMachine =
|
||||
target->createTargetMachine(triple, cpu, featuresString, relocModel);
|
||||
#else
|
||||
#else // LLVM 2.9
|
||||
#ifdef ISPC_IS_APPLE
|
||||
relocModel = llvm::Reloc::PIC_;
|
||||
#endif // ISPC_IS_APPLE
|
||||
@@ -255,8 +329,9 @@ Target::GetTargetMachine() const {
|
||||
#ifndef ISPC_IS_WINDOWS
|
||||
targetMachine->setRelocationModel(relocModel);
|
||||
#endif // !ISPC_IS_WINDOWS
|
||||
#endif
|
||||
assert(targetMachine != NULL);
|
||||
#endif // LLVM_2_9
|
||||
|
||||
Assert(targetMachine != NULL);
|
||||
|
||||
targetMachine->setAsmVerbosityDefault(true);
|
||||
return targetMachine;
|
||||
@@ -272,7 +347,10 @@ Target::GetISAString() const {
|
||||
return "sse4";
|
||||
case Target::AVX:
|
||||
return "avx";
|
||||
break;
|
||||
case Target::AVX2:
|
||||
return "avx2";
|
||||
case Target::GENERIC:
|
||||
return "generic";
|
||||
default:
|
||||
FATAL("Unhandled target in GetISAString()");
|
||||
}
|
||||
@@ -280,31 +358,113 @@ Target::GetISAString() const {
|
||||
}
|
||||
|
||||
|
||||
static bool
|
||||
lGenericTypeLayoutIndeterminate(LLVM_TYPE_CONST llvm::Type *type) {
|
||||
if (type->isPrimitiveType() || type->isIntegerTy())
|
||||
return false;
|
||||
|
||||
if (type == LLVMTypes::BoolVectorType ||
|
||||
type == LLVMTypes::MaskType ||
|
||||
type == LLVMTypes::Int1VectorType)
|
||||
return true;
|
||||
|
||||
LLVM_TYPE_CONST llvm::ArrayType *at =
|
||||
llvm::dyn_cast<LLVM_TYPE_CONST llvm::ArrayType>(type);
|
||||
if (at != NULL)
|
||||
return lGenericTypeLayoutIndeterminate(at->getElementType());
|
||||
|
||||
LLVM_TYPE_CONST llvm::PointerType *pt =
|
||||
llvm::dyn_cast<LLVM_TYPE_CONST llvm::PointerType>(type);
|
||||
if (pt != NULL)
|
||||
return false;
|
||||
|
||||
LLVM_TYPE_CONST llvm::StructType *st =
|
||||
llvm::dyn_cast<LLVM_TYPE_CONST llvm::StructType>(type);
|
||||
if (st != NULL) {
|
||||
for (int i = 0; i < (int)st->getNumElements(); ++i)
|
||||
if (lGenericTypeLayoutIndeterminate(st->getElementType(i)))
|
||||
return true;
|
||||
return false;
|
||||
}
|
||||
|
||||
Assert(llvm::isa<LLVM_TYPE_CONST llvm::VectorType>(type));
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
llvm::Value *
|
||||
Target::SizeOf(LLVM_TYPE_CONST llvm::Type *type) {
|
||||
Target::SizeOf(LLVM_TYPE_CONST llvm::Type *type,
|
||||
llvm::BasicBlock *insertAtEnd) {
|
||||
if (isa == Target::GENERIC &&
|
||||
lGenericTypeLayoutIndeterminate(type)) {
|
||||
llvm::Value *index[1] = { LLVMInt32(1) };
|
||||
LLVM_TYPE_CONST llvm::PointerType *ptrType = llvm::PointerType::get(type, 0);
|
||||
llvm::Value *voidPtr = llvm::ConstantPointerNull::get(ptrType);
|
||||
#if defined(LLVM_3_0) || defined(LLVM_3_0svn) || defined(LLVM_3_1svn)
|
||||
llvm::ArrayRef<llvm::Value *> arrayRef(&index[0], &index[1]);
|
||||
llvm::Instruction *gep =
|
||||
llvm::GetElementPtrInst::Create(voidPtr, arrayRef, "sizeof_gep",
|
||||
insertAtEnd);
|
||||
#else
|
||||
llvm::Instruction *gep =
|
||||
llvm::GetElementPtrInst::Create(voidPtr, &index[0], &index[1],
|
||||
"sizeof_gep", insertAtEnd);
|
||||
#endif
|
||||
if (is32Bit || g->opt.force32BitAddressing)
|
||||
return new llvm::PtrToIntInst(gep, LLVMTypes::Int32Type,
|
||||
"sizeof_int", insertAtEnd);
|
||||
else
|
||||
return new llvm::PtrToIntInst(gep, LLVMTypes::Int64Type,
|
||||
"sizeof_int", insertAtEnd);
|
||||
}
|
||||
|
||||
const llvm::TargetData *td = GetTargetMachine()->getTargetData();
|
||||
assert(td != NULL);
|
||||
Assert(td != NULL);
|
||||
uint64_t byteSize = td->getTypeSizeInBits(type) / 8;
|
||||
if (is32Bit || g->opt.force32BitAddressing)
|
||||
return LLVMInt32(byteSize);
|
||||
return LLVMInt32((int32_t)byteSize);
|
||||
else
|
||||
return LLVMInt64(byteSize);
|
||||
}
|
||||
|
||||
|
||||
llvm::Value *
|
||||
Target::StructOffset(LLVM_TYPE_CONST llvm::Type *type, int element) {
|
||||
Target::StructOffset(LLVM_TYPE_CONST llvm::Type *type, int element,
|
||||
llvm::BasicBlock *insertAtEnd) {
|
||||
if (isa == Target::GENERIC &&
|
||||
lGenericTypeLayoutIndeterminate(type) == true) {
|
||||
llvm::Value *indices[2] = { LLVMInt32(0), LLVMInt32(element) };
|
||||
LLVM_TYPE_CONST llvm::PointerType *ptrType = llvm::PointerType::get(type, 0);
|
||||
llvm::Value *voidPtr = llvm::ConstantPointerNull::get(ptrType);
|
||||
#if defined(LLVM_3_0) || defined(LLVM_3_0svn) || defined(LLVM_3_1svn)
|
||||
llvm::ArrayRef<llvm::Value *> arrayRef(&indices[0], &indices[2]);
|
||||
llvm::Instruction *gep =
|
||||
llvm::GetElementPtrInst::Create(voidPtr, arrayRef, "offset_gep",
|
||||
insertAtEnd);
|
||||
#else
|
||||
llvm::Instruction *gep =
|
||||
llvm::GetElementPtrInst::Create(voidPtr, &indices[0], &indices[2],
|
||||
"offset_gep", insertAtEnd);
|
||||
#endif
|
||||
if (is32Bit || g->opt.force32BitAddressing)
|
||||
return new llvm::PtrToIntInst(gep, LLVMTypes::Int32Type,
|
||||
"offset_int", insertAtEnd);
|
||||
else
|
||||
return new llvm::PtrToIntInst(gep, LLVMTypes::Int64Type,
|
||||
"offset_int", insertAtEnd);
|
||||
}
|
||||
|
||||
const llvm::TargetData *td = GetTargetMachine()->getTargetData();
|
||||
assert(td != NULL);
|
||||
Assert(td != NULL);
|
||||
LLVM_TYPE_CONST llvm::StructType *structType =
|
||||
llvm::dyn_cast<LLVM_TYPE_CONST llvm::StructType>(type);
|
||||
assert(structType != NULL);
|
||||
Assert(structType != NULL);
|
||||
const llvm::StructLayout *sl = td->getStructLayout(structType);
|
||||
assert(sl != NULL);
|
||||
Assert(sl != NULL);
|
||||
|
||||
uint64_t offset = sl->getElementOffset(element);
|
||||
if (is32Bit || g->opt.force32BitAddressing)
|
||||
return LLVMInt32(offset);
|
||||
return LLVMInt32((int32_t)offset);
|
||||
else
|
||||
return LLVMInt64(offset);
|
||||
}
|
||||
@@ -320,6 +480,7 @@ Opt::Opt() {
|
||||
force32BitAddressing = true;
|
||||
unrollLoops = true;
|
||||
disableAsserts = false;
|
||||
disableMaskAllOnOptimizations = false;
|
||||
disableHandlePseudoMemoryOps = false;
|
||||
disableBlendedMaskedStores = false;
|
||||
disableCoherentControlFlow = false;
|
||||
@@ -328,7 +489,6 @@ Opt::Opt() {
|
||||
disableMaskedStoreToStore = false;
|
||||
disableGatherScatterFlattening = false;
|
||||
disableUniformMemoryOptimizations = false;
|
||||
disableMaskedStoreOptimizations = false;
|
||||
}
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
@@ -362,7 +522,13 @@ Globals::Globals() {
|
||||
// SourcePos
|
||||
|
||||
SourcePos::SourcePos(const char *n, int fl, int fc, int ll, int lc) {
|
||||
name = n ? n : m->module->getModuleIdentifier().c_str();
|
||||
name = n;
|
||||
if (name == NULL) {
|
||||
if (m != NULL)
|
||||
name = m->module->getModuleIdentifier().c_str();
|
||||
else
|
||||
name = "(unknown)";
|
||||
}
|
||||
first_line = fl;
|
||||
first_column = fc;
|
||||
last_line = ll != 0 ? ll : fl;
|
||||
|
||||
60
ispc.h
60
ispc.h
@@ -50,11 +50,22 @@
|
||||
#define ISPC_IS_APPLE
|
||||
#endif
|
||||
|
||||
#include <assert.h>
|
||||
#include <stdint.h>
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
#include <vector>
|
||||
#include <string>
|
||||
|
||||
#define Assert(expr) \
|
||||
((void)((expr) ? 0 : __Assert (#expr, __FILE__, __LINE__)))
|
||||
#define __Assert(expr, file, line) \
|
||||
((void)fprintf(stderr, "%s:%u: Assertion failed: \"%s\"\n" \
|
||||
"***\n*** Please file a bug report at " \
|
||||
"https://github.com/ispc/ispc/issues\n*** (Including as much " \
|
||||
"information as you can about how to reproduce this error).\n" \
|
||||
"*** You have apparently encountered a bug in the compiler that " \
|
||||
"we'd like to fix!\n***\n", file, line, expr), abort(), 0)
|
||||
|
||||
/** @def ISPC_MAX_NVEC maximum vector size of any of the compilation
|
||||
targets.
|
||||
*/
|
||||
@@ -87,6 +98,8 @@ namespace llvm {
|
||||
#endif
|
||||
|
||||
class ArrayType;
|
||||
class AST;
|
||||
class ASTNode;
|
||||
class AtomicType;
|
||||
class FunctionEmitContext;
|
||||
class Expr;
|
||||
@@ -166,12 +179,14 @@ struct Target {
|
||||
const char *GetISAString() const;
|
||||
|
||||
/** Returns the size of the given type */
|
||||
llvm::Value *SizeOf(LLVM_TYPE_CONST llvm::Type *type);
|
||||
llvm::Value *SizeOf(LLVM_TYPE_CONST llvm::Type *type,
|
||||
llvm::BasicBlock *insertAtEnd);
|
||||
|
||||
/** Given a structure type and an element number in the structure,
|
||||
returns a value corresponding to the number of bytes from the start
|
||||
of the structure where the element is located. */
|
||||
llvm::Value *StructOffset(LLVM_TYPE_CONST llvm::Type *type,
|
||||
int element);
|
||||
int element, llvm::BasicBlock *insertAtEnd);
|
||||
|
||||
/** llvm Target object representing this target. */
|
||||
const llvm::Target *target;
|
||||
@@ -182,7 +197,7 @@ struct Target {
|
||||
flexible/performant of them will appear last in the enumerant. Note
|
||||
also that __best_available_isa() needs to be updated if ISAs are
|
||||
added or the enumerant values are reordered. */
|
||||
enum ISA { SSE2, SSE4, AVX, NUM_ISAS };
|
||||
enum ISA { SSE2, SSE4, AVX, AVX2, GENERIC, NUM_ISAS };
|
||||
|
||||
/** Instruction set being compiled to. */
|
||||
ISA isa;
|
||||
@@ -211,6 +226,23 @@ struct Target {
|
||||
|
||||
/** Indicates whether position independent code should be generated. */
|
||||
bool generatePIC;
|
||||
|
||||
/** Is there overhead associated with masking on the target
|
||||
architecture; e.g. there is on SSE, due to extra blends and the
|
||||
like, but there isn't with an ISA that supports masking
|
||||
natively. */
|
||||
bool maskingIsFree;
|
||||
|
||||
/** Is it safe to run code with the mask all off: e.g. on SSE, the fast
|
||||
gather trick assumes that at least one program instance is running
|
||||
(so that it can safely assume that the array base pointer is
|
||||
valid). */
|
||||
bool allOffMaskIsSafe;
|
||||
|
||||
/** How many bits are used to store each element of the mask: e.g. this
|
||||
is 32 on SSE/AVX, since that matches the HW better, but it's 1 for
|
||||
the generic target. */
|
||||
int maskBitCount;
|
||||
};
|
||||
|
||||
|
||||
@@ -247,10 +279,15 @@ struct Opt {
|
||||
*/
|
||||
bool force32BitAddressing;
|
||||
|
||||
/** Indicates whether assert() statements should be ignored (for
|
||||
/** Indicates whether Assert() statements should be ignored (for
|
||||
performance in the generated code). */
|
||||
bool disableAsserts;
|
||||
|
||||
|
||||
/** If enabled, disables the various optimizations that kick in when
|
||||
the execution mask can be determined to be "all on" at compile
|
||||
time. */
|
||||
bool disableMaskAllOnOptimizations;
|
||||
|
||||
/** If enabled, the various __pseudo* memory ops (gather/scatter,
|
||||
masked load/store) are left in their __pseudo* form, for better
|
||||
understanding of the structure of generated code when reading
|
||||
@@ -302,14 +339,6 @@ struct Opt {
|
||||
than gathers/scatters. This is likely only useful for measuring
|
||||
the impact of this optimization. */
|
||||
bool disableUniformMemoryOptimizations;
|
||||
|
||||
/** Disables optimizations for masked stores: masked stores with the
|
||||
mask all on are transformed to regular stores, and masked stores
|
||||
with the mask are all off are removed (which in turn can allow
|
||||
eliminating additional dead code related to computing the value
|
||||
stored). This is likely only useful for measuring the impact of
|
||||
this optimization. */
|
||||
bool disableMaskedStoreOptimizations;
|
||||
};
|
||||
|
||||
/** @brief This structure collects together a number of global variables.
|
||||
@@ -394,6 +423,7 @@ enum {
|
||||
COST_FUNPTR_UNIFORM = 12,
|
||||
COST_FUNPTR_VARYING = 24,
|
||||
COST_GATHER = 8,
|
||||
COST_GOTO = 4,
|
||||
COST_LOAD = 2,
|
||||
COST_REGULAR_BREAK_CONTINUE = 2,
|
||||
COST_RETURN = 4,
|
||||
@@ -407,6 +437,8 @@ enum {
|
||||
COST_VARYING_IF = 3,
|
||||
COST_UNIFORM_LOOP = 4,
|
||||
COST_VARYING_LOOP = 6,
|
||||
COST_UNIFORM_SWITCH = 4,
|
||||
COST_VARYING_SWITCH = 12,
|
||||
COST_ASSERT = 8,
|
||||
|
||||
CHECK_MASK_AT_FUNCTION_START_COST = 16,
|
||||
|
||||
5
ispc.sln
5
ispc.sln
@@ -3,8 +3,6 @@ Microsoft Visual Studio Solution File, Format Version 11.00
|
||||
# Visual Studio 2010
|
||||
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "ispc", "ispc.vcxproj", "{9861F490-F516-480C-B63C-D62A77AFA9D5}"
|
||||
EndProject
|
||||
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "ispc_test", "ispc_test.vcxproj", "{92547BA8-BE86-4E78-8799-1D72A70E5831}"
|
||||
EndProject
|
||||
Global
|
||||
GlobalSection(SolutionConfigurationPlatforms) = preSolution
|
||||
Debug|Win32 = Debug|Win32
|
||||
@@ -15,9 +13,6 @@ Global
|
||||
{9861F490-F516-480C-B63C-D62A77AFA9D5}.Debug|Win32.Build.0 = Debug|Win32
|
||||
{9861F490-F516-480C-B63C-D62A77AFA9D5}.Release|Win32.ActiveCfg = Release|Win32
|
||||
{9861F490-F516-480C-B63C-D62A77AFA9D5}.Release|Win32.Build.0 = Release|Win32
|
||||
{92547BA8-BE86-4E78-8799-1D72A70E5831}.Debug|Win32.ActiveCfg = Debug|Win32
|
||||
{92547BA8-BE86-4E78-8799-1D72A70E5831}.Debug|Win32.Build.0 = Debug|Win32
|
||||
{92547BA8-BE86-4E78-8799-1D72A70E5831}.Release|Win32.ActiveCfg = Release|Win32
|
||||
EndGlobalSection
|
||||
GlobalSection(SolutionProperties) = preSolution
|
||||
HideSolutionNode = FALSE
|
||||
|
||||
214
ispc.vcxproj
214
ispc.vcxproj
@@ -13,20 +13,27 @@
|
||||
<ItemGroup>
|
||||
<ClCompile Include="ast.cpp" />
|
||||
<ClCompile Include="builtins.cpp" />
|
||||
<ClCompile Include="cbackend.cpp" />
|
||||
<ClCompile Include="ctx.cpp" />
|
||||
<ClCompile Include="decl.cpp" />
|
||||
<ClCompile Include="expr.cpp" />
|
||||
<ClCompile Include="func.cpp" />
|
||||
<ClCompile Include="gen-bitcode-avx.cpp" />
|
||||
<ClCompile Include="gen-bitcode-avx-x2.cpp" />
|
||||
<ClCompile Include="gen-bitcode-avx1.cpp" />
|
||||
<ClCompile Include="gen-bitcode-avx1-x2.cpp" />
|
||||
<ClCompile Include="gen-bitcode-avx2.cpp" />
|
||||
<ClCompile Include="gen-bitcode-avx2-x2.cpp" />
|
||||
<ClCompile Include="gen-bitcode-c-32.cpp" />
|
||||
<ClCompile Include="gen-bitcode-c-64.cpp" />
|
||||
<ClCompile Include="gen-bitcode-dispatch.cpp" />
|
||||
<ClCompile Include="gen-bitcode-generic-4.cpp" />
|
||||
<ClCompile Include="gen-bitcode-generic-8.cpp" />
|
||||
<ClCompile Include="gen-bitcode-generic-16.cpp" />
|
||||
<ClCompile Include="gen-bitcode-sse2.cpp" />
|
||||
<ClCompile Include="gen-bitcode-sse2-x2.cpp" />
|
||||
<ClCompile Include="gen-bitcode-sse4.cpp" />
|
||||
<ClCompile Include="gen-bitcode-sse4-x2.cpp" />
|
||||
<ClCompile Include="gen-stdlib.cpp" />
|
||||
<ClCompile Include="gen-stdlib-generic.cpp" />
|
||||
<ClCompile Include="gen-stdlib-x86.cpp" />
|
||||
<ClCompile Include="ispc.cpp" />
|
||||
<ClCompile Include="lex.cc">
|
||||
<DisableSpecificWarnings Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">4146;4800;4996;4355;4624;4005;4003;4018</DisableSpecificWarnings>
|
||||
@@ -40,15 +47,15 @@
|
||||
<DisableSpecificWarnings Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">4146;4800;4996;4355;4624;4005;4065</DisableSpecificWarnings>
|
||||
<DisableSpecificWarnings Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">4146;4800;4996;4355;4624;4005;4065</DisableSpecificWarnings>
|
||||
</ClCompile>
|
||||
<CustomBuild Include="builtins-c.c">
|
||||
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">%LLVM_INSTALL_DIR%\bin\clang -m32 -emit-llvm builtins-c.c -c -o - | %LLVM_INSTALL_DIR%\bin\llvm-dis - | python bitcode2cpp.py builtins-c-32.c > gen-bitcode-c-32.cpp;
|
||||
%LLVM_INSTALL_DIR%\bin\clang -m64 -emit-llvm builtins-c.c -c -o - | %LLVM_INSTALL_DIR%\bin\llvm-dis - | python bitcode2cpp.py builtins-c-64.c > gen-bitcode-c-64.cpp</Command>
|
||||
<Message Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">clang builtins-c.c</Message>
|
||||
<Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">%LLVM_INSTALL_DIR%\bin\clang -m32 -emit-llvm builtins-c.c -c -o - | %LLVM_INSTALL_DIR%\bin\llvm-dis - | python bitcode2cpp.py builtins-c-32.c > gen-bitcode-c-32.cpp;
|
||||
%LLVM_INSTALL_DIR%\bin\clang -m64 -emit-llvm builtins-c.c -c -o - | %LLVM_INSTALL_DIR%\bin\llvm-dis - | python bitcode2cpp.py builtins-c-64.c > gen-bitcode-c-64.cpp</Command>
|
||||
<Message Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">clang builtins-c.c</Message>
|
||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">gen-bitcode-c-32.cpp;gen-bitcore-c-64.cpp</Outputs>
|
||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">gen-bitcode-c-32.cpp;gen-bitcore-c-64.cpp</Outputs>
|
||||
<CustomBuild Include="builtins\builtins.c">
|
||||
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">%LLVM_INSTALL_DIR%\bin\clang -m32 -emit-llvm builtins\builtins.c -c -o - | %LLVM_INSTALL_DIR%\bin\llvm-dis - | python bitcode2cpp.py c-32 > gen-bitcode-c-32.cpp;
|
||||
%LLVM_INSTALL_DIR%\bin\clang -m64 -emit-llvm builtins\builtins.c -c -o - | %LLVM_INSTALL_DIR%\bin\llvm-dis - | python bitcode2cpp.py c-64 > gen-bitcode-c-64.cpp</Command>
|
||||
<Message Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Building builtins.c</Message>
|
||||
<Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">%LLVM_INSTALL_DIR%\bin\clang -m32 -emit-llvm builtins\builtins.c -c -o - | %LLVM_INSTALL_DIR%\bin\llvm-dis - | python bitcode2cpp.py c-32 > gen-bitcode-c-32.cpp;
|
||||
%LLVM_INSTALL_DIR%\bin\clang -m64 -emit-llvm builtins\builtins.c -c -o - | %LLVM_INSTALL_DIR%\bin\llvm-dis - | python bitcode2cpp.py c-64 > gen-bitcode-c-64.cpp</Command>
|
||||
<Message Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">Building builtins.c</Message>
|
||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">gen-bitcode-c-32.cpp;gen-bitcode-c-64.cpp</Outputs>
|
||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">gen-bitcode-c-32.cpp;gen-bitcode-c-64.cpp</Outputs>
|
||||
</CustomBuild>
|
||||
<ClCompile Include="stmt.cpp" />
|
||||
<ClCompile Include="sym.cpp" />
|
||||
@@ -75,103 +82,172 @@
|
||||
<ItemGroup>
|
||||
<CustomBuild Include="stdlib.ispc">
|
||||
<FileType>Document</FileType>
|
||||
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">%LLVM_INSTALL_DIR%\bin\clang -E -x c %(Filename).ispc -DISPC=1 -DPI=3.1415926535 | python stdlib2cpp.py > gen-stdlib.cpp</Command>
|
||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">gen-stdlib.cpp</Outputs>
|
||||
<Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">%LLVM_INSTALL_DIR%\bin\clang -E -x c %(Filename).ispc -DISPC=1 -DPI=3.1415926535 | python stdlib2cpp.py > gen-stdlib.cpp</Command>
|
||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">gen-stdlib.cpp</Outputs>
|
||||
<Message Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Building gen-stdlib.cpp</Message>
|
||||
<Message Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">Building gen-stdlib.cpp</Message>
|
||||
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">%LLVM_INSTALL_DIR%\bin\clang -E -x c %(Filename).ispc -DISPC=1 -DPI=3.1415926535 | python stdlib2cpp.py x86 > gen-stdlib-x86.cpp;
|
||||
%LLVM_INSTALL_DIR%\bin\clang -E -x c %(Filename).ispc -DISPC=1 -DISPC_TARGET_GENERIC=1 -DPI=3.1415926535 | python stdlib2cpp.py generic > gen-stdlib-generic.cpp;
|
||||
</Command>
|
||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">gen-stdlib-generic.cpp;gen-stdlib-x86.cpp</Outputs>
|
||||
<Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">%LLVM_INSTALL_DIR%\bin\clang -E -x c %(Filename).ispc -DISPC=1 -DPI=3.1415926535 | python stdlib2cpp.py x86 > gen-stdlib-x86.cpp;
|
||||
%LLVM_INSTALL_DIR%\bin\clang -E -x c %(Filename).ispc -DISPC=1 -DISPC_TARGET_GENERIC=1 -DPI=3.1415926535 | python stdlib2cpp.py generic > gen-stdlib-generic.cpp;
|
||||
</Command>
|
||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">gen-stdlib-generic.cpp;gen-stdlib-x86.cpp</Outputs>
|
||||
<Message Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Building gen-stdlib-{generic,x86}.cpp</Message>
|
||||
<Message Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">Building gen-stdlib-{generic,x86}.cpp</Message>
|
||||
</CustomBuild>
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<CustomBuild Include="builtins-sse4.ll">
|
||||
<CustomBuild Include="builtins\dispatch.ll">
|
||||
<FileType>Document</FileType>
|
||||
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">m4 builtins.m4 builtins-sse4.ll | python bitcode2cpp.py builtins-sse4.ll > gen-bitcode-sse4.cpp</Command>
|
||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">gen-bitcode-sse4.cpp</Outputs>
|
||||
<AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">builtins.m4;builtins-sse4-common.ll</AdditionalInputs>
|
||||
<Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">m4 builtins.m4 builtins-sse4.ll | python bitcode2cpp.py builtins-sse4.ll > gen-bitcode-sse4.cpp</Command>
|
||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">gen-bitcode-sse4.cpp</Outputs>
|
||||
<AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">builtins.m4;builtins-sse4-common.ll</AdditionalInputs>
|
||||
<Message Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Building gen-bitcode-sse4.cpp</Message>
|
||||
<Message Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">Building gen-bitcode-sse4.cpp</Message>
|
||||
</CustomBuild>
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<CustomBuild Include="builtins-dispatch.ll">
|
||||
<FileType>Document</FileType>
|
||||
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">m4 builtins.m4 builtins-dispatch.ll | python bitcode2cpp.py builtins-dispatch.ll > gen-bitcode-dispatch.cpp</Command>
|
||||
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% builtins\dispatch.ll | python bitcode2cpp.py dispatch.ll > gen-bitcode-dispatch.cpp</Command>
|
||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">gen-bitcode-dispatch.cpp</Outputs>
|
||||
<AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">builtins.m4</AdditionalInputs>
|
||||
<Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">m4 builtins.m4 builtins-dispatch.ll | python bitcode2cpp.py builtins-dispatch.ll > gen-bitcode-dispatch.cpp</Command>
|
||||
<AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">builtins\util.m4</AdditionalInputs>
|
||||
<Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% builtins\dispatch.ll | python bitcode2cpp.py dispatch.ll > gen-bitcode-dispatch.cpp</Command>
|
||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">gen-bitcode-dispatch.cpp</Outputs>
|
||||
<AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">builtins.m4</AdditionalInputs>
|
||||
<AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">builtins\util.m4</AdditionalInputs>
|
||||
<Message Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Building gen-bitcode-dispatch.cpp</Message>
|
||||
<Message Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">Building gen-bitcode-dispatch.cpp</Message>
|
||||
</CustomBuild>
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<CustomBuild Include="builtins-sse4-x2.ll">
|
||||
<CustomBuild Include="builtins\target-sse4.ll">
|
||||
<FileType>Document</FileType>
|
||||
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">m4 builtins.m4 builtins-sse4-x2.ll | python bitcode2cpp.py builtins-sse4-x2.ll > gen-bitcode-sse4-x2.cpp</Command>
|
||||
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% builtins\target-sse4.ll | python bitcode2cpp.py builtins\target-sse4.ll > gen-bitcode-sse4.cpp</Command>
|
||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">gen-bitcode-sse4.cpp</Outputs>
|
||||
<AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">builtins\util.m4;builtins\target-sse4-common.ll</AdditionalInputs>
|
||||
<Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% builtins\target-sse4.ll | python bitcode2cpp.py builtins\target-sse4.ll > gen-bitcode-sse4.cpp</Command>
|
||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">gen-bitcode-sse4.cpp</Outputs>
|
||||
<AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">builtins\util.m4;builtins\target-sse4-common.ll</AdditionalInputs>
|
||||
<Message Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Building gen-bitcode-sse4.cpp</Message>
|
||||
<Message Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">Building gen-bitcode-sse4.cpp</Message>
|
||||
</CustomBuild>
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<CustomBuild Include="builtins\target-sse4-x2.ll">
|
||||
<FileType>Document</FileType>
|
||||
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% builtins\target-sse4-x2.ll | python bitcode2cpp.py builtins\target-sse4-x2.ll > gen-bitcode-sse4-x2.cpp</Command>
|
||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">gen-bitcode-sse4-x2.cpp</Outputs>
|
||||
<AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">builtins.m4;builtins-sse4-common.ll</AdditionalInputs>
|
||||
<Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">m4 builtins.m4 builtins-sse4-x2.ll | python bitcode2cpp.py builtins-sse4-x2.ll > gen-bitcode-sse4-x2.cpp</Command>
|
||||
<AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">builtins\util.m4;builtins\target-sse4-common.ll</AdditionalInputs>
|
||||
<Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% builtins\target-sse4-x2.ll | python bitcode2cpp.py builtins\target-sse4-x2.ll > gen-bitcode-sse4-x2.cpp</Command>
|
||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">gen-bitcode-sse4-x2.cpp</Outputs>
|
||||
<AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">builtins.m4;builtins-sse4-common.ll</AdditionalInputs>
|
||||
<AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">builtins\util.m4;builtins\target-sse4-common.ll</AdditionalInputs>
|
||||
<Message Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Building gen-bitcode-sse4-x2.cpp</Message>
|
||||
<Message Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">Building gen-bitcode-sse4-x2.cpp</Message>
|
||||
</CustomBuild>
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<CustomBuild Include="builtins-sse2.ll">
|
||||
<CustomBuild Include="builtins\target-sse2.ll">
|
||||
<FileType>Document</FileType>
|
||||
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">m4 builtins.m4 builtins-sse2.ll | python bitcode2cpp.py builtins-sse2.ll > gen-bitcode-sse2.cpp</Command>
|
||||
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% builtins\target-sse2.ll | python bitcode2cpp.py builtins\target-sse2.ll > gen-bitcode-sse2.cpp</Command>
|
||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">gen-bitcode-sse2.cpp</Outputs>
|
||||
<AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">builtins.m4;builtins-sse2-common.ll</AdditionalInputs>
|
||||
<Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">m4 builtins.m4 builtins-sse2.ll | python bitcode2cpp.py builtins-sse2.ll > gen-bitcode-sse2.cpp</Command>
|
||||
<AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">builtins\util.m4;builtins\target-sse2-common.ll</AdditionalInputs>
|
||||
<Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% builtins\target-sse2.ll | python bitcode2cpp.py builtins\target-sse2.ll > gen-bitcode-sse2.cpp</Command>
|
||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">gen-bitcode-sse2.cpp</Outputs>
|
||||
<AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">builtins.m4;builtins-sse2-common.ll</AdditionalInputs>
|
||||
<AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">builtins\util.m4;builtins\target-sse2-common.ll</AdditionalInputs>
|
||||
<Message Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Building gen-bitcode-sse2.cpp</Message>
|
||||
<Message Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">Building gen-bitcode-sse2.cpp</Message>
|
||||
</CustomBuild>
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<CustomBuild Include="builtins-sse2-x2.ll">
|
||||
<CustomBuild Include="builtins\target-sse2-x2.ll">
|
||||
<FileType>Document</FileType>
|
||||
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">m4 builtins.m4 builtins-sse2-x2.ll | python bitcode2cpp.py builtins-sse2-x2.ll > gen-bitcode-sse2-x2.cpp</Command>
|
||||
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% builtins\target-sse2-x2.ll | python bitcode2cpp.py builtins\target-sse2-x2.ll > gen-bitcode-sse2-x2.cpp</Command>
|
||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">gen-bitcode-sse2-x2.cpp</Outputs>
|
||||
<AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">builtins.m4;builtins-sse2-common.ll</AdditionalInputs>
|
||||
<Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">m4 builtins.m4 builtins-sse2-x2.ll | python bitcode2cpp.py builtins-sse2-x2.ll > gen-bitcode-sse2-x2.cpp</Command>
|
||||
<AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">builtins\util.m4;builtins\target-sse2-common.ll</AdditionalInputs>
|
||||
<Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% builtins\target-sse2-x2.ll | python bitcode2cpp.py builtins\target-sse2-x2.ll > gen-bitcode-sse2-x2.cpp</Command>
|
||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">gen-bitcode-sse2-x2.cpp</Outputs>
|
||||
<AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">builtins.m4;builtins-sse2-common.ll</AdditionalInputs>
|
||||
<AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">builtins\util.m4;builtins\target-sse2-common.ll</AdditionalInputs>
|
||||
<Message Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Building gen-bitcode-sse2-x2.cpp</Message>
|
||||
<Message Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">Building gen-bitcode-sse2-x2.cpp</Message>
|
||||
</CustomBuild>
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<CustomBuild Include="builtins-avx.ll">
|
||||
<CustomBuild Include="builtins\target-avx1.ll">
|
||||
<FileType>Document</FileType>
|
||||
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">m4 builtins.m4 builtins-avx.ll | python bitcode2cpp.py builtins-avx.ll > gen-bitcode-avx.cpp</Command>
|
||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">gen-bitcode-avx.cpp</Outputs>
|
||||
<AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">builtins.m4;builtins-avx-common.ll</AdditionalInputs>
|
||||
<Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">m4 builtins.m4 builtins-avx.ll | python bitcode2cpp.py builtins-avx.ll > gen-bitcode-avx.cpp</Command>
|
||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">gen-bitcode-avx.cpp</Outputs>
|
||||
<AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">builtins.m4;builtins-avx-common.ll</AdditionalInputs>
|
||||
<Message Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Building gen-bitcode-avx.cpp</Message>
|
||||
<Message Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">Building gen-bitcode-avx.cpp</Message>
|
||||
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% builtins\target-avx1.ll | python bitcode2cpp.py builtins\target-avx1.ll > gen-bitcode-avx1.cpp</Command>
|
||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">gen-bitcode-avx1.cpp</Outputs>
|
||||
<AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">builtins\util.m4;builtins\target-avx-common.ll;builtins\target-avx.ll</AdditionalInputs>
|
||||
<Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% builtins\target-avx1.ll | python bitcode2cpp.py builtins\target-avx1.ll > gen-bitcode-avx1.cpp</Command>
|
||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">gen-bitcode-avx1.cpp</Outputs>
|
||||
<AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">builtins\util.m4;builtins\target-avx-common.ll;builtins\target-avx.ll</AdditionalInputs>
|
||||
<Message Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Building gen-bitcode-avx1.cpp</Message>
|
||||
<Message Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">Building gen-bitcode-avx1.cpp</Message>
|
||||
</CustomBuild>
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<CustomBuild Include="builtins-avx-x2.ll">
|
||||
<CustomBuild Include="builtins\target-avx1-x2.ll">
|
||||
<FileType>Document</FileType>
|
||||
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">m4 builtins.m4 builtins-avx-x2.ll | python bitcode2cpp.py builtins-avx-x2.ll > gen-bitcode-avx-x2.cpp</Command>
|
||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">gen-bitcode-avx-x2.cpp</Outputs>
|
||||
<AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">builtins.m4;builtins-sse.ll</AdditionalInputs>
|
||||
<Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">m4 builtins.m4 builtins-avx-x2.ll | python bitcode2cpp.py builtins-avx-x2.ll > gen-bitcode-avx-x2.cpp</Command>
|
||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">gen-bitcode-avx-x2.cpp</Outputs>
|
||||
<AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">builtins.m4;builtins-sse.ll</AdditionalInputs>
|
||||
<Message Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Building gen-bitcode-avx-x2.cpp</Message>
|
||||
<Message Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">Building gen-bitcode-avx-x2.cpp</Message>
|
||||
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% builtins\target-avx1-x2.ll | python bitcode2cpp.py builtins\target-avx1-x2.ll > gen-bitcode-avx1-x2.cpp</Command>
|
||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">gen-bitcode-avx1-x2.cpp</Outputs>
|
||||
<AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">builtins\util.m4;builtins\target-avx-common.ll;builtins\target-avx-x2.ll</AdditionalInputs>
|
||||
<Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% builtins\target-avx1-x2.ll | python bitcode2cpp.py builtins\target-avx1-x2.ll > gen-bitcode-avx1-x2.cpp</Command>
|
||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">gen-bitcode-avx1-x2.cpp</Outputs>
|
||||
<AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">builtins\util.m4;builtins\target-avx-common.ll;builtins\target-avx-x2.ll</AdditionalInputs>
|
||||
<Message Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Building gen-bitcode-avx1-x2.cpp</Message>
|
||||
<Message Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">Building gen-bitcode-avx1-x2.cpp</Message>
|
||||
</CustomBuild>
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<CustomBuild Include="builtins\target-avx2.ll">
|
||||
<FileType>Document</FileType>
|
||||
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% builtins\target-avx2.ll | python bitcode2cpp.py builtins\target-avx2.ll > gen-bitcode-avx2.cpp</Command>
|
||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">gen-bitcode-avx2.cpp</Outputs>
|
||||
<AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">builtins\util.m4;builtins\target-avx-common.ll;builtins\target-avx.ll</AdditionalInputs>
|
||||
<Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% builtins\target-avx2.ll | python bitcode2cpp.py builtins\target-avx2.ll > gen-bitcode-avx2.cpp</Command>
|
||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">gen-bitcode-avx2.cpp</Outputs>
|
||||
<AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">builtins\util.m4;builtins\target-avx-common.ll;builtins\target-avx.ll</AdditionalInputs>
|
||||
<Message Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Building gen-bitcode-avx2.cpp</Message>
|
||||
<Message Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">Building gen-bitcode-avx2.cpp</Message>
|
||||
</CustomBuild>
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<CustomBuild Include="builtins\target-avx2-x2.ll">
|
||||
<FileType>Document</FileType>
|
||||
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% builtins\target-avx2-x2.ll | python bitcode2cpp.py builtins\target-avx2-x2.ll > gen-bitcode-avx2-x2.cpp</Command>
|
||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">gen-bitcode-avx2-x2.cpp</Outputs>
|
||||
<AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">builtins\util.m4;builtins\target-avx-common.ll;builtins\target-avx-x2.ll</AdditionalInputs>
|
||||
<Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% builtins\target-avx2-x2.ll | python bitcode2cpp.py builtins\target-avx2-x2.ll > gen-bitcode-avx2-x2.cpp</Command>
|
||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">gen-bitcode-avx2-x2.cpp</Outputs>
|
||||
<AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">builtins\util.m4;builtins\target-avx-common.ll;builtins\target-avx-x2.ll</AdditionalInputs>
|
||||
<Message Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Building gen-bitcode-avx2-x2.cpp</Message>
|
||||
<Message Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">Building gen-bitcode-avx2-x2.cpp</Message>
|
||||
</CustomBuild>
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<CustomBuild Include="builtins\target-generic-4.ll">
|
||||
<FileType>Document</FileType>
|
||||
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% builtins\target-generic-4.ll | python bitcode2cpp.py builtins\target-generic-4.ll > gen-bitcode-generic-4.cpp</Command>
|
||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">gen-bitcode-generic-4.cpp</Outputs>
|
||||
<AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">builtins\util.m4;builtins\target-generic-common.ll</AdditionalInputs>
|
||||
<Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% builtins\target-generic-4.ll | python bitcode2cpp.py builtins\target-generic-4.ll > gen-bitcode-generic-4.cpp</Command>
|
||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">gen-bitcode-generic-4.cpp</Outputs>
|
||||
<AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">builtins\util.m4;builtins\target-generic-common.ll</AdditionalInputs>
|
||||
<Message Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Building gen-bitcode-generic-4.cpp</Message>
|
||||
<Message Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">Building gen-bitcode-generic-4.cpp</Message>
|
||||
</CustomBuild>
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<CustomBuild Include="builtins\target-generic-8.ll">
|
||||
<FileType>Document</FileType>
|
||||
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% builtins\target-generic-8.ll | python bitcode2cpp.py builtins\target-generic-8.ll > gen-bitcode-generic-8.cpp</Command>
|
||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">gen-bitcode-generic-8.cpp</Outputs>
|
||||
<AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">builtins\util.m4;builtins\target-generic-common.ll</AdditionalInputs>
|
||||
<Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% builtins\target-generic-8.ll | python bitcode2cpp.py builtins\target-generic-8.ll > gen-bitcode-generic-8.cpp</Command>
|
||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">gen-bitcode-generic-8.cpp</Outputs>
|
||||
<AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">builtins\util.m4;builtins\target-generic-common.ll</AdditionalInputs>
|
||||
<Message Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Building gen-bitcode-generic-8.cpp</Message>
|
||||
<Message Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">Building gen-bitcode-generic-8.cpp</Message>
|
||||
</CustomBuild>
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<CustomBuild Include="builtins\target-generic-16.ll">
|
||||
<FileType>Document</FileType>
|
||||
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% builtins\target-generic-16.ll | python bitcode2cpp.py builtins\target-generic-16.ll > gen-bitcode-generic-16.cpp</Command>
|
||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">gen-bitcode-generic-16.cpp</Outputs>
|
||||
<AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">builtins\util.m4;builtins\target-generic-common.ll</AdditionalInputs>
|
||||
<Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% builtins\target-generic-16.ll | python bitcode2cpp.py builtins\target-generic-16.ll > gen-bitcode-generic-16.cpp</Command>
|
||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">gen-bitcode-generic-16.cpp</Outputs>
|
||||
<AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">builtins\util.m4;builtins\target-generic-common.ll</AdditionalInputs>
|
||||
<Message Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Building gen-bitcode-generic-16.cpp</Message>
|
||||
<Message Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">Building gen-bitcode-generic-16.cpp</Message>
|
||||
</CustomBuild>
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
|
||||
379
ispc_test.cpp
379
ispc_test.cpp
@@ -1,379 +0,0 @@
|
||||
/*
|
||||
Copyright (c) 2010-2011, Intel Corporation
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of Intel Corporation nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
|
||||
IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
||||
TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
|
||||
PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
|
||||
OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#define _CRT_SECURE_NO_WARNINGS
|
||||
|
||||
#if defined(_WIN32) || defined(_WIN64)
|
||||
#define ISPC_IS_WINDOWS
|
||||
#elif defined(__linux__)
|
||||
#define ISPC_IS_LINUX
|
||||
#elif defined(__APPLE__)
|
||||
#define ISPC_IS_APPLE
|
||||
#endif
|
||||
|
||||
#ifdef ISPC_IS_WINDOWS
|
||||
#define NOMINMAX
|
||||
#include <windows.h>
|
||||
#endif
|
||||
#include <stdio.h>
|
||||
#include <stdint.h>
|
||||
#include <stdlib.h>
|
||||
#include <memory.h>
|
||||
#ifdef ISPC_IS_LINUX
|
||||
#include <malloc.h>
|
||||
#endif
|
||||
|
||||
#ifdef ISPC_HAVE_SVML
|
||||
#include <xmmintrin.h>
|
||||
extern "C" {
|
||||
extern __m128 __svml_sinf4(__m128);
|
||||
extern __m128 __svml_cosf4(__m128);
|
||||
extern __m128 __svml_sincosf4(__m128 *,__m128);
|
||||
extern __m128 __svml_tanf4(__m128);
|
||||
extern __m128 __svml_atanf4(__m128);
|
||||
extern __m128 __svml_atan2f4(__m128, __m128);
|
||||
extern __m128 __svml_expf4(__m128);
|
||||
extern __m128 __svml_logf4(__m128);
|
||||
extern __m128 __svml_powf4(__m128, __m128);
|
||||
}
|
||||
#endif
|
||||
|
||||
#include <llvm/LLVMContext.h>
|
||||
#include <llvm/Module.h>
|
||||
#include <llvm/Type.h>
|
||||
#include <llvm/DerivedTypes.h>
|
||||
#include <llvm/Instructions.h>
|
||||
#include <llvm/ExecutionEngine/ExecutionEngine.h>
|
||||
#if defined(LLVM_3_0) || defined(LLVM_3_0svn) || defined(LLVM_3_1svn)
|
||||
#include <llvm/Support/TargetRegistry.h>
|
||||
#include <llvm/Support/TargetSelect.h>
|
||||
#else
|
||||
#include <llvm/Target/TargetRegistry.h>
|
||||
#include <llvm/Target/TargetSelect.h>
|
||||
#endif
|
||||
#include <llvm/ExecutionEngine/JIT.h>
|
||||
#include <llvm/Target/TargetOptions.h>
|
||||
#include <llvm/Target/TargetData.h>
|
||||
#include <llvm/Transforms/Scalar.h>
|
||||
#include <llvm/Transforms/IPO.h>
|
||||
#include <llvm/PassManager.h>
|
||||
#include <llvm/Support/CFG.h>
|
||||
#include <llvm/Analysis/Verifier.h>
|
||||
#include <llvm/Assembly/PrintModulePass.h>
|
||||
#include <llvm/Support/raw_ostream.h>
|
||||
#include <llvm/Bitcode/ReaderWriter.h>
|
||||
#include <llvm/Support/MemoryBuffer.h>
|
||||
#include <llvm/Support/system_error.h>
|
||||
|
||||
// Set to true by main() when "-f" is given on the command line.  lRunTest()
// consults it only to print a diagnostic when a test that was expected to
// fail produces matching results anyway.
bool shouldFail = false;
|
||||
|
||||
// Prototypes for the task-system entry points defined below.  They are given
// C linkage and are registered with the execution engine under these exact
// names in lRunTest(), so that bitcode referencing them can call back into
// this program.
extern "C" {
|
||||
void ISPCLaunch(void **, void *, void *, int32_t);
|
||||
void ISPCSync(void *);
|
||||
void *ISPCAlloc(void **, int64_t size, int32_t alignment);
|
||||
}
|
||||
|
||||
/** Trivial task launcher used by the test harness: rather than handing
    the tasks to a real task system, it runs all of them immediately, one
    after another, on the calling thread.

    @param handle  Receives a dummy non-NULL marker value.
    @param func    Task entry point; called as
                   func(data, 0, 1, taskIndex, count).
    @param data    Opaque argument block forwarded to every task.
    @param count   Number of task instances to run.
 */
void ISPCLaunch(void **handle, void *func, void *data, int32_t count) {
    typedef void (*TaskFuncType)(void *, int, int, int, int);
    TaskFuncType task = (TaskFuncType)(func);

    *handle = (void *)0xdeadbeef;

    int taskIndex = 0;
    while (taskIndex < count) {
        // Single "thread" (index 0 of 1) runs task taskIndex of count.
        task(data, 0, 1, taskIndex, count);
        ++taskIndex;
    }
}
|
||||
|
||||
|
||||
/** Task synchronization hook.  ISPCLaunch() above runs every task to
    completion before it returns, so there is never anything to wait for
    here; the handle argument is ignored.
 */
void ISPCSync(void * /* handle */) {
    return;
}
|
||||
|
||||
|
||||
/** Allocation hook for launched tasks.

    Returns a block of at least `size` bytes whose address is a multiple of
    `alignment` (which must be a power of two), or NULL if the underlying
    allocator fails.  The memory is intentionally never freed: the test
    harness is short-lived.

    @param handle     Receives a dummy non-NULL marker value.
    @param size       Number of bytes requested.
    @param alignment  Required alignment in bytes (power of two).
 */
void *ISPCAlloc(void **handle, int64_t size, int32_t alignment) {
    *handle = (void *)0xdeadbeef;
    // leak time!
#if defined(ISPC_IS_WINDOWS)
    return _aligned_malloc((size_t)size, alignment);
#elif defined(ISPC_IS_LINUX)
    return memalign(alignment, size);
#else
    // OS X (and any other platform without an aligned allocator we know
    // about): over-allocate with malloc() and round the pointer up by hand.
    // Room is reserved for one pointer in front of the aligned address so
    // the original malloc() result is recorded right before the block.
    size_t align = (size_t)alignment;
    void *mem = malloc((size_t)size + (align - 1) + sizeof(void *));
    if (mem == NULL)
        return NULL;

    // Round up to the next multiple of `align`.  Unlike adding
    // (align - (addr & (align - 1))), this advances by zero bytes when the
    // address is already aligned, so the full `size` bytes always remain
    // inside the allocation.
    uintptr_t base = (uintptr_t)mem + sizeof(void *);
    uintptr_t aligned = (base + (align - 1)) & ~(uintptr_t)(align - 1);
    char *amem = (char *)aligned;

    ((void **)amem)[-1] = mem;
    return amem;
#endif
}
|
||||
|
||||
|
||||
/** Writes the command-line synopsis to stderr and terminates the process
    with exit status `ret`.  Does not return.
 */
static void usage(int ret) {
    static const char *const helpLines[] = {
        "usage: ispc_test\n",
        "\t[-h/--help]\tprint help\n",
        "\t[-f]\t\tindicates that test is expected to fail\n",
        "\t<files>\n",
    };
    const size_t lineCount = sizeof(helpLines) / sizeof(helpLines[0]);

    for (size_t line = 0; line < lineCount; ++line)
        fputs(helpLines[line], stderr);

    exit(ret);
}
|
||||
|
||||
/** Stand-in that lRunTest() registers for the __svml_* entry points when
    the harness is built without ISPC_HAVE_SVML.  If the code under test
    ever calls one of them, report that and exit with status 1.
 */
static void svml_missing() {
    fputs("Program called unavailable SVML function!\n", stderr);
    exit(1);
}
|
||||
|
||||
// The double-precision math routines are overloaded on Windows, so an
// expression such as (void *)sin has no single function to refer to.  Each
// wrapper below has exactly one signature and simply forwards to the
// corresponding library routine; lRunTest() registers the wrappers'
// addresses under the plain C names ("sin", "cos", ...).

double Sin(double x) {
    const double value = sin(x);
    return value;
}

double Cos(double x) {
    const double value = cos(x);
    return value;
}

double Tan(double x) {
    const double value = tan(x);
    return value;
}

double Atan(double x) {
    const double value = atan(x);
    return value;
}

double Atan2(double y, double x) {
    const double value = atan2(y, x);
    return value;
}

double Pow(double a, double b) {
    const double value = pow(a, b);
    return value;
}

double Exp(double x) {
    const double value = exp(x);
    return value;
}

double Log(double x) {
    const double value = log(x);
    return value;
}
|
||||
|
||||
static bool lRunTest(const char *fn) {
|
||||
llvm::LLVMContext *ctx = new llvm::LLVMContext;
|
||||
|
||||
llvm::OwningPtr<llvm::MemoryBuffer> buf;
|
||||
llvm::error_code err = llvm::MemoryBuffer::getFileOrSTDIN(fn, buf);
|
||||
if (err) {
|
||||
fprintf(stderr, "Unable to open file \"%s\": %s\n", fn, err.message().c_str());
|
||||
delete ctx;
|
||||
return false;
|
||||
}
|
||||
std::string bcErr;
|
||||
llvm::Module *module = llvm::ParseBitcodeFile(buf.get(), *ctx, &bcErr);
|
||||
|
||||
if (!module) {
|
||||
fprintf(stderr, "Bitcode reader failed for \"%s\": %s\n", fn, bcErr.c_str());
|
||||
delete ctx;
|
||||
return false;
|
||||
}
|
||||
|
||||
std::string eeError;
|
||||
#if defined(LLVM_3_0) || defined(LLVM_3_0svn) || defined(LLVM_3_1svn)
|
||||
llvm::EngineBuilder engineBuilder(module);
|
||||
engineBuilder.setErrorStr(&eeError);
|
||||
engineBuilder.setEngineKind(llvm::EngineKind::JIT);
|
||||
#if 0
|
||||
std::vector<std::string> attributes;
|
||||
if (target != NULL && !strcmp(target, "avx"))
|
||||
attributes.push_back("+avx");
|
||||
engineBuilder.setMAttrs(attributes);
|
||||
engineBuilder.setUseMCJIT(true);
|
||||
#endif
|
||||
llvm::ExecutionEngine *ee = engineBuilder.create();
|
||||
#else
|
||||
llvm::ExecutionEngine *ee = llvm::ExecutionEngine::createJIT(module, &eeError);
|
||||
#endif
|
||||
if (!ee) {
|
||||
fprintf(stderr, "Unable to create ExecutionEngine: %s\n", eeError.c_str());
|
||||
return false;
|
||||
}
|
||||
|
||||
llvm::Function *func;
|
||||
#define DO_FUNC(FUNC ,FUNCNAME) \
|
||||
if ((func = module->getFunction(FUNCNAME)) != NULL) \
|
||||
ee->addGlobalMapping(func, (void *)FUNC)
|
||||
DO_FUNC(ISPCLaunch, "ISPCLaunch");
|
||||
DO_FUNC(ISPCSync, "ISPCSync");
|
||||
DO_FUNC(ISPCAlloc, "ISPCAlloc");
|
||||
DO_FUNC(putchar, "putchar");
|
||||
DO_FUNC(printf, "printf");
|
||||
DO_FUNC(fflush, "fflush");
|
||||
DO_FUNC(sinf, "sinf");
|
||||
DO_FUNC(cosf, "cosf");
|
||||
DO_FUNC(tanf, "tanf");
|
||||
DO_FUNC(atanf, "atanf");
|
||||
DO_FUNC(atan2f, "atan2f");
|
||||
DO_FUNC(powf, "powf");
|
||||
DO_FUNC(expf, "expf");
|
||||
DO_FUNC(logf, "logf");
|
||||
DO_FUNC(Sin, "sin");
|
||||
DO_FUNC(Cos, "cos");
|
||||
DO_FUNC(Tan, "tan");
|
||||
DO_FUNC(Atan, "atan");
|
||||
DO_FUNC(Atan2, "atan2");
|
||||
DO_FUNC(Pow, "pow");
|
||||
DO_FUNC(Exp, "exp");
|
||||
DO_FUNC(Log, "log");
|
||||
DO_FUNC(memset, "memset");
|
||||
#ifdef ISPC_IS_APPLE
|
||||
DO_FUNC(memset_pattern4, "memset_pattern4");
|
||||
DO_FUNC(memset_pattern8, "memset_pattern8");
|
||||
DO_FUNC(memset_pattern16, "memset_pattern16");
|
||||
#endif
|
||||
|
||||
#ifdef ISPC_HAVE_SVML
|
||||
#define DO_SVML(FUNC ,FUNCNAME) \
|
||||
if ((func = module->getFunction(FUNCNAME)) != NULL) \
|
||||
ee->addGlobalMapping(func, (void *)FUNC)
|
||||
#else
|
||||
#define DO_SVML(FUNC, FUNCNAME) \
|
||||
if ((func = module->getFunction(FUNCNAME)) != NULL) \
|
||||
ee->addGlobalMapping(func, (void *)svml_missing)
|
||||
#endif
|
||||
|
||||
DO_SVML(__svml_sinf4, "__svml_sinf4");
|
||||
DO_SVML(__svml_cosf4, "__svml_cosf4");
|
||||
DO_SVML(__svml_sincosf4, "__svml_sincosf4");
|
||||
DO_SVML(__svml_tanf4, "__svml_tanf4");
|
||||
DO_SVML(__svml_atanf4, "__svml_atanf4");
|
||||
DO_SVML(__svml_atan2f4, "__svml_atan2f4");
|
||||
DO_SVML(__svml_expf4, "__svml_expf4");
|
||||
DO_SVML(__svml_logf4, "__svml_logf4");
|
||||
DO_SVML(__svml_powf4, "__svml_powf4");
|
||||
|
||||
// figure out the vector width in the compiled code
|
||||
func = module->getFunction("width");
|
||||
if (!func) {
|
||||
fprintf(stderr, "No width() function found!\n");
|
||||
return false;
|
||||
}
|
||||
int width;
|
||||
{
|
||||
typedef int (*PFN)();
|
||||
PFN pfn = reinterpret_cast<PFN>(ee->getPointerToFunction(func));
|
||||
width = pfn();
|
||||
assert(width == 4 || width == 8 || width == 12 || width == 16);
|
||||
}
|
||||
|
||||
// find the value that returns the desired result
|
||||
func = module->getFunction("result");
|
||||
bool foundResult = (func != NULL);
|
||||
float result[16];
|
||||
for (int i = 0; i < 16; ++i)
|
||||
result[i] = 0;
|
||||
if (foundResult) {
|
||||
typedef void (*PFN)(float *);
|
||||
PFN pfn = reinterpret_cast<PFN>(ee->getPointerToFunction(func));
|
||||
pfn(result);
|
||||
}
|
||||
else
|
||||
fprintf(stderr, "Warning: no result() function found.\n");
|
||||
|
||||
// try to find a function to run
|
||||
float returned[16];
|
||||
for (int i = 0; i < 16; ++i)
|
||||
returned[i] = 0;
|
||||
float vfloat[16] = { 1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16};
|
||||
double vdouble[16] = { 1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16};
|
||||
int vint[16] = { 2,4,6,8,10,12,14,16,18,20,22,24,26,28,30,32 };
|
||||
int vint2[16] = { 5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20};
|
||||
|
||||
if ((func = module->getFunction("f_v")) != NULL) {
|
||||
typedef void (*PFN)(float *);
|
||||
PFN pfn = reinterpret_cast<PFN>(ee->getPointerToFunction(func));
|
||||
pfn(returned);
|
||||
}
|
||||
else if ((func = module->getFunction("f_f")) != NULL) {
|
||||
typedef void (*PFN)(float *, float *);
|
||||
PFN pfn = reinterpret_cast<PFN>(ee->getPointerToFunction(func));
|
||||
llvm::verifyFunction(*func);
|
||||
pfn(returned, vfloat);
|
||||
}
|
||||
else if ((func = module->getFunction("f_fu")) != NULL) {
|
||||
typedef void (*PFN)(float *, float *, float fu);
|
||||
PFN pfn = reinterpret_cast<PFN>(ee->getPointerToFunction(func));
|
||||
llvm::verifyFunction(*func);
|
||||
pfn(returned, vfloat, 5.);
|
||||
}
|
||||
else if ((func = module->getFunction("f_fi")) != NULL) {
|
||||
typedef void (*PFN)(float *, float *, int *);
|
||||
PFN pfn = reinterpret_cast<PFN>(ee->getPointerToFunction(func));
|
||||
pfn(returned, vfloat, vint);
|
||||
}
|
||||
else if ((func = module->getFunction("f_du")) != NULL) {
|
||||
typedef void (*PFN)(float *, double *, double);
|
||||
PFN pfn = reinterpret_cast<PFN>(ee->getPointerToFunction(func));
|
||||
pfn(returned, vdouble, 5.);
|
||||
}
|
||||
else if ((func = module->getFunction("f_duf")) != NULL) {
|
||||
typedef void (*PFN)(float *, double *, float);
|
||||
PFN pfn = reinterpret_cast<PFN>(ee->getPointerToFunction(func));
|
||||
pfn(returned, vdouble, 5.f);
|
||||
}
|
||||
else if ((func = module->getFunction("f_di")) != NULL) {
|
||||
typedef void (*PFN)(float *, double *, int *);
|
||||
PFN pfn = reinterpret_cast<PFN>(ee->getPointerToFunction(func));
|
||||
pfn(returned, vdouble, vint2);
|
||||
}
|
||||
else {
|
||||
fprintf(stderr, "Unable to find runnable function in file \"%s\"\n", fn);
|
||||
return false;
|
||||
}
|
||||
|
||||
// see if we got the right result
|
||||
bool resultsMatch = true;
|
||||
if (foundResult) {
|
||||
for (int i = 0; i < width; ++i)
|
||||
if (returned[i] != result[i]) {
|
||||
resultsMatch = false;
|
||||
fprintf(stderr, "Test \"%s\" RETURNED %d: %g / %a EXPECTED %g / %a\n",
|
||||
fn, i, returned[i], returned[i], result[i], result[i]);
|
||||
}
|
||||
}
|
||||
else {
|
||||
for (int i = 0; i < width; ++i)
|
||||
fprintf(stderr, "Test \"%s\" returned %d: %g / %a\n",
|
||||
fn, i, returned[i], returned[i]);
|
||||
}
|
||||
if (foundResult && shouldFail && resultsMatch)
|
||||
fprintf(stderr, "Test %s unexpectedly passed\n", fn);
|
||||
|
||||
delete ee;
|
||||
delete ctx;
|
||||
|
||||
return foundResult && resultsMatch;
|
||||
}
|
||||
|
||||
|
||||
int main(int argc, char *argv[]) {
|
||||
llvm::InitializeNativeTarget();
|
||||
#if defined(LLVM_3_0) || defined(LLVM_3_0svn) || defined(LLVM_3_1svn)
|
||||
LLVMLinkInJIT();
|
||||
#endif
|
||||
|
||||
const char *filename = NULL;
|
||||
for (int i = 1; i < argc; ++i) {
|
||||
if (!strcmp(argv[i], "--help") || !strcmp(argv[i], "-h"))
|
||||
usage(0);
|
||||
if (!strcmp(argv[i], "-f"))
|
||||
shouldFail = true;
|
||||
else
|
||||
filename = argv[i];
|
||||
}
|
||||
|
||||
return (lRunTest(filename) == true) ? 0 : 1;
|
||||
}
|
||||
@@ -1,90 +0,0 @@
|
||||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
|
||||
<ItemGroup Label="ProjectConfigurations">
|
||||
<ProjectConfiguration Include="Debug|Win32">
|
||||
<Configuration>Debug</Configuration>
|
||||
<Platform>Win32</Platform>
|
||||
</ProjectConfiguration>
|
||||
<ProjectConfiguration Include="Release|Win32">
|
||||
<Configuration>Release</Configuration>
|
||||
<Platform>Win32</Platform>
|
||||
</ProjectConfiguration>
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<ClCompile Include="ispc_test.cpp" />
|
||||
</ItemGroup>
|
||||
<PropertyGroup Label="Globals">
|
||||
<ProjectGuid>{92547BA8-BE86-4E78-8799-1D72A70E5831}</ProjectGuid>
|
||||
<Keyword>Win32Proj</Keyword>
|
||||
<RootNamespace>ispc_test</RootNamespace>
|
||||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
|
||||
<ConfigurationType>Application</ConfigurationType>
|
||||
<UseDebugLibraries>true</UseDebugLibraries>
|
||||
<CharacterSet>Unicode</CharacterSet>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
|
||||
<ConfigurationType>Application</ConfigurationType>
|
||||
<UseDebugLibraries>false</UseDebugLibraries>
|
||||
<WholeProgramOptimization>true</WholeProgramOptimization>
|
||||
<CharacterSet>Unicode</CharacterSet>
|
||||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
||||
<ImportGroup Label="ExtensionSettings">
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
|
||||
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
|
||||
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
|
||||
</ImportGroup>
|
||||
<PropertyGroup Label="UserMacros" />
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
|
||||
<LinkIncremental>true</LinkIncremental>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
|
||||
<LinkIncremental>false</LinkIncremental>
|
||||
</PropertyGroup>
|
||||
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
|
||||
<ClCompile>
|
||||
<PrecompiledHeader>
|
||||
</PrecompiledHeader>
|
||||
<WarningLevel>Level3</WarningLevel>
|
||||
<Optimization>Disabled</Optimization>
|
||||
<PreprocessorDefinitions>LLVM_3_0;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
<AdditionalIncludeDirectories>$(LLVM_INSTALL_DIR)/include</AdditionalIncludeDirectories>
|
||||
<DisableSpecificWarnings>4146;4355;4800</DisableSpecificWarnings>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
<SubSystem>Console</SubSystem>
|
||||
<GenerateDebugInformation>true</GenerateDebugInformation>
|
||||
<AdditionalLibraryDirectories>$(LLVM_INSTALL_DIR)/lib</AdditionalLibraryDirectories>
|
||||
<AdditionalDependencies>LLVMAnalysis.lib;LLVMArchive.lib;LLVMAsmPrinter.lib;LLVMBitReader.lib;LLVMBitWriter.lib;LLVMCodeGen.lib;LLVMCore.lib;LLVMExecutionEngine.lib;LLVMInstCombine.lib;LLVMInstrumentation.lib;LLVMipa.lib;LLVMipo.lib;LLVMJIT.lib;LLVMLinker.lib;LLVMMC.lib;LLVMMCParser.lib;LLVMObject.lib;LLVMScalarOpts.lib;LLVMSelectionDAG.lib;LLVMSupport.lib;LLVMTarget.lib;LLVMTransformUtils.lib;LLVMX86ASMPrinter.lib;LLVMX86ASMParser.lib;LLVMX86Utils.lib;LLVMX86CodeGen.lib;LLVMX86Disassembler.lib;LLVMX86Desc.lib;LLVMX86Info.lib;%(AdditionalDependencies)</AdditionalDependencies>
|
||||
</Link>
|
||||
</ItemDefinitionGroup>
|
||||
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
|
||||
<ClCompile>
|
||||
<WarningLevel>Level3</WarningLevel>
|
||||
<PrecompiledHeader>
|
||||
</PrecompiledHeader>
|
||||
<Optimization>MaxSpeed</Optimization>
|
||||
<FunctionLevelLinking>true</FunctionLevelLinking>
|
||||
<IntrinsicFunctions>true</IntrinsicFunctions>
|
||||
<PreprocessorDefinitions>LLVM_3_0;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
<AdditionalIncludeDirectories>$(LLVM_INSTALL_DIR)/include</AdditionalIncludeDirectories>
|
||||
<DisableSpecificWarnings>4146;4355;4800</DisableSpecificWarnings>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
<SubSystem>Console</SubSystem>
|
||||
<GenerateDebugInformation>true</GenerateDebugInformation>
|
||||
<EnableCOMDATFolding>true</EnableCOMDATFolding>
|
||||
<OptimizeReferences>true</OptimizeReferences>
|
||||
<AdditionalLibraryDirectories>$(LLVM_INSTALL_DIR)/lib</AdditionalLibraryDirectories>
|
||||
<AdditionalDependencies>LLVMAnalysis.lib;LLVMArchive.lib;LLVMAsmPrinter.lib;LLVMBitReader.lib;LLVMBitWriter.lib;LLVMCodeGen.lib;LLVMCore.lib;LLVMExecutionEngine.lib;LLVMInstCombine.lib;LLVMInstrumentation.lib;LLVMipa.lib;LLVMipo.lib;LLVMJIT.lib;LLVMLinker.lib;LLVMMC.lib;LLVMMCParser.lib;LLVMObject.lib;LLVMScalarOpts.lib;LLVMSelectionDAG.lib;LLVMSupport.lib;LLVMTarget.lib;LLVMTransformUtils.lib;LLVMX86ASMPrinter.lib;LLVMX86ASMParser.lib;LLVMX86Utils.lib;LLVMX86CodeGen.lib;LLVMX86Disassembler.lib;LLVMX86Desc.lib;LLVMX86Info.lib;%(AdditionalDependencies)</AdditionalDependencies>
|
||||
</Link>
|
||||
</ItemDefinitionGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||
<ImportGroup Label="ExtensionTargets">
|
||||
</ImportGroup>
|
||||
</Project>
|
||||
109
lex.ll
109
lex.ll
@@ -42,7 +42,7 @@
|
||||
#include <stdlib.h>
|
||||
#include <stdint.h>
|
||||
|
||||
static uint64_t lParseBinary(const char *ptr, SourcePos pos);
|
||||
static uint64_t lParseBinary(const char *ptr, SourcePos pos, char **endPtr);
|
||||
static void lCComment(SourcePos *);
|
||||
static void lCppComment(SourcePos *);
|
||||
static void lHandleCppHash(SourcePos *);
|
||||
@@ -67,7 +67,7 @@ inline int isatty(int) { return 0; }
|
||||
%option nounistd
|
||||
|
||||
WHITESPACE [ \t\r]+
|
||||
INT_NUMBER (([0-9]+)|(0x[0-9a-fA-F]+)|(0b[01]+))
|
||||
INT_NUMBER (([0-9]+)|(0x[0-9a-fA-F]+)|(0b[01]+))[kMG]?
|
||||
FLOAT_NUMBER (([0-9]+|(([0-9]+\.[0-9]*[fF]?)|(\.[0-9]+)))([eE][-+]?[0-9]+)?[fF]?)
|
||||
HEX_FLOAT_NUMBER (0x[01](\.[0-9a-fA-F]*)?p[-+]?[0-9]+[fF]?)
|
||||
|
||||
@@ -148,65 +148,62 @@ L?\"(\\.|[^\\"])*\" { lStringConst(yylval, yylloc); return TOKEN_STRING_LITERAL;
|
||||
return TOKEN_IDENTIFIER;
|
||||
}
|
||||
|
||||
{INT_NUMBER} {
|
||||
char *endPtr = NULL;
|
||||
int64_t val;
|
||||
{INT_NUMBER}+(u|U|l|L)*? {
|
||||
int ls = 0, us = 0;
|
||||
|
||||
char *endPtr = NULL;
|
||||
if (yytext[0] == '0' && yytext[1] == 'b')
|
||||
val = lParseBinary(yytext+2, *yylloc);
|
||||
yylval->intVal = lParseBinary(yytext+2, *yylloc, &endPtr);
|
||||
else {
|
||||
#ifdef ISPC_IS_WINDOWS
|
||||
val = _strtoi64(yytext, &endPtr, 0);
|
||||
#if defined(ISPC_IS_WINDOWS) && !defined(__MINGW32__)
|
||||
yylval->intVal = _strtoi64(yytext, &endPtr, 0);
|
||||
#else
|
||||
// FIXME: should use strtouq and then issue an error if we can't
|
||||
// fit into 64 bits...
|
||||
val = strtoull(yytext, &endPtr, 0);
|
||||
yylval->intVal = strtoull(yytext, &endPtr, 0);
|
||||
#endif
|
||||
}
|
||||
|
||||
bool kilo = false, mega = false, giga = false;
|
||||
for (; *endPtr; endPtr++) {
|
||||
if (*endPtr == 'k')
|
||||
kilo = true;
|
||||
else if (*endPtr == 'M')
|
||||
mega = true;
|
||||
else if (*endPtr == 'G')
|
||||
giga = true;
|
||||
else if (*endPtr == 'l' || *endPtr == 'L')
|
||||
ls++;
|
||||
else if (*endPtr == 'u' || *endPtr == 'U')
|
||||
us++;
|
||||
}
|
||||
if (kilo)
|
||||
yylval->intVal *= 1024;
|
||||
if (mega)
|
||||
yylval->intVal *= 1024*1024;
|
||||
if (giga)
|
||||
yylval->intVal *= 1024*1024*1024;
|
||||
|
||||
if (ls >= 2)
|
||||
return us ? TOKEN_UINT64_CONSTANT : TOKEN_INT64_CONSTANT;
|
||||
else if (ls == 1)
|
||||
return us ? TOKEN_UINT32_CONSTANT : TOKEN_INT32_CONSTANT;
|
||||
|
||||
// See if we can fit this into a 32-bit integer...
|
||||
if ((val & 0xffffffff) == val) {
|
||||
yylval->int32Val = (int32_t)val;
|
||||
return TOKEN_INT32_CONSTANT;
|
||||
}
|
||||
else {
|
||||
yylval->int64Val = val;
|
||||
return TOKEN_INT64_CONSTANT;
|
||||
}
|
||||
if ((yylval->intVal & 0xffffffff) == yylval->intVal)
|
||||
return us ? TOKEN_UINT32_CONSTANT : TOKEN_INT32_CONSTANT;
|
||||
else
|
||||
return us ? TOKEN_UINT64_CONSTANT : TOKEN_INT64_CONSTANT;
|
||||
}
|
||||
|
||||
{INT_NUMBER}[uU] {
|
||||
char *endPtr = NULL;
|
||||
uint64_t val;
|
||||
|
||||
if (yytext[0] == '0' && yytext[1] == 'b')
|
||||
val = lParseBinary(yytext+2, *yylloc);
|
||||
else {
|
||||
#ifdef ISPC_IS_WINDOWS
|
||||
val = _strtoui64(yytext, &endPtr, 0);
|
||||
#else
|
||||
val = strtoull(yytext, &endPtr, 0);
|
||||
#endif
|
||||
}
|
||||
|
||||
if ((val & 0xffffffff) == val) {
|
||||
// we can represent it in a 32-bit value
|
||||
yylval->int32Val = (int32_t)val;
|
||||
return TOKEN_UINT32_CONSTANT;
|
||||
}
|
||||
else {
|
||||
yylval->int64Val = val;
|
||||
return TOKEN_UINT64_CONSTANT;
|
||||
}
|
||||
}
|
||||
|
||||
{FLOAT_NUMBER} {
|
||||
yylval->floatVal = atof(yytext);
|
||||
yylval->floatVal = (float)atof(yytext);
|
||||
return TOKEN_FLOAT_CONSTANT;
|
||||
}
|
||||
|
||||
{HEX_FLOAT_NUMBER} {
|
||||
yylval->floatVal = lParseHexFloat(yytext);
|
||||
yylval->floatVal = (float)lParseHexFloat(yytext);
|
||||
return TOKEN_FLOAT_CONSTANT;
|
||||
}
|
||||
|
||||
@@ -285,14 +282,11 @@ L?\"(\\.|[^\\"])*\" { lStringConst(yylval, yylloc); return TOKEN_STRING_LITERAL;
|
||||
/** Return the integer version of a binary constant from a string.
|
||||
*/
|
||||
static uint64_t
|
||||
lParseBinary(const char *ptr, SourcePos pos) {
|
||||
lParseBinary(const char *ptr, SourcePos pos, char **endPtr) {
|
||||
uint64_t val = 0;
|
||||
bool warned = false;
|
||||
|
||||
while (*ptr != '\0') {
|
||||
/* if this hits, the regexp for 0b... constants is broken */
|
||||
assert(*ptr == '0' || *ptr == '1');
|
||||
|
||||
while (*ptr == '0' || *ptr == '1') {
|
||||
if ((val & (((int64_t)1)<<63)) && warned == false) {
|
||||
// We're about to shift out a set bit
|
||||
Warning(pos, "Can't represent binary constant with a 64-bit integer type");
|
||||
@@ -302,6 +296,7 @@ lParseBinary(const char *ptr, SourcePos pos) {
|
||||
val = (val << 1) | (*ptr == '0' ? 0 : 1);
|
||||
++ptr;
|
||||
}
|
||||
*endPtr = (char *)ptr;
|
||||
return val;
|
||||
}
|
||||
|
||||
@@ -346,7 +341,7 @@ static void lHandleCppHash(SourcePos *pos) {
|
||||
char *ptr, *src;
|
||||
|
||||
// Advance past the opening stuff on the line.
|
||||
assert(yytext[0] == '#');
|
||||
Assert(yytext[0] == '#');
|
||||
if (yytext[1] == ' ')
|
||||
// On Linux/OSX, the preprocessor gives us lines like
|
||||
// # 1234 "foo.c"
|
||||
@@ -354,7 +349,7 @@ static void lHandleCppHash(SourcePos *pos) {
|
||||
else {
|
||||
// On windows, cl.exe's preprocessor gives us lines of the form:
|
||||
// #line 1234 "foo.c"
|
||||
assert(!strncmp(yytext+1, "line ", 5));
|
||||
Assert(!strncmp(yytext+1, "line ", 5));
|
||||
ptr = yytext + 6;
|
||||
}
|
||||
|
||||
@@ -364,13 +359,13 @@ static void lHandleCppHash(SourcePos *pos) {
|
||||
pos->last_column = 1;
|
||||
// Make sure that the character after the integer is a space and that
|
||||
// then we have open quotes
|
||||
assert(src != ptr && src[0] == ' ' && src[1] == '"');
|
||||
Assert(src != ptr && src[0] == ' ' && src[1] == '"');
|
||||
src += 2;
|
||||
|
||||
// And the filename is everything up until the closing quotes
|
||||
std::string filename;
|
||||
while (*src != '"') {
|
||||
assert(*src && *src != '\n');
|
||||
Assert(*src && *src != '\n');
|
||||
filename.push_back(*src);
|
||||
++src;
|
||||
}
|
||||
@@ -471,13 +466,13 @@ ipow2(int exponent) {
|
||||
*/
|
||||
static double
|
||||
lParseHexFloat(const char *ptr) {
|
||||
assert(ptr != NULL);
|
||||
Assert(ptr != NULL);
|
||||
|
||||
assert(ptr[0] == '0' && ptr[1] == 'x');
|
||||
Assert(ptr[0] == '0' && ptr[1] == 'x');
|
||||
ptr += 2;
|
||||
|
||||
// Start initializing the mantissa
|
||||
assert(*ptr == '0' || *ptr == '1');
|
||||
Assert(*ptr == '0' || *ptr == '1');
|
||||
double mantissa = (*ptr == '1') ? 1. : 0.;
|
||||
++ptr;
|
||||
|
||||
@@ -497,7 +492,7 @@ lParseHexFloat(const char *ptr) {
|
||||
else if (*ptr >= 'a' && *ptr <= 'f')
|
||||
digit = 10 + *ptr - 'a';
|
||||
else {
|
||||
assert(*ptr >= 'A' && *ptr <= 'F');
|
||||
Assert(*ptr >= 'A' && *ptr <= 'F');
|
||||
digit = 10 + *ptr - 'A';
|
||||
}
|
||||
|
||||
@@ -510,7 +505,7 @@ lParseHexFloat(const char *ptr) {
|
||||
else
|
||||
// If there's not a '.', then we better be going straight to the
|
||||
// exponent
|
||||
assert(*ptr == 'p');
|
||||
Assert(*ptr == 'p');
|
||||
|
||||
++ptr; // skip the 'p'
|
||||
|
||||
|
||||
269
llvmutil.cpp
269
llvmutil.cpp
@@ -36,7 +36,9 @@
|
||||
*/
|
||||
|
||||
#include "llvmutil.h"
|
||||
#include "ispc.h"
|
||||
#include "type.h"
|
||||
#include <llvm/Instructions.h>
|
||||
|
||||
LLVM_TYPE_CONST llvm::Type *LLVMTypes::VoidType = NULL;
|
||||
LLVM_TYPE_CONST llvm::PointerType *LLVMTypes::VoidPointerType = NULL;
|
||||
@@ -105,11 +107,14 @@ InitLLVMUtil(llvm::LLVMContext *ctx, Target target) {
|
||||
LLVMTypes::FloatPointerType = llvm::PointerType::get(LLVMTypes::FloatType, 0);
|
||||
LLVMTypes::DoublePointerType = llvm::PointerType::get(LLVMTypes::DoubleType, 0);
|
||||
|
||||
// Note that both the mask and bool vectors are vector of int32s
|
||||
// (not i1s). LLVM ends up generating much better SSE code with
|
||||
// this representation.
|
||||
LLVMTypes::MaskType = LLVMTypes::BoolVectorType =
|
||||
llvm::VectorType::get(llvm::Type::getInt32Ty(*ctx), target.vectorWidth);
|
||||
if (target.maskBitCount == 1)
|
||||
LLVMTypes::MaskType = LLVMTypes::BoolVectorType =
|
||||
llvm::VectorType::get(llvm::Type::getInt1Ty(*ctx), target.vectorWidth);
|
||||
else {
|
||||
Assert(target.maskBitCount == 32);
|
||||
LLVMTypes::MaskType = LLVMTypes::BoolVectorType =
|
||||
llvm::VectorType::get(llvm::Type::getInt32Ty(*ctx), target.vectorWidth);
|
||||
}
|
||||
|
||||
LLVMTypes::Int1VectorType =
|
||||
llvm::VectorType::get(llvm::Type::getInt1Ty(*ctx), target.vectorWidth);
|
||||
@@ -141,7 +146,11 @@ InitLLVMUtil(llvm::LLVMContext *ctx, Target target) {
|
||||
|
||||
std::vector<llvm::Constant *> maskOnes;
|
||||
llvm::Constant *onMask = NULL;
|
||||
onMask = llvm::ConstantInt::get(llvm::Type::getInt32Ty(*ctx), -1,
|
||||
if (target.maskBitCount == 1)
|
||||
onMask = llvm::ConstantInt::get(llvm::Type::getInt1Ty(*ctx), 1,
|
||||
false /*unsigned*/); // 0x1
|
||||
else
|
||||
onMask = llvm::ConstantInt::get(llvm::Type::getInt32Ty(*ctx), -1,
|
||||
true /*signed*/); // 0xffffffff
|
||||
|
||||
for (int i = 0; i < target.vectorWidth; ++i)
|
||||
@@ -150,8 +159,12 @@ InitLLVMUtil(llvm::LLVMContext *ctx, Target target) {
|
||||
|
||||
std::vector<llvm::Constant *> maskZeros;
|
||||
llvm::Constant *offMask = NULL;
|
||||
offMask = llvm::ConstantInt::get(llvm::Type::getInt32Ty(*ctx), 0,
|
||||
true /*signed*/);
|
||||
if (target.maskBitCount == 1)
|
||||
offMask = llvm::ConstantInt::get(llvm::Type::getInt1Ty(*ctx), 0,
|
||||
true /*signed*/);
|
||||
else
|
||||
offMask = llvm::ConstantInt::get(llvm::Type::getInt32Ty(*ctx), 0,
|
||||
true /*signed*/);
|
||||
|
||||
for (int i = 0; i < target.vectorWidth; ++i)
|
||||
maskZeros.push_back(offMask);
|
||||
@@ -424,7 +437,7 @@ LLVMBoolVector(bool b) {
|
||||
v = llvm::ConstantInt::get(LLVMTypes::Int32Type, b ? 0xffffffff : 0,
|
||||
false /*unsigned*/);
|
||||
else {
|
||||
assert(LLVMTypes::BoolVectorType->getElementType() ==
|
||||
Assert(LLVMTypes::BoolVectorType->getElementType() ==
|
||||
llvm::Type::getInt1Ty(*g->ctx));
|
||||
v = b ? LLVMTrue : LLVMFalse;
|
||||
}
|
||||
@@ -445,7 +458,7 @@ LLVMBoolVector(const bool *bvec) {
|
||||
v = llvm::ConstantInt::get(LLVMTypes::Int32Type, bvec[i] ? 0xffffffff : 0,
|
||||
false /*unsigned*/);
|
||||
else {
|
||||
assert(LLVMTypes::BoolVectorType->getElementType() ==
|
||||
Assert(LLVMTypes::BoolVectorType->getElementType() ==
|
||||
llvm::Type::getInt1Ty(*g->ctx));
|
||||
v = bvec[i] ? LLVMTrue : LLVMFalse;
|
||||
}
|
||||
@@ -454,3 +467,239 @@ LLVMBoolVector(const bool *bvec) {
|
||||
}
|
||||
return llvm::ConstantVector::get(vals);
|
||||
}
|
||||
|
||||
|
||||
/** Conservative test to see if two llvm::Values are equal. There are
|
||||
(potentially many) cases where the two values actually are equal but
|
||||
this will return false. However, if it does return true, the two
|
||||
vectors definitely are equal.
|
||||
|
||||
@todo This seems to catch all of the cases we currently need it for in
|
||||
practice, but it's be nice to make it a little more robust/general. In
|
||||
general, though, a little something called the halting problem means we
|
||||
won't get all of them.
|
||||
*/
|
||||
static bool
|
||||
lValuesAreEqual(llvm::Value *v0, llvm::Value *v1,
|
||||
std::vector<llvm::PHINode *> &seenPhi0,
|
||||
std::vector<llvm::PHINode *> &seenPhi1) {
|
||||
// Thanks to the fact that LLVM hashes and returns the same pointer for
|
||||
// constants (of all sorts, even constant expressions), this first test
|
||||
// actually catches a lot of cases. LLVM's SSA form also helps a lot
|
||||
// with this..
|
||||
if (v0 == v1)
|
||||
return true;
|
||||
|
||||
Assert(seenPhi0.size() == seenPhi1.size());
|
||||
for (unsigned int i = 0; i < seenPhi0.size(); ++i)
|
||||
if (v0 == seenPhi0[i] && v1 == seenPhi1[i])
|
||||
return true;
|
||||
|
||||
llvm::BinaryOperator *bo0 = llvm::dyn_cast<llvm::BinaryOperator>(v0);
|
||||
llvm::BinaryOperator *bo1 = llvm::dyn_cast<llvm::BinaryOperator>(v1);
|
||||
if (bo0 != NULL && bo1 != NULL) {
|
||||
if (bo0->getOpcode() != bo1->getOpcode())
|
||||
return false;
|
||||
return (lValuesAreEqual(bo0->getOperand(0), bo1->getOperand(0),
|
||||
seenPhi0, seenPhi1) &&
|
||||
lValuesAreEqual(bo0->getOperand(1), bo1->getOperand(1),
|
||||
seenPhi0, seenPhi1));
|
||||
}
|
||||
|
||||
llvm::PHINode *phi0 = llvm::dyn_cast<llvm::PHINode>(v0);
|
||||
llvm::PHINode *phi1 = llvm::dyn_cast<llvm::PHINode>(v1);
|
||||
if (phi0 != NULL && phi1 != NULL) {
|
||||
if (phi0->getNumIncomingValues() != phi1->getNumIncomingValues())
|
||||
return false;
|
||||
|
||||
seenPhi0.push_back(phi0);
|
||||
seenPhi1.push_back(phi1);
|
||||
|
||||
unsigned int numIncoming = phi0->getNumIncomingValues();
|
||||
// Check all of the incoming values: if all of them are all equal,
|
||||
// then we're good.
|
||||
bool anyFailure = false;
|
||||
for (unsigned int i = 0; i < numIncoming; ++i) {
|
||||
Assert(phi0->getIncomingBlock(i) == phi1->getIncomingBlock(i));
|
||||
if (!lValuesAreEqual(phi0->getIncomingValue(i),
|
||||
phi1->getIncomingValue(i), seenPhi0, seenPhi1)) {
|
||||
anyFailure = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
seenPhi0.pop_back();
|
||||
seenPhi1.pop_back();
|
||||
|
||||
return !anyFailure;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
/** Given an llvm::Value known to be an integer, return its value as
|
||||
an int64_t.
|
||||
*/
|
||||
static int64_t
|
||||
lGetIntValue(llvm::Value *offset) {
|
||||
llvm::ConstantInt *intOffset = llvm::dyn_cast<llvm::ConstantInt>(offset);
|
||||
Assert(intOffset && (intOffset->getBitWidth() == 32 ||
|
||||
intOffset->getBitWidth() == 64));
|
||||
return intOffset->getSExtValue();
|
||||
}
|
||||
|
||||
|
||||
/** This function takes chains of InsertElement instructions along the
|
||||
lines of:
|
||||
|
||||
%v0 = insertelement undef, value_0, i32 index_0
|
||||
%v1 = insertelement %v1, value_1, i32 index_1
|
||||
...
|
||||
%vn = insertelement %vn-1, value_n-1, i32 index_n-1
|
||||
|
||||
and initializes the provided elements array such that the i'th
|
||||
llvm::Value * in the array is the element that was inserted into the
|
||||
i'th element of the vector.
|
||||
*/
|
||||
void
|
||||
LLVMFlattenInsertChain(llvm::InsertElementInst *ie, int vectorWidth,
|
||||
llvm::Value **elements) {
|
||||
for (int i = 0; i < vectorWidth; ++i)
|
||||
elements[i] = NULL;
|
||||
|
||||
while (ie != NULL) {
|
||||
int64_t iOffset = lGetIntValue(ie->getOperand(2));
|
||||
Assert(iOffset >= 0 && iOffset < vectorWidth);
|
||||
Assert(elements[iOffset] == NULL);
|
||||
|
||||
elements[iOffset] = ie->getOperand(1);
|
||||
|
||||
llvm::Value *insertBase = ie->getOperand(0);
|
||||
ie = llvm::dyn_cast<llvm::InsertElementInst>(insertBase);
|
||||
if (ie == NULL) {
|
||||
if (llvm::isa<llvm::UndefValue>(insertBase))
|
||||
return;
|
||||
|
||||
llvm::ConstantVector *cv =
|
||||
llvm::dyn_cast<llvm::ConstantVector>(insertBase);
|
||||
Assert(cv != NULL);
|
||||
Assert(iOffset < (int)cv->getNumOperands());
|
||||
elements[iOffset] = cv->getOperand(iOffset);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/** Tests to see if all of the elements of the vector in the 'v' parameter
|
||||
are equal. Like lValuesAreEqual(), this is a conservative test and may
|
||||
return false for arrays where the values are actually all equal. */
|
||||
bool
|
||||
LLVMVectorValuesAllEqual(llvm::Value *v, int vectorLength,
|
||||
std::vector<llvm::PHINode *> &seenPhis) {
|
||||
if (llvm::isa<llvm::ConstantAggregateZero>(v))
|
||||
return true;
|
||||
|
||||
llvm::ConstantVector *cv = llvm::dyn_cast<llvm::ConstantVector>(v);
|
||||
if (cv != NULL)
|
||||
return (cv->getSplatValue() != NULL);
|
||||
|
||||
llvm::BinaryOperator *bop = llvm::dyn_cast<llvm::BinaryOperator>(v);
|
||||
if (bop != NULL)
|
||||
return (LLVMVectorValuesAllEqual(bop->getOperand(0), vectorLength,
|
||||
seenPhis) &&
|
||||
LLVMVectorValuesAllEqual(bop->getOperand(1), vectorLength,
|
||||
seenPhis));
|
||||
|
||||
llvm::CastInst *cast = llvm::dyn_cast<llvm::CastInst>(v);
|
||||
if (cast != NULL)
|
||||
return LLVMVectorValuesAllEqual(cast->getOperand(0), vectorLength,
|
||||
seenPhis);
|
||||
|
||||
llvm::InsertElementInst *ie = llvm::dyn_cast<llvm::InsertElementInst>(v);
|
||||
if (ie != NULL) {
|
||||
llvm::Value *elements[ISPC_MAX_NVEC];
|
||||
LLVMFlattenInsertChain(ie, vectorLength, elements);
|
||||
|
||||
// We will ignore any values of elements[] that are NULL; as they
|
||||
// correspond to undefined values--we just want to see if all of
|
||||
// the defined values have the same value.
|
||||
int lastNonNull = 0;
|
||||
while (lastNonNull < vectorLength && elements[lastNonNull] == NULL)
|
||||
++lastNonNull;
|
||||
|
||||
if (lastNonNull == vectorLength)
|
||||
// all of them are undef!
|
||||
return true;
|
||||
|
||||
for (int i = lastNonNull; i < vectorLength; ++i) {
|
||||
if (elements[i] == NULL)
|
||||
continue;
|
||||
|
||||
std::vector<llvm::PHINode *> seenPhi0;
|
||||
std::vector<llvm::PHINode *> seenPhi1;
|
||||
if (lValuesAreEqual(elements[lastNonNull], elements[i], seenPhi0,
|
||||
seenPhi1) == false)
|
||||
return false;
|
||||
lastNonNull = i;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
llvm::PHINode *phi = llvm::dyn_cast<llvm::PHINode>(v);
|
||||
if (phi) {
|
||||
for (unsigned int i = 0; i < seenPhis.size(); ++i)
|
||||
if (seenPhis[i] == phi)
|
||||
return true;
|
||||
|
||||
seenPhis.push_back(phi);
|
||||
|
||||
unsigned int numIncoming = phi->getNumIncomingValues();
|
||||
// Check all of the incoming values: if all of them are all equal,
|
||||
// then we're good.
|
||||
for (unsigned int i = 0; i < numIncoming; ++i) {
|
||||
if (!LLVMVectorValuesAllEqual(phi->getIncomingValue(i), vectorLength,
|
||||
seenPhis)) {
|
||||
seenPhis.pop_back();
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
seenPhis.pop_back();
|
||||
return true;
|
||||
}
|
||||
|
||||
Assert(!llvm::isa<llvm::Constant>(v));
|
||||
|
||||
if (llvm::isa<llvm::CallInst>(v) || llvm::isa<llvm::LoadInst>(v) ||
|
||||
!llvm::isa<llvm::Instruction>(v))
|
||||
return false;
|
||||
|
||||
llvm::ShuffleVectorInst *shuffle = llvm::dyn_cast<llvm::ShuffleVectorInst>(v);
|
||||
if (shuffle != NULL) {
|
||||
llvm::Value *indices = shuffle->getOperand(2);
|
||||
if (LLVMVectorValuesAllEqual(indices, vectorLength, seenPhis))
|
||||
// The easy case--just a smear of the same element across the
|
||||
// whole vector.
|
||||
return true;
|
||||
|
||||
// TODO: handle more general cases?
|
||||
return false;
|
||||
}
|
||||
|
||||
#if 0
|
||||
fprintf(stderr, "all equal: ");
|
||||
v->dump();
|
||||
fprintf(stderr, "\n");
|
||||
llvm::Instruction *inst = llvm::dyn_cast<llvm::Instruction>(v);
|
||||
if (inst) {
|
||||
inst->getParent()->dump();
|
||||
fprintf(stderr, "\n");
|
||||
fprintf(stderr, "\n");
|
||||
}
|
||||
#endif
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
|
||||
23
llvmutil.h
23
llvmutil.h
@@ -38,12 +38,23 @@
|
||||
#ifndef ISPC_LLVMUTIL_H
|
||||
#define ISPC_LLVMUTIL_H 1
|
||||
|
||||
#include "ispc.h"
|
||||
#include <llvm/LLVMContext.h>
|
||||
#include <llvm/Type.h>
|
||||
#include <llvm/DerivedTypes.h>
|
||||
#include <llvm/Constants.h>
|
||||
|
||||
namespace llvm {
|
||||
class PHINode;
|
||||
class InsertElementInst;
|
||||
}
|
||||
|
||||
// llvm::Type *s are no longer const in llvm 3.0
|
||||
#if defined(LLVM_3_0) || defined(LLVM_3_0svn) || defined(LLVM_3_1svn)
|
||||
#define LLVM_TYPE_CONST
|
||||
#else
|
||||
#define LLVM_TYPE_CONST const
|
||||
#endif
|
||||
|
||||
|
||||
/** This structure holds pointers to a variety of LLVM types; code
|
||||
elsewhere can use them from here, rather than needing to make more
|
||||
@@ -99,6 +110,7 @@ extern llvm::Constant *LLVMTrue, *LLVMFalse;
|
||||
of LLVMTypes and the LLVMTrue/LLVMFalse constants. However, it can't
|
||||
be called until the compilation target is known.
|
||||
*/
|
||||
struct Target;
|
||||
extern void InitLLVMUtil(llvm::LLVMContext *ctx, Target target);
|
||||
|
||||
/** Returns an LLVM i8 constant of the given value */
|
||||
@@ -205,4 +217,13 @@ extern llvm::Constant *LLVMMaskAllOn;
|
||||
/** LLVM constant value representing an 'all off' SIMD lane mask */
|
||||
extern llvm::Constant *LLVMMaskAllOff;
|
||||
|
||||
/** Tests to see if all of the elements of the vector in the 'v' parameter
|
||||
are equal. Like lValuesAreEqual(), this is a conservative test and may
|
||||
return false for arrays where the values are actually all equal. */
|
||||
extern bool LLVMVectorValuesAllEqual(llvm::Value *v, int vectorLength,
|
||||
std::vector<llvm::PHINode *> &seenPhis);
|
||||
|
||||
void LLVMFlattenInsertChain(llvm::InsertElementInst *ie, int vectorWidth,
|
||||
llvm::Value **elements);
|
||||
|
||||
#endif // ISPC_LLVMUTIL_H
|
||||
|
||||
106
main.cpp
106
main.cpp
@@ -37,6 +37,8 @@
|
||||
|
||||
#include "ispc.h"
|
||||
#include "module.h"
|
||||
#include "util.h"
|
||||
#include "type.h"
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <llvm/Support/PrettyStackTrace.h>
|
||||
@@ -52,24 +54,47 @@
|
||||
|
||||
#ifdef ISPC_IS_WINDOWS
|
||||
#define strcasecmp stricmp
|
||||
#ifndef BUILD_DATE
|
||||
#define BUILD_DATE __DATE__
|
||||
#endif
|
||||
#define BUILD_VERSION ""
|
||||
#endif // ISPC_IS_WINDOWS
|
||||
|
||||
static void usage(int ret) {
|
||||
printf("This is the Intel(r) SPMD Program Compiler (ispc), build %s (%s)\n\n",
|
||||
BUILD_DATE, BUILD_VERSION);
|
||||
printf("usage: ispc\n");
|
||||
static void
|
||||
lPrintVersion() {
|
||||
printf("Intel(r) SPMD Program Compiler (ispc), build %s (%s, LLVM %s)\n",
|
||||
BUILD_DATE, BUILD_VERSION,
|
||||
#ifdef LLVM_2_9
|
||||
"2.9"
|
||||
#elif defined(LLVM_3_0) || defined(LLVM_3_0svn)
|
||||
"3.0"
|
||||
#elif defined(LLVM_3_1) || defined(LLVM_3_1svn)
|
||||
"3.1"
|
||||
#else
|
||||
#error "Unhandled LLVM version"
|
||||
#endif
|
||||
);
|
||||
}
|
||||
|
||||
|
||||
static void
|
||||
usage(int ret) {
|
||||
lPrintVersion();
|
||||
printf("\nusage: ispc\n");
|
||||
printf(" [--addressing={32,64}]\t\tSelect 32- or 64-bit addressing. (Note that 32-bit\n");
|
||||
printf(" \t\taddressing calculations are done by default, even\n");
|
||||
printf(" \t\ton 64-bit target architectures.)\n");
|
||||
printf(" [--arch={%s}]\t\tSelect target architecture\n",
|
||||
Target::SupportedTargetArchs());
|
||||
printf(" [--c++-include-file=<name>]\t\tSpecify name of file to emit in #include statement in generated C++ code.\n");
|
||||
printf(" [--cpu=<cpu>]\t\t\tSelect target CPU type\n");
|
||||
printf(" <cpu>={%s}\n", Target::SupportedTargetCPUs());
|
||||
printf(" [-D<foo>]\t\t\t\t#define given value when running preprocessor\n");
|
||||
printf(" [--debug]\t\t\t\tPrint information useful for debugging ispc\n");
|
||||
printf(" [--emit-asm]\t\t\tGenerate assembly language file as output\n");
|
||||
#ifndef LLVM_2_9
|
||||
printf(" [--emit-c++]\t\t\tEmit a C++ source file as output\n");
|
||||
#endif // !LLVM_2_9
|
||||
printf(" [--emit-llvm]\t\t\tEmit LLVM bitode file as output\n");
|
||||
printf(" [--emit-obj]\t\t\tGenerate object file file as output (default)\n");
|
||||
printf(" [-g]\t\t\t\tGenerate debugging information\n");
|
||||
@@ -91,15 +116,15 @@ static void usage(int ret) {
|
||||
printf(" fast-masked-vload\t\tFaster masked vector loads on SSE (may go past end of array)\n");
|
||||
printf(" fast-math\t\t\tPerform non-IEEE-compliant optimizations of numeric expressions\n");
|
||||
#if 0
|
||||
printf(" disable-handle-pseudo-memory-ops\n");
|
||||
printf(" disable-all-on-optimizations\n");
|
||||
printf(" disable-blended-masked-stores\t\tScalarize masked stores on SSE (vs. using vblendps)\n");
|
||||
printf(" disable-coherent-control-flow\t\tDisable coherent control flow optimizations\n");
|
||||
printf(" disable-uniform-control-flow\t\tDisable uniform control flow optimizations\n");
|
||||
printf(" disable-gather-scatter-optimizations\tDisable improvements to gather/scatter\n");
|
||||
printf(" disable-blending-removal\t\tDisable eliminating blend at same scope\n");
|
||||
printf(" disable-coherent-control-flow\t\tDisable coherent control flow optimizations\n");
|
||||
printf(" disable-gather-scatter-flattening\tDisable flattening when all lanes are on\n");
|
||||
printf(" disable-gather-scatter-optimizations\tDisable improvements to gather/scatter\n");
|
||||
printf(" disable-handle-pseudo-memory-ops\n");
|
||||
printf(" disable-uniform-control-flow\t\tDisable uniform control flow optimizations\n");
|
||||
printf(" disable-uniform-memory-optimizations\tDisable uniform-based coherent memory access\n");
|
||||
printf(" disable-masked-store-optimizations\tDisable lowering to regular stores when possible\n");
|
||||
#endif
|
||||
#ifndef ISPC_IS_WINDOWS
|
||||
printf(" [--pic]\t\t\t\tGenerate position-independent code\n");
|
||||
@@ -166,10 +191,12 @@ int main(int Argc, char *Argv[]) {
|
||||
char *argv[128];
|
||||
lGetAllArgs(Argc, Argv, argc, argv);
|
||||
|
||||
#if 0
|
||||
// Use LLVM's little utility function to print out nice stack traces if
|
||||
// we crash
|
||||
llvm::sys::PrintStackTraceOnErrorSignal();
|
||||
llvm::PrettyStackTraceProgram X(argc, argv);
|
||||
#endif
|
||||
|
||||
// initialize available LLVM targets
|
||||
LLVMInitializeX86TargetInfo();
|
||||
@@ -181,9 +208,12 @@ int main(int Argc, char *Argv[]) {
|
||||
LLVMInitializeX86TargetMC();
|
||||
#endif
|
||||
|
||||
AtomicType::Init();
|
||||
|
||||
char *file = NULL;
|
||||
const char *headerFileName = NULL;
|
||||
const char *outFileName = NULL;
|
||||
const char *includeFileName = NULL;
|
||||
|
||||
// Initialize globals early so that we can set various option values
|
||||
// as we're parsing below
|
||||
@@ -203,7 +233,7 @@ int main(int Argc, char *Argv[]) {
|
||||
if (atoi(argv[i] + 13) == 64)
|
||||
g->opt.force32BitAddressing = false;
|
||||
else if (atoi(argv[i] + 13) == 32)
|
||||
g->opt.force32BitAddressing = 32;
|
||||
g->opt.force32BitAddressing = true;
|
||||
else {
|
||||
fprintf(stderr, "Addressing width \"%s\" invalid--only 32 and "
|
||||
"64 are allowed.\n", argv[i]+13);
|
||||
@@ -233,13 +263,20 @@ int main(int Argc, char *Argv[]) {
|
||||
}
|
||||
else if (!strcmp(argv[i], "--emit-asm"))
|
||||
ot = Module::Asm;
|
||||
#ifndef LLVM_2_9
|
||||
else if (!strcmp(argv[i], "--emit-c++"))
|
||||
ot = Module::CXX;
|
||||
#endif // !LLVM_2_9
|
||||
else if (!strcmp(argv[i], "--emit-llvm"))
|
||||
ot = Module::Bitcode;
|
||||
else if (!strcmp(argv[i], "--emit-obj"))
|
||||
ot = Module::Object;
|
||||
else if (!strcmp(argv[i], "--target")) {
|
||||
// FIXME: should remove this way of specifying the target...
|
||||
if (++i == argc) usage(1);
|
||||
if (++i == argc) {
|
||||
fprintf(stderr, "No target specified after --target option.\n");
|
||||
usage(1);
|
||||
}
|
||||
target = argv[i];
|
||||
}
|
||||
else if (!strncmp(argv[i], "--target=", 9))
|
||||
@@ -254,8 +291,10 @@ int main(int Argc, char *Argv[]) {
|
||||
g->mathLib = Globals::Math_SVML;
|
||||
else if (!strcmp(lib, "system"))
|
||||
g->mathLib = Globals::Math_System;
|
||||
else
|
||||
else {
|
||||
fprintf(stderr, "Unknown --math-lib= option \"%s\".\n", lib);
|
||||
usage(1);
|
||||
}
|
||||
}
|
||||
else if (!strncmp(argv[i], "--opt=", 6)) {
|
||||
const char *opt = argv[i] + 6;
|
||||
@@ -270,6 +309,8 @@ int main(int Argc, char *Argv[]) {
|
||||
|
||||
// These are only used for performance tests of specific
|
||||
// optimizations
|
||||
else if (!strcmp(opt, "disable-all-on-optimizations"))
|
||||
g->opt.disableMaskAllOnOptimizations = true;
|
||||
else if (!strcmp(opt, "disable-handle-pseudo-memory-ops"))
|
||||
g->opt.disableHandlePseudoMemoryOps = true;
|
||||
else if (!strcmp(opt, "disable-blended-masked-stores"))
|
||||
@@ -286,10 +327,10 @@ int main(int Argc, char *Argv[]) {
|
||||
g->opt.disableGatherScatterFlattening = true;
|
||||
else if (!strcmp(opt, "disable-uniform-memory-optimizations"))
|
||||
g->opt.disableUniformMemoryOptimizations = true;
|
||||
else if (!strcmp(opt, "disable-masked-store-optimizations"))
|
||||
g->opt.disableMaskedStoreOptimizations = true;
|
||||
else
|
||||
else {
|
||||
fprintf(stderr, "Unknown --opt= option \"%s\".\n", opt);
|
||||
usage(1);
|
||||
}
|
||||
}
|
||||
else if (!strcmp(argv[i], "--woff") || !strcmp(argv[i], "-woff")) {
|
||||
g->disableWarnings = true;
|
||||
@@ -302,18 +343,27 @@ int main(int Argc, char *Argv[]) {
|
||||
else if (!strcmp(argv[i], "--wno-perf") || !strcmp(argv[i], "-wno-perf"))
|
||||
g->emitPerfWarnings = false;
|
||||
else if (!strcmp(argv[i], "-o")) {
|
||||
if (++i == argc) usage(1);
|
||||
if (++i == argc) {
|
||||
fprintf(stderr, "No output file specified after -o option.\n");
|
||||
usage(1);
|
||||
}
|
||||
outFileName = argv[i];
|
||||
}
|
||||
else if (!strcmp(argv[i], "--outfile="))
|
||||
outFileName = argv[i] + strlen("--outfile=");
|
||||
else if (!strcmp(argv[i], "-h")) {
|
||||
if (++i == argc) usage(1);
|
||||
if (++i == argc) {
|
||||
fprintf(stderr, "No header file name specified after -h option.\n");
|
||||
usage(1);
|
||||
}
|
||||
headerFileName = argv[i];
|
||||
}
|
||||
else if (!strcmp(argv[i], "--header-outfile=")) {
|
||||
else if (!strncmp(argv[i], "--header-outfile=", 17)) {
|
||||
headerFileName = argv[i] + strlen("--header-outfile=");
|
||||
}
|
||||
else if (!strncmp(argv[i], "--c++-include-file=", 19)) {
|
||||
includeFileName = argv[i] + strlen("--c++-include-file=");
|
||||
}
|
||||
else if (!strcmp(argv[i], "-O0")) {
|
||||
g->opt.level = 0;
|
||||
optSet = true;
|
||||
@@ -334,15 +384,19 @@ int main(int Argc, char *Argv[]) {
|
||||
generatePIC = true;
|
||||
#endif // !ISPC_IS_WINDOWS
|
||||
else if (!strcmp(argv[i], "-v") || !strcmp(argv[i], "--version")) {
|
||||
printf("Intel(r) SPMD Program Compiler (ispc) build %s (%s)\n",
|
||||
BUILD_DATE, BUILD_VERSION);
|
||||
lPrintVersion();
|
||||
return 0;
|
||||
}
|
||||
else if (argv[i][0] == '-')
|
||||
else if (argv[i][0] == '-') {
|
||||
fprintf(stderr, "Unknown option \"%s\".\n", argv[i]);
|
||||
usage(1);
|
||||
}
|
||||
else {
|
||||
if (file != NULL)
|
||||
if (file != NULL) {
|
||||
fprintf(stderr, "Multiple input files specified on command "
|
||||
"line: \"%s\" and \"%s\".\n", file, argv[i]);
|
||||
usage(1);
|
||||
}
|
||||
else
|
||||
file = argv[i];
|
||||
}
|
||||
@@ -354,6 +408,12 @@ int main(int Argc, char *Argv[]) {
|
||||
if (debugSet && !optSet)
|
||||
g->opt.level = 0;
|
||||
|
||||
if (outFileName == NULL && headerFileName == NULL)
|
||||
Warning(SourcePos(), "No output file or header file name specified. "
|
||||
"Program will be compiled and warnings/errors will "
|
||||
"be issued, but no output will be generated.");
|
||||
|
||||
return Module::CompileAndOutput(file, arch, cpu, target, generatePIC,
|
||||
ot, outFileName, headerFileName);
|
||||
ot, outFileName, headerFileName,
|
||||
includeFileName);
|
||||
}
|
||||
|
||||
116
module.cpp
116
module.cpp
@@ -49,7 +49,6 @@
|
||||
#include "llvmutil.h"
|
||||
|
||||
#include <stdio.h>
|
||||
#include <assert.h>
|
||||
#include <stdarg.h>
|
||||
#include <ctype.h>
|
||||
#include <sys/types.h>
|
||||
@@ -77,7 +76,6 @@
|
||||
#include <llvm/Target/TargetMachine.h>
|
||||
#include <llvm/Target/TargetOptions.h>
|
||||
#include <llvm/Target/TargetData.h>
|
||||
#include <llvm/PassManager.h>
|
||||
#include <llvm/Analysis/Verifier.h>
|
||||
#include <llvm/Support/CFG.h>
|
||||
#include <clang/Frontend/CompilerInstance.h>
|
||||
@@ -150,8 +148,10 @@ extern void yy_delete_buffer(YY_BUFFER_STATE);
|
||||
|
||||
int
|
||||
Module::CompileFile() {
|
||||
#ifndef LLVM_3_1svn
|
||||
if (g->opt.fastMath == true)
|
||||
llvm::UnsafeFPMath = true;
|
||||
#endif // !LLVM_3_1svn
|
||||
|
||||
// FIXME: it'd be nice to do this in the Module constructor, but this
|
||||
// function ends up calling into routines that expect the global
|
||||
@@ -222,7 +222,7 @@ Module::AddGlobalVariable(Symbol *sym, Expr *initExpr, bool isConst) {
|
||||
if (sym == NULL || sym->type == NULL) {
|
||||
// But if these are NULL and there haven't been any previous
|
||||
// errors, something surprising is going on
|
||||
assert(errorCount > 0);
|
||||
Assert(errorCount > 0);
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -262,7 +262,7 @@ Module::AddGlobalVariable(Symbol *sym, Expr *initExpr, bool isConst) {
|
||||
"global variable \"%s\".", sym->name.c_str());
|
||||
}
|
||||
else if (initExpr != NULL) {
|
||||
initExpr = initExpr->TypeCheck();
|
||||
initExpr = TypeCheck(initExpr);
|
||||
if (initExpr != NULL) {
|
||||
// We need to make sure the initializer expression is
|
||||
// the same type as the global. (But not if it's an
|
||||
@@ -272,7 +272,7 @@ Module::AddGlobalVariable(Symbol *sym, Expr *initExpr, bool isConst) {
|
||||
initExpr = TypeConvertExpr(initExpr, sym->type, "initializer");
|
||||
|
||||
if (initExpr != NULL) {
|
||||
initExpr = initExpr->Optimize();
|
||||
initExpr = Optimize(initExpr);
|
||||
// Fingers crossed, now let's see if we've got a
|
||||
// constant value..
|
||||
llvmInitializer = initExpr->GetConstant(sym->type);
|
||||
@@ -389,7 +389,7 @@ void
|
||||
Module::AddFunctionDeclaration(Symbol *funSym, bool isInline) {
|
||||
const FunctionType *functionType =
|
||||
dynamic_cast<const FunctionType *>(funSym->type);
|
||||
assert(functionType != NULL);
|
||||
Assert(functionType != NULL);
|
||||
|
||||
// If a global variable with the same name has already been declared
|
||||
// issue an error.
|
||||
@@ -416,7 +416,7 @@ Module::AddFunctionDeclaration(Symbol *funSym, bool isInline) {
|
||||
// allowed.
|
||||
const FunctionType *ofType =
|
||||
dynamic_cast<const FunctionType *>(overloadFunc->type);
|
||||
assert(ofType != NULL);
|
||||
Assert(ofType != NULL);
|
||||
if (ofType->GetNumParameters() == functionType->GetNumParameters()) {
|
||||
int i;
|
||||
for (i = 0; i < functionType->GetNumParameters(); ++i) {
|
||||
@@ -571,7 +571,7 @@ Module::AddFunctionDeclaration(Symbol *funSym, bool isInline) {
|
||||
// Finally, we know all is good and we can add the function to the
|
||||
// symbol table
|
||||
bool ok = symbolTable->AddFunction(funSym);
|
||||
assert(ok);
|
||||
Assert(ok);
|
||||
}
|
||||
|
||||
|
||||
@@ -583,7 +583,8 @@ Module::AddFunctionDefinition(Symbol *sym, const std::vector<Symbol *> &args,
|
||||
|
||||
|
||||
bool
|
||||
Module::writeOutput(OutputType outputType, const char *outFileName) {
|
||||
Module::writeOutput(OutputType outputType, const char *outFileName,
|
||||
const char *includeFileName) {
|
||||
#if defined(LLVM_3_0) || defined(LLVM_3_0svn) || defined(LLVM_3_1svn)
|
||||
if (diBuilder != NULL && outputType != Header)
|
||||
diBuilder->finalize();
|
||||
@@ -609,6 +610,14 @@ Module::writeOutput(OutputType outputType, const char *outFileName) {
|
||||
if (strcasecmp(suffix, "o") && strcasecmp(suffix, "obj"))
|
||||
fileType = "object";
|
||||
break;
|
||||
#ifndef LLVM_2_9
|
||||
case CXX:
|
||||
if (strcasecmp(suffix, "c") && strcasecmp(suffix, "cc") &&
|
||||
strcasecmp(suffix, "c++") && strcasecmp(suffix, "cxx") &&
|
||||
strcasecmp(suffix, "cpp"))
|
||||
fileType = "c++";
|
||||
break;
|
||||
#endif // !LLVM_2_9
|
||||
case Header:
|
||||
if (strcasecmp(suffix, "h") && strcasecmp(suffix, "hh") &&
|
||||
strcasecmp(suffix, "hpp"))
|
||||
@@ -622,12 +631,18 @@ Module::writeOutput(OutputType outputType, const char *outFileName) {
|
||||
|
||||
if (outputType == Header)
|
||||
return writeHeader(outFileName);
|
||||
else {
|
||||
if (outputType == Bitcode)
|
||||
return writeBitcode(module, outFileName);
|
||||
else
|
||||
return writeObjectFileOrAssembly(outputType, outFileName);
|
||||
else if (outputType == Bitcode)
|
||||
return writeBitcode(module, outFileName);
|
||||
#ifndef LLVM_2_9
|
||||
else if (outputType == CXX) {
|
||||
extern bool WriteCXXFile(llvm::Module *module, const char *fn,
|
||||
int vectorWidth, const char *includeName);
|
||||
return WriteCXXFile(module, outFileName, g->target.vectorWidth,
|
||||
includeFileName);
|
||||
}
|
||||
#endif // !LLVM_2_9
|
||||
else
|
||||
return writeObjectFileOrAssembly(outputType, outFileName);
|
||||
}
|
||||
|
||||
|
||||
@@ -729,7 +744,7 @@ static void
|
||||
lVisitNode(const StructType *structType,
|
||||
std::map<const StructType *, StructDAGNode *> &structToNode,
|
||||
std::vector<const StructType *> &sortedTypes) {
|
||||
assert(structToNode.find(structType) != structToNode.end());
|
||||
Assert(structToNode.find(structType) != structToNode.end());
|
||||
// Get the node that encodes the structs that this one is immediately
|
||||
// dependent on.
|
||||
StructDAGNode *node = structToNode[structType];
|
||||
@@ -793,7 +808,7 @@ lEmitStructDecls(std::vector<const StructType *> &structTypes, FILE *file) {
|
||||
if (hasIncomingEdges.find(structType) == hasIncomingEdges.end())
|
||||
lVisitNode(structType, structToNode, sortedTypes);
|
||||
}
|
||||
assert(sortedTypes.size() == structTypes.size());
|
||||
Assert(sortedTypes.size() == structTypes.size());
|
||||
|
||||
// And finally we can emit the struct declarations by going through the
|
||||
// sorted ones in order.
|
||||
@@ -828,10 +843,10 @@ lEmitEnumDecls(const std::vector<const EnumType *> &enumTypes, FILE *file) {
|
||||
// Print the individual enumerators
|
||||
for (int j = 0; j < enumTypes[i]->GetEnumeratorCount(); ++j) {
|
||||
const Symbol *e = enumTypes[i]->GetEnumerator(j);
|
||||
assert(e->constValue != NULL);
|
||||
Assert(e->constValue != NULL);
|
||||
unsigned int enumValue;
|
||||
int count = e->constValue->AsUInt32(&enumValue);
|
||||
assert(count == 1);
|
||||
Assert(count == 1);
|
||||
|
||||
// Always print an initializer to set the value. We could be
|
||||
// 'clever' here and detect whether the implicit value given by
|
||||
@@ -897,7 +912,7 @@ lAddTypeIfNew(const Type *type, std::vector<const T *> *exportedTypes) {
|
||||
return;
|
||||
|
||||
const T *castType = dynamic_cast<const T *>(type);
|
||||
assert(castType != NULL);
|
||||
Assert(castType != NULL);
|
||||
exportedTypes->push_back(castType);
|
||||
}
|
||||
|
||||
@@ -934,7 +949,7 @@ lGetExportedTypes(const Type *type,
|
||||
else if (dynamic_cast<const VectorType *>(type) != NULL)
|
||||
lAddTypeIfNew(type, exportedVectorTypes);
|
||||
else
|
||||
assert(dynamic_cast<const AtomicType *>(type) != NULL);
|
||||
Assert(dynamic_cast<const AtomicType *>(type) != NULL);
|
||||
}
|
||||
|
||||
|
||||
@@ -965,7 +980,7 @@ lPrintFunctionDeclarations(FILE *file, const std::vector<Symbol *> &funcs) {
|
||||
fprintf(file, "#ifdef __cplusplus\nextern \"C\" {\n#endif // __cplusplus\n");
|
||||
for (unsigned int i = 0; i < funcs.size(); ++i) {
|
||||
const FunctionType *ftype = dynamic_cast<const FunctionType *>(funcs[i]->type);
|
||||
assert(ftype);
|
||||
Assert(ftype);
|
||||
std::string decl = ftype->GetCDeclaration(funcs[i]->name);
|
||||
fprintf(file, " extern %s;\n", decl.c_str());
|
||||
}
|
||||
@@ -990,7 +1005,7 @@ lPrintExternGlobals(FILE *file, const std::vector<Symbol *> &externGlobals) {
|
||||
static bool
|
||||
lIsExported(const Symbol *sym) {
|
||||
const FunctionType *ft = dynamic_cast<const FunctionType *>(sym->type);
|
||||
assert(ft);
|
||||
Assert(ft);
|
||||
return ft->isExported;
|
||||
}
|
||||
|
||||
@@ -998,7 +1013,7 @@ lIsExported(const Symbol *sym) {
|
||||
static bool
|
||||
lIsExternC(const Symbol *sym) {
|
||||
const FunctionType *ft = dynamic_cast<const FunctionType *>(sym->type);
|
||||
assert(ft);
|
||||
Assert(ft);
|
||||
return ft->isExternC;
|
||||
}
|
||||
|
||||
@@ -1157,19 +1172,19 @@ Module::execPreprocessor(const char* infilename, llvm::raw_string_ostream* ostre
|
||||
opts.addMacroDef("PI=3.1415926535");
|
||||
|
||||
// Add #define for current compilation target
|
||||
switch (g->target.isa) {
|
||||
case Target::SSE2:
|
||||
opts.addMacroDef("ISPC_TARGET_SSE2");
|
||||
break;
|
||||
case Target::SSE4:
|
||||
opts.addMacroDef("ISPC_TARGET_SSE4");
|
||||
break;
|
||||
case Target::AVX:
|
||||
opts.addMacroDef("ISPC_TARGET_AVX");
|
||||
break;
|
||||
default:
|
||||
FATAL("Unhandled target ISA in preprocessor symbol definition");
|
||||
char targetMacro[128];
|
||||
sprintf(targetMacro, "ISPC_TARGET_%s", g->target.GetISAString());
|
||||
char *p = targetMacro;
|
||||
while (*p) {
|
||||
*p = toupper(*p);
|
||||
++p;
|
||||
}
|
||||
opts.addMacroDef(targetMacro);
|
||||
|
||||
if (g->target.is32Bit)
|
||||
opts.addMacroDef("ISPC_POINTER_SIZE=32");
|
||||
else
|
||||
opts.addMacroDef("ISPC_POINTER_SIZE=64");
|
||||
|
||||
opts.addMacroDef("ISPC_MAJOR_VERSION=1");
|
||||
opts.addMacroDef("ISPC_MINOR_VERSION=1");
|
||||
@@ -1317,7 +1332,7 @@ lExtractAndRewriteGlobals(llvm::Module *module,
|
||||
|
||||
Symbol *sym =
|
||||
m->symbolTable->LookupVariable(gv->getName().str().c_str());
|
||||
assert(sym != NULL);
|
||||
Assert(sym != NULL);
|
||||
globals->push_back(RewriteGlobalInfo(gv, init, sym->pos));
|
||||
}
|
||||
}
|
||||
@@ -1366,9 +1381,9 @@ lAddExtractedGlobals(llvm::Module *module,
|
||||
if (globals[j].size() > 0) {
|
||||
// There should be the same number of globals in the other
|
||||
// vectors, in the same order.
|
||||
assert(globals[firstActive].size() == globals[j].size());
|
||||
Assert(globals[firstActive].size() == globals[j].size());
|
||||
llvm::GlobalVariable *gv2 = globals[j][i].gv;
|
||||
assert(gv2->getName() == gv->getName());
|
||||
Assert(gv2->getName() == gv->getName());
|
||||
|
||||
// It is possible that the types may not match, though--for
|
||||
// example, this happens with varying globals if we compile
|
||||
@@ -1422,7 +1437,7 @@ lCreateDispatchFunction(llvm::Module *module, llvm::Function *setISAFunc,
|
||||
|
||||
// Grab the type of the function as well.
|
||||
if (ftype != NULL)
|
||||
assert(ftype == funcs.func[i]->getFunctionType());
|
||||
Assert(ftype == funcs.func[i]->getFunctionType());
|
||||
else
|
||||
ftype = funcs.func[i]->getFunctionType();
|
||||
|
||||
@@ -1510,7 +1525,7 @@ lCreateDispatchFunction(llvm::Module *module, llvm::Function *setISAFunc,
|
||||
// or some such, but we don't want to start imposing too much of a
|
||||
// runtime library requirement either...
|
||||
llvm::Function *abortFunc = module->getFunction("abort");
|
||||
assert(abortFunc);
|
||||
Assert(abortFunc);
|
||||
llvm::CallInst::Create(abortFunc, "", bblock);
|
||||
|
||||
// Return an undef value from the function here; we won't get to this
|
||||
@@ -1542,10 +1557,10 @@ lCreateDispatchModule(std::map<std::string, FunctionTargetVariants> &functions)
|
||||
|
||||
// Get pointers to things we need below
|
||||
llvm::Function *setFunc = module->getFunction("__set_system_isa");
|
||||
assert(setFunc != NULL);
|
||||
Assert(setFunc != NULL);
|
||||
llvm::Value *systemBestISAPtr =
|
||||
module->getGlobalVariable("__system_best_isa", true);
|
||||
assert(systemBestISAPtr != NULL);
|
||||
Assert(systemBestISAPtr != NULL);
|
||||
|
||||
// For each exported function, create the dispatch function
|
||||
std::map<std::string, FunctionTargetVariants>::iterator iter;
|
||||
@@ -1567,7 +1582,8 @@ lCreateDispatchModule(std::map<std::string, FunctionTargetVariants> &functions)
|
||||
int
|
||||
Module::CompileAndOutput(const char *srcFile, const char *arch, const char *cpu,
|
||||
const char *target, bool generatePIC, OutputType outputType,
|
||||
const char *outFileName, const char *headerFileName) {
|
||||
const char *outFileName, const char *headerFileName,
|
||||
const char *includeFileName) {
|
||||
if (target == NULL || strchr(target, ',') == NULL) {
|
||||
// We're only compiling to a single target
|
||||
if (!Target::GetTarget(arch, cpu, target, generatePIC, &g->target))
|
||||
@@ -1576,7 +1592,7 @@ Module::CompileAndOutput(const char *srcFile, const char *arch, const char *cpu,
|
||||
m = new Module(srcFile);
|
||||
if (m->CompileFile() == 0) {
|
||||
if (outFileName != NULL)
|
||||
if (!m->writeOutput(outputType, outFileName))
|
||||
if (!m->writeOutput(outputType, outFileName, includeFileName))
|
||||
return 1;
|
||||
if (headerFileName != NULL)
|
||||
if (!m->writeOutput(Module::Header, headerFileName))
|
||||
@@ -1589,9 +1605,17 @@ Module::CompileAndOutput(const char *srcFile, const char *arch, const char *cpu,
|
||||
return errorCount > 0;
|
||||
}
|
||||
else {
|
||||
#ifndef LLVM_2_9
|
||||
if (outputType == CXX) {
|
||||
Error(SourcePos(), "Illegal to specify more then one target when "
|
||||
"compiling C++ output.");
|
||||
return 1;
|
||||
}
|
||||
#endif // !LLVM_2_9
|
||||
|
||||
// The user supplied multiple targets
|
||||
std::vector<std::string> targets = lExtractTargets(target);
|
||||
assert(targets.size() > 1);
|
||||
Assert(targets.size() > 1);
|
||||
|
||||
if (outFileName != NULL && strcmp(outFileName, "-") == 0) {
|
||||
Error(SourcePos(), "Multi-target compilation can't generate output "
|
||||
@@ -1668,7 +1692,7 @@ Module::CompileAndOutput(const char *srcFile, const char *arch, const char *cpu,
|
||||
int i = 1;
|
||||
while (i < Target::NUM_ISAS && firstTargetMachine == NULL)
|
||||
firstTargetMachine = targetMachines[i++];
|
||||
assert(firstTargetMachine != NULL);
|
||||
Assert(firstTargetMachine != NULL);
|
||||
|
||||
if (outFileName != NULL) {
|
||||
if (outputType == Bitcode)
|
||||
|
||||
13
module.h
13
module.h
@@ -80,6 +80,9 @@ public:
|
||||
enum OutputType { Asm, /** Generate text assembly language output */
|
||||
Bitcode, /** Generate LLVM IR bitcode output */
|
||||
Object, /** Generate a native object file */
|
||||
#ifndef LLVM_2_9
|
||||
CXX, /** Generate a C++ file */
|
||||
#endif // !LLVM_2_9
|
||||
Header /** Generate a C/C++ header file with
|
||||
declarations of 'export'ed functions, global
|
||||
variables, and the types used by them. */
|
||||
@@ -108,6 +111,10 @@ public:
|
||||
inclusion from C/C++ code with declarations of
|
||||
types and functions exported from the given ispc
|
||||
source file.
|
||||
@param includeFileName If non-NULL, gives the filename for the C++
|
||||
backend to emit in an #include statement to
|
||||
get definitions of the builtins for the generic
|
||||
target.
|
||||
@return Number of errors encountered when compiling
|
||||
srcFile.
|
||||
*/
|
||||
@@ -115,7 +122,8 @@ public:
|
||||
const char *cpu, const char *targets,
|
||||
bool generatePIC, OutputType outputType,
|
||||
const char *outFileName,
|
||||
const char *headerFileName);
|
||||
const char *headerFileName,
|
||||
const char *includeFileName);
|
||||
|
||||
/** Total number of errors encountered during compilation. */
|
||||
int errorCount;
|
||||
@@ -138,7 +146,8 @@ private:
|
||||
true on success, false if there has been an error. The given
|
||||
filename may be NULL, indicating that output should go to standard
|
||||
output. */
|
||||
bool writeOutput(OutputType ot, const char *filename);
|
||||
bool writeOutput(OutputType ot, const char *filename,
|
||||
const char *includeFileName = NULL);
|
||||
bool writeHeader(const char *filename);
|
||||
bool writeObjectFileOrAssembly(OutputType outputType, const char *filename);
|
||||
static bool writeObjectFileOrAssembly(llvm::TargetMachine *targetMachine,
|
||||
|
||||
197
parse.yy
197
parse.yy
@@ -134,9 +134,8 @@ struct ForeachDimension {
|
||||
%}
|
||||
|
||||
%union {
|
||||
int32_t int32Val;
|
||||
double floatVal;
|
||||
int64_t int64Val;
|
||||
int64_t intVal;
|
||||
float floatVal;
|
||||
std::string *stringVal;
|
||||
const char *constCharPtr;
|
||||
|
||||
@@ -225,8 +224,8 @@ struct ForeachDimension {
|
||||
%type <declSpecs> declaration_specifiers
|
||||
|
||||
%type <stringVal> string_constant
|
||||
%type <constCharPtr> struct_or_union_name enum_identifier
|
||||
%type <int32Val> int_constant soa_width_specifier
|
||||
%type <constCharPtr> struct_or_union_name enum_identifier goto_identifier
|
||||
%type <intVal> int_constant soa_width_specifier
|
||||
|
||||
%type <foreachDimension> foreach_dimension_specifier
|
||||
%type <foreachDimensionList> foreach_dimension_list
|
||||
@@ -259,16 +258,16 @@ primary_expression
|
||||
}
|
||||
}
|
||||
| TOKEN_INT32_CONSTANT {
|
||||
$$ = new ConstExpr(AtomicType::UniformConstInt32, yylval.int32Val, @1);
|
||||
$$ = new ConstExpr(AtomicType::UniformConstInt32, (int32_t)yylval.intVal, @1);
|
||||
}
|
||||
| TOKEN_UINT32_CONSTANT {
|
||||
$$ = new ConstExpr(AtomicType::UniformConstUInt32, (uint32_t)yylval.int32Val, @1);
|
||||
$$ = new ConstExpr(AtomicType::UniformConstUInt32, (uint32_t)yylval.intVal, @1);
|
||||
}
|
||||
| TOKEN_INT64_CONSTANT {
|
||||
$$ = new ConstExpr(AtomicType::UniformConstInt64, yylval.int64Val, @1);
|
||||
$$ = new ConstExpr(AtomicType::UniformConstInt64, (int64_t)yylval.intVal, @1);
|
||||
}
|
||||
| TOKEN_UINT64_CONSTANT {
|
||||
$$ = new ConstExpr(AtomicType::UniformConstUInt64, (uint64_t)yylval.int64Val, @1);
|
||||
$$ = new ConstExpr(AtomicType::UniformConstUInt64, (uint64_t)yylval.intVal, @1);
|
||||
}
|
||||
| TOKEN_FLOAT_CONSTANT {
|
||||
$$ = new ConstExpr(AtomicType::UniformConstFloat, (float)yylval.floatVal, @1);
|
||||
@@ -328,7 +327,7 @@ argument_expression_list
|
||||
| argument_expression_list ',' assignment_expression
|
||||
{
|
||||
ExprList *argList = dynamic_cast<ExprList *>($1);
|
||||
assert(argList != NULL);
|
||||
Assert(argList != NULL);
|
||||
argList->exprs.push_back($3);
|
||||
argList->pos = Union(argList->pos, @3);
|
||||
$$ = argList;
|
||||
@@ -363,13 +362,7 @@ cast_expression
|
||||
: unary_expression
|
||||
| '(' type_name ')' cast_expression
|
||||
{
|
||||
// Pass true here to try to preserve uniformity
|
||||
// so that things like:
|
||||
// uniform int y = ...;
|
||||
// uniform float x = 1. / (float)y;
|
||||
// don't issue an error due to (float)y being inadvertently
|
||||
// and undesirably-to-the-user "varying"...
|
||||
$$ = new TypeCastExpr($2, $4, true, Union(@1,@4));
|
||||
$$ = new TypeCastExpr($2, $4, Union(@1,@4));
|
||||
}
|
||||
;
|
||||
|
||||
@@ -501,6 +494,7 @@ declaration_statement
|
||||
$$ = NULL;
|
||||
}
|
||||
else {
|
||||
$1->DeclareFunctions();
|
||||
std::vector<VariableDeclaration> vars = $1->GetVariableDeclarations();
|
||||
$$ = new DeclStmt(vars, @1);
|
||||
}
|
||||
@@ -545,7 +539,7 @@ declaration_specifiers
|
||||
| soa_width_specifier
|
||||
{
|
||||
DeclSpecs *ds = new DeclSpecs;
|
||||
ds->soaWidth = $1;
|
||||
ds->soaWidth = (int32_t)$1;
|
||||
$$ = ds;
|
||||
}
|
||||
| soa_width_specifier declaration_specifiers
|
||||
@@ -555,7 +549,7 @@ declaration_specifiers
|
||||
if (ds->soaWidth != 0)
|
||||
Error(@1, "soa<> qualifier supplied multiple times in declaration.");
|
||||
else
|
||||
ds->soaWidth = $1;
|
||||
ds->soaWidth = (int32_t)$1;
|
||||
}
|
||||
$$ = ds;
|
||||
}
|
||||
@@ -566,7 +560,7 @@ declaration_specifiers
|
||||
| type_specifier '<' int_constant '>'
|
||||
{
|
||||
DeclSpecs *ds = new DeclSpecs($1);
|
||||
ds->vectorSize = $3;
|
||||
ds->vectorSize = (int32_t)$3;
|
||||
$$ = ds;
|
||||
}
|
||||
| type_specifier declaration_specifiers
|
||||
@@ -630,7 +624,7 @@ type_specifier
|
||||
: atomic_var_type_specifier { $$ = $1; }
|
||||
| TOKEN_TYPE_NAME
|
||||
{ const Type *t = m->symbolTable->LookupType(yytext);
|
||||
assert(t != NULL);
|
||||
Assert(t != NULL);
|
||||
$$ = t;
|
||||
}
|
||||
| struct_or_union_specifier { $$ = $1; }
|
||||
@@ -639,20 +633,20 @@ type_specifier
|
||||
|
||||
atomic_var_type_specifier
|
||||
: TOKEN_VOID { $$ = AtomicType::Void; }
|
||||
| TOKEN_BOOL { $$ = AtomicType::VaryingBool; }
|
||||
| TOKEN_INT8 { $$ = AtomicType::VaryingInt8; }
|
||||
| TOKEN_INT16 { $$ = AtomicType::VaryingInt16; }
|
||||
| TOKEN_INT { $$ = AtomicType::VaryingInt32; }
|
||||
| TOKEN_FLOAT { $$ = AtomicType::VaryingFloat; }
|
||||
| TOKEN_DOUBLE { $$ = AtomicType::VaryingDouble; }
|
||||
| TOKEN_INT64 { $$ = AtomicType::VaryingInt64; }
|
||||
| TOKEN_BOOL { $$ = AtomicType::UnboundBool; }
|
||||
| TOKEN_INT8 { $$ = AtomicType::UnboundInt8; }
|
||||
| TOKEN_INT16 { $$ = AtomicType::UnboundInt16; }
|
||||
| TOKEN_INT { $$ = AtomicType::UnboundInt32; }
|
||||
| TOKEN_FLOAT { $$ = AtomicType::UnboundFloat; }
|
||||
| TOKEN_DOUBLE { $$ = AtomicType::UnboundDouble; }
|
||||
| TOKEN_INT64 { $$ = AtomicType::UnboundInt64; }
|
||||
;
|
||||
|
||||
short_vec_specifier
|
||||
: atomic_var_type_specifier '<' int_constant '>'
|
||||
{
|
||||
Type* vt =
|
||||
new VectorType($1, $3);
|
||||
new VectorType($1, (int32_t)$3);
|
||||
$$ = vt;
|
||||
}
|
||||
;
|
||||
@@ -671,7 +665,7 @@ struct_or_union_specifier
|
||||
GetStructTypesNamesPositions(*$4, &elementTypes, &elementNames,
|
||||
&elementPositions);
|
||||
StructType *st = new StructType($2, elementTypes, elementNames,
|
||||
elementPositions, false, true, @2);
|
||||
elementPositions, false, Type::Unbound, @2);
|
||||
m->symbolTable->AddType($2, st, @2);
|
||||
$$ = st;
|
||||
}
|
||||
@@ -682,8 +676,9 @@ struct_or_union_specifier
|
||||
std::vector<SourcePos> elementPositions;
|
||||
GetStructTypesNamesPositions(*$3, &elementTypes, &elementNames,
|
||||
&elementPositions);
|
||||
// FIXME: should be unbound
|
||||
$$ = new StructType("", elementTypes, elementNames, elementPositions,
|
||||
false, true, @1);
|
||||
false, Type::Unbound, @1);
|
||||
}
|
||||
| struct_or_union '{' '}'
|
||||
{
|
||||
@@ -749,7 +744,7 @@ specifier_qualifier_list
|
||||
else if ($1 == TYPEQUAL_SIGNED) {
|
||||
if ($2->IsIntType() == false) {
|
||||
Error(@1, "Can't apply \"signed\" qualifier to \"%s\" type.",
|
||||
$2->GetString().c_str());
|
||||
$2->ResolveUnboundVariability(Type::Varying)->GetString().c_str());
|
||||
$$ = $2;
|
||||
}
|
||||
}
|
||||
@@ -759,7 +754,7 @@ specifier_qualifier_list
|
||||
$$ = t;
|
||||
else {
|
||||
Error(@1, "Can't apply \"unsigned\" qualifier to \"%s\" type. Ignoring.",
|
||||
$2->GetString().c_str());
|
||||
$2->ResolveUnboundVariability(Type::Varying)->GetString().c_str());
|
||||
$$ = $2;
|
||||
}
|
||||
}
|
||||
@@ -776,8 +771,11 @@ specifier_qualifier_list
|
||||
else
|
||||
FATAL("Unhandled type qualifier in parser.");
|
||||
}
|
||||
else
|
||||
else {
|
||||
if (m->errorCount == 0)
|
||||
Error(@1, "Lost type qualifier in parser.");
|
||||
$$ = NULL;
|
||||
}
|
||||
}
|
||||
;
|
||||
|
||||
@@ -930,7 +928,7 @@ declarator
|
||||
;
|
||||
|
||||
int_constant
|
||||
: TOKEN_INT32_CONSTANT { $$ = yylval.int32Val; }
|
||||
: TOKEN_INT32_CONSTANT { $$ = yylval.intVal; }
|
||||
;
|
||||
|
||||
direct_declarator
|
||||
@@ -948,10 +946,16 @@ direct_declarator
|
||||
{
|
||||
int size;
|
||||
if ($1 != NULL && lGetConstantInt($3, &size, @3, "Array dimension")) {
|
||||
Declarator *d = new Declarator(DK_ARRAY, Union(@1, @4));
|
||||
d->arraySize = size;
|
||||
d->child = $1;
|
||||
$$ = d;
|
||||
if (size < 0) {
|
||||
Error(@3, "Array dimension must be non-negative.");
|
||||
$$ = NULL;
|
||||
}
|
||||
else {
|
||||
Declarator *d = new Declarator(DK_ARRAY, Union(@1, @4));
|
||||
d->arraySize = size;
|
||||
d->child = $1;
|
||||
$$ = d;
|
||||
}
|
||||
}
|
||||
else
|
||||
$$ = NULL;
|
||||
@@ -1107,8 +1111,7 @@ type_name
|
||||
abstract_declarator
|
||||
: pointer
|
||||
{
|
||||
Declarator *d = new Declarator(DK_POINTER, @1);
|
||||
$$ = d;
|
||||
$$ = $1;
|
||||
}
|
||||
| direct_abstract_declarator
|
||||
| pointer direct_abstract_declarator
|
||||
@@ -1142,10 +1145,16 @@ direct_abstract_declarator
|
||||
| '[' constant_expression ']'
|
||||
{
|
||||
int size;
|
||||
if (lGetConstantInt($2, &size, @2, "Array dimension")) {
|
||||
Declarator *d = new Declarator(DK_ARRAY, Union(@1, @3));
|
||||
d->arraySize = size;
|
||||
$$ = d;
|
||||
if ($2 != NULL && lGetConstantInt($2, &size, @2, "Array dimension")) {
|
||||
if (size < 0) {
|
||||
Error(@2, "Array dimension must be non-negative.");
|
||||
$$ = NULL;
|
||||
}
|
||||
else {
|
||||
Declarator *d = new Declarator(DK_ARRAY, Union(@1, @3));
|
||||
d->arraySize = size;
|
||||
$$ = d;
|
||||
}
|
||||
}
|
||||
else
|
||||
$$ = NULL;
|
||||
@@ -1160,11 +1169,17 @@ direct_abstract_declarator
|
||||
| direct_abstract_declarator '[' constant_expression ']'
|
||||
{
|
||||
int size;
|
||||
if (lGetConstantInt($3, &size, @3, "Array dimension")) {
|
||||
Declarator *d = new Declarator(DK_ARRAY, Union(@1, @4));
|
||||
d->arraySize = size;
|
||||
d->child = $1;
|
||||
$$ = d;
|
||||
if ($3 != NULL && lGetConstantInt($3, &size, @3, "Array dimension")) {
|
||||
if (size < 0) {
|
||||
Error(@3, "Array dimension must be non-negative.");
|
||||
$$ = NULL;
|
||||
}
|
||||
else {
|
||||
Declarator *d = new Declarator(DK_ARRAY, Union(@1, @4));
|
||||
d->arraySize = size;
|
||||
d->child = $1;
|
||||
$$ = d;
|
||||
}
|
||||
}
|
||||
else
|
||||
$$ = NULL;
|
||||
@@ -1206,7 +1221,7 @@ initializer_list
|
||||
$$ = NULL;
|
||||
else {
|
||||
ExprList *exprList = dynamic_cast<ExprList *>($1);
|
||||
assert(exprList);
|
||||
Assert(exprList);
|
||||
exprList->exprs.push_back($3);
|
||||
exprList->pos = Union(exprList->pos, @3);
|
||||
$$ = exprList;
|
||||
@@ -1245,10 +1260,22 @@ statement
|
||||
;
|
||||
|
||||
labeled_statement
|
||||
: TOKEN_CASE constant_expression ':' statement
|
||||
{ UNIMPLEMENTED; }
|
||||
: goto_identifier ':' statement
|
||||
{
|
||||
$$ = new LabeledStmt($1, $3, @1);
|
||||
}
|
||||
| TOKEN_CASE constant_expression ':' statement
|
||||
{
|
||||
int value;
|
||||
if ($2 != NULL &&
|
||||
lGetConstantInt($2, &value, @2, "Case statement value")) {
|
||||
$$ = new CaseStmt(value, $4, Union(@1, @2));
|
||||
}
|
||||
else
|
||||
$$ = NULL;
|
||||
}
|
||||
| TOKEN_DEFAULT ':' statement
|
||||
{ UNIMPLEMENTED; }
|
||||
{ $$ = new DefaultStmt($3, @1); }
|
||||
;
|
||||
|
||||
start_scope
|
||||
@@ -1294,7 +1321,7 @@ selection_statement
|
||||
| TOKEN_CIF '(' expression ')' statement TOKEN_ELSE statement
|
||||
{ $$ = new IfStmt($3, $5, $7, true, @1); }
|
||||
| TOKEN_SWITCH '(' expression ')' statement
|
||||
{ UNIMPLEMENTED; }
|
||||
{ $$ = new SwitchStmt($3, $5, @1); }
|
||||
;
|
||||
|
||||
for_test
|
||||
@@ -1416,9 +1443,13 @@ iteration_statement
|
||||
}
|
||||
;
|
||||
|
||||
goto_identifier
|
||||
: TOKEN_IDENTIFIER { $$ = yylval.stringVal->c_str(); }
|
||||
;
|
||||
|
||||
jump_statement
|
||||
: TOKEN_GOTO TOKEN_IDENTIFIER ';'
|
||||
{ UNIMPLEMENTED; }
|
||||
: TOKEN_GOTO goto_identifier ';'
|
||||
{ $$ = new GotoStmt($2, @1, @2); }
|
||||
| TOKEN_CONTINUE ';'
|
||||
{ $$ = new ContinueStmt(false, @1); }
|
||||
| TOKEN_BREAK ';'
|
||||
@@ -1534,19 +1565,21 @@ lAddDeclaration(DeclSpecs *ds, Declarator *decl) {
|
||||
const Type *t = decl->GetType(ds);
|
||||
if (t == NULL)
|
||||
return;
|
||||
|
||||
Symbol *sym = decl->GetSymbol();
|
||||
Assert(sym != NULL);
|
||||
const FunctionType *ft = dynamic_cast<const FunctionType *>(t);
|
||||
if (ft != NULL) {
|
||||
Symbol *funSym = decl->GetSymbol();
|
||||
assert(funSym != NULL);
|
||||
funSym->type = ft;
|
||||
funSym->storageClass = ds->storageClass;
|
||||
|
||||
sym->type = ft;
|
||||
sym->storageClass = ds->storageClass;
|
||||
bool isInline = (ds->typeQualifiers & TYPEQUAL_INLINE);
|
||||
m->AddFunctionDeclaration(funSym, isInline);
|
||||
m->AddFunctionDeclaration(sym, isInline);
|
||||
}
|
||||
else {
|
||||
sym->type = sym->type->ResolveUnboundVariability(Type::Varying);
|
||||
bool isConst = (ds->typeQualifiers & TYPEQUAL_CONST) != 0;
|
||||
m->AddGlobalVariable(sym, decl->initExpr, isConst);
|
||||
}
|
||||
else
|
||||
m->AddGlobalVariable(decl->GetSymbol(), decl->initExpr,
|
||||
(ds->typeQualifiers & TYPEQUAL_CONST) != 0);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1561,19 +1594,22 @@ lAddFunctionParams(Declarator *decl) {
|
||||
// walk down to the declarator for the function itself
|
||||
while (decl->kind != DK_FUNCTION && decl->child != NULL)
|
||||
decl = decl->child;
|
||||
assert(decl->kind == DK_FUNCTION);
|
||||
Assert(decl->kind == DK_FUNCTION);
|
||||
|
||||
// now loop over its parameters and add them to the symbol table
|
||||
for (unsigned int i = 0; i < decl->functionParams.size(); ++i) {
|
||||
Declaration *pdecl = decl->functionParams[i];
|
||||
if (pdecl == NULL)
|
||||
if (pdecl == NULL || pdecl->declarators.size() == 0)
|
||||
// zero size declarators array corresponds to an anonymous
|
||||
// parameter
|
||||
continue;
|
||||
assert(pdecl->declarators.size() == 1);
|
||||
Assert(pdecl->declarators.size() == 1);
|
||||
Symbol *sym = pdecl->declarators[0]->GetSymbol();
|
||||
sym->type = sym->type->ResolveUnboundVariability(Type::Varying);
|
||||
#ifndef NDEBUG
|
||||
bool ok = m->symbolTable->AddVariable(sym);
|
||||
if (ok == false)
|
||||
assert(m->errorCount > 0);
|
||||
Assert(m->errorCount > 0);
|
||||
#else
|
||||
m->symbolTable->AddVariable(sym);
|
||||
#endif
|
||||
@@ -1586,7 +1622,8 @@ lAddFunctionParams(Declarator *decl) {
|
||||
|
||||
/** Add a symbol for the built-in mask variable to the symbol table */
|
||||
static void lAddMaskToSymbolTable(SourcePos pos) {
|
||||
const Type *t = AtomicType::VaryingConstUInt32;
|
||||
const Type *t = g->target.isa == Target::GENERIC ?
|
||||
AtomicType::VaryingConstBool : AtomicType::VaryingConstUInt32;
|
||||
Symbol *maskSymbol = new Symbol("__mask", pos, t);
|
||||
m->symbolTable->AddVariable(maskSymbol);
|
||||
}
|
||||
@@ -1640,7 +1677,7 @@ lGetStorageClassString(StorageClass sc) {
|
||||
case SC_EXTERN_C:
|
||||
return "extern \"C\"";
|
||||
default:
|
||||
assert(!"logic error in lGetStorageClassString()");
|
||||
Assert(!"logic error in lGetStorageClassString()");
|
||||
return "";
|
||||
}
|
||||
}
|
||||
@@ -1655,10 +1692,10 @@ static bool
|
||||
lGetConstantInt(Expr *expr, int *value, SourcePos pos, const char *usage) {
|
||||
if (expr == NULL)
|
||||
return false;
|
||||
expr = expr->TypeCheck();
|
||||
expr = TypeCheck(expr);
|
||||
if (expr == NULL)
|
||||
return false;
|
||||
expr = expr->Optimize();
|
||||
expr = Optimize(expr);
|
||||
if (expr == NULL)
|
||||
return false;
|
||||
|
||||
@@ -1673,6 +1710,10 @@ lGetConstantInt(Expr *expr, int *value, SourcePos pos, const char *usage) {
|
||||
Error(pos, "%s must be a compile-time integer constant.", usage);
|
||||
return false;
|
||||
}
|
||||
if ((int64_t)((int32_t)ci->getSExtValue()) != ci->getSExtValue()) {
|
||||
Error(pos, "%s must be representable with a 32-bit integer.", usage);
|
||||
return false;
|
||||
}
|
||||
*value = (int)ci->getZExtValue();
|
||||
return true;
|
||||
}
|
||||
@@ -1720,7 +1761,7 @@ lFinalizeEnumeratorSymbols(std::vector<Symbol *> &enums,
|
||||
if (enums[i]->constValue != NULL) {
|
||||
/* Already has a value, so first update nextVal with it. */
|
||||
int count = enums[i]->constValue->AsUInt32(&nextVal);
|
||||
assert(count == 1);
|
||||
Assert(count == 1);
|
||||
++nextVal;
|
||||
|
||||
/* When the source file as being parsed, the ConstExpr for any
|
||||
@@ -1730,10 +1771,10 @@ lFinalizeEnumeratorSymbols(std::vector<Symbol *> &enums,
|
||||
the actual enum type here and optimize it, which will have
|
||||
us end up with a ConstExpr with the desired EnumType... */
|
||||
Expr *castExpr = new TypeCastExpr(enumType, enums[i]->constValue,
|
||||
false, enums[i]->pos);
|
||||
castExpr = castExpr->Optimize();
|
||||
enums[i]->pos);
|
||||
castExpr = Optimize(castExpr);
|
||||
enums[i]->constValue = dynamic_cast<ConstExpr *>(castExpr);
|
||||
assert(enums[i]->constValue != NULL);
|
||||
Assert(enums[i]->constValue != NULL);
|
||||
}
|
||||
else {
|
||||
enums[i]->constValue = new ConstExpr(enumType, nextVal++,
|
||||
|
||||
375
run_tests.py
375
run_tests.py
@@ -2,9 +2,6 @@
|
||||
|
||||
# test-running driver for ispc
|
||||
|
||||
# TODO: windows support (mostly should be calling CL.exe rather than gcc
|
||||
# for static linking?)
|
||||
|
||||
from optparse import OptionParser
|
||||
import multiprocessing
|
||||
from ctypes import c_int
|
||||
@@ -15,35 +12,80 @@ import re
|
||||
import signal
|
||||
import random
|
||||
import string
|
||||
import mutex
|
||||
import subprocess
|
||||
import shlex
|
||||
import platform
|
||||
import tempfile
|
||||
|
||||
# This script is affected by http://bugs.python.org/issue5261 on OSX 10.5 Leopard
|
||||
# git history has a workaround for that issue.
|
||||
|
||||
is_windows = (platform.system() == 'Windows' or
|
||||
'CYGWIN_NT' in platform.system())
|
||||
|
||||
parser = OptionParser()
|
||||
parser.add_option("-r", "--random-shuffle", dest="random", help="Randomly order tests",
|
||||
default=False, action="store_true")
|
||||
parser.add_option("-s", "--static-exe", dest="static_exe",
|
||||
help="Create and run a regular executable for each test (rather than using the LLVM JIT).",
|
||||
default=False, action="store_true")
|
||||
parser.add_option("-g", "--generics-include", dest="include_file", help="Filename for header implementing functions for generics",
|
||||
default=None)
|
||||
parser.add_option('-t', '--target', dest='target',
|
||||
help='Set compilation target (sse2, sse2-x2, sse4, sse4-x2, avx, avx-x2)',
|
||||
help='Set compilation target (sse2, sse2-x2, sse4, sse4-x2, avx, avx-x2, generic-4, generic-8, generic-16)',
|
||||
default="sse4")
|
||||
parser.add_option('-a', '--arch', dest='arch',
|
||||
help='Set architecture (x86, x86-64)',
|
||||
default="x86-64")
|
||||
parser.add_option("-c", "--compiler", dest="compiler_exe", help="Compiler binary to use to run tests",
|
||||
default=None)
|
||||
parser.add_option('-o', '--no-opt', dest='no_opt', help='Disable optimization',
|
||||
default=False, action="store_true")
|
||||
parser.add_option('-v', '--verbose', dest='verbose', help='Enable verbose output',
|
||||
default=False, action="store_true")
|
||||
if not is_windows:
|
||||
parser.add_option('--valgrind', dest='valgrind', help='Run tests with valgrind',
|
||||
default=False, action="store_true")
|
||||
|
||||
(options, args) = parser.parse_args()
|
||||
|
||||
if not is_windows and options.valgrind:
|
||||
valgrind_exe = "valgrind "
|
||||
else:
|
||||
valgrind_exe = ""
|
||||
|
||||
if not is_windows:
|
||||
ispc_exe = "./ispc"
|
||||
else:
|
||||
ispc_exe = "Release/ispc.exe"
|
||||
|
||||
is_generic_target = options.target.find("generic-") != -1
|
||||
if is_generic_target and options.include_file == None:
|
||||
if options.target == "generic-4":
|
||||
sys.stderr.write("No generics #include specified; using examples/intrinsics/sse4.h\n")
|
||||
options.include_file = "examples/intrinsics/sse4.h"
|
||||
elif options.target == "generic-8":
|
||||
sys.stderr.write("No generics #include specified and no default available for \"generic-8\" target.\n")
|
||||
sys.exit(1)
|
||||
elif options.target == "generic-16":
|
||||
sys.stderr.write("No generics #include specified; using examples/intrinsics/generic-16.h\n")
|
||||
options.include_file = "examples/intrinsics/generic-16.h"
|
||||
|
||||
if options.compiler_exe == None:
|
||||
if is_windows:
|
||||
options.compiler_exe = "cl"
|
||||
else:
|
||||
options.compiler_exe = "g++"
|
||||
|
||||
# if no specific test files are specified, run all of the tests in tests/
|
||||
# and failing_tests/
|
||||
if len(args) == 0:
|
||||
files = glob.glob("tests/*ispc") + glob.glob("failing_tests/*ispc") + \
|
||||
glob.glob("tests_errors/*ispc")
|
||||
else:
|
||||
files = args
|
||||
files = [ ]
|
||||
for f in args:
|
||||
if os.path.splitext(string.lower(f))[1] != ".ispc":
|
||||
print "Ignoring file %s, which doesn't have an .ispc extension." % f
|
||||
else:
|
||||
files += [ f ]
|
||||
|
||||
# randomly shuffle the tests if asked to do so
|
||||
if (options.random):
|
||||
@@ -52,18 +94,12 @@ if (options.random):
|
||||
|
||||
# counter
|
||||
total_tests = 0
|
||||
finished_tests_counter = multiprocessing.Value(c_int)
|
||||
|
||||
# We'd like to use the Lock class from the multiprocessing package to
|
||||
# serialize accesses to finished_tests_counter. Unfortunately, the version of
|
||||
# python that ships with OSX 10.5 has this bug:
|
||||
# http://bugs.python.org/issue5261. Therefore, we use the (deprecated but
|
||||
# still available) mutex class.
|
||||
#finished_tests_counter_lock = multiprocessing.Lock()
|
||||
finished_tests_mutex = mutex.mutex()
|
||||
finished_tests_counter = multiprocessing.Value(c_int)
|
||||
finished_tests_counter_lock = multiprocessing.Lock()
|
||||
|
||||
# utility routine to print an update on the number of tests that have been
|
||||
# finished. Should be called with the mutex (or lock) held..
|
||||
# finished. Should be called with the lock held..
|
||||
def update_progress(fn):
|
||||
finished_tests_counter.value = finished_tests_counter.value + 1
|
||||
progress_str = " Done %d / %d [%s]" % (finished_tests_counter.value, total_tests, fn)
|
||||
@@ -73,138 +109,197 @@ def update_progress(fn):
|
||||
progress_str += '\r'
|
||||
sys.stdout.write(progress_str)
|
||||
sys.stdout.flush()
|
||||
finished_tests_mutex.unlock()
|
||||
|
||||
fnull = open(os.devnull, 'w')
|
||||
def run_command(cmd):
|
||||
if options.verbose:
|
||||
sys.stdout.write("Running: %s\n" % cmd)
|
||||
sp = subprocess.Popen(shlex.split(cmd), stdin=None,
|
||||
stdout=subprocess.PIPE,
|
||||
stderr=subprocess.PIPE)
|
||||
out = sp.communicate()
|
||||
output = ""
|
||||
output += out[0].decode("utf-8")
|
||||
output += out[1].decode("utf-8")
|
||||
|
||||
return (sp.returncode, output)
|
||||
|
||||
# run the commands in cmd_list
|
||||
def run_cmds(cmd_list, filename, expect_failure):
|
||||
for cmd in cmd_list:
|
||||
if expect_failure:
|
||||
failed = (subprocess.call(cmd, shell = True, stdout = fnull, stderr = fnull) != 0)
|
||||
else:
|
||||
failed = (os.system(cmd) != 0)
|
||||
if failed:
|
||||
break
|
||||
def run_cmds(compile_cmds, run_cmd, filename, expect_failure):
|
||||
for cmd in compile_cmds:
|
||||
(return_code, output) = run_command(cmd)
|
||||
compile_failed = (return_code != 0)
|
||||
if compile_failed:
|
||||
sys.stdout.write("Compilation of test %s failed \n" % filename)
|
||||
if output != "":
|
||||
sys.stdout.write("%s" % output)
|
||||
return (1, 0)
|
||||
|
||||
surprise = ((expect_failure and not failed) or (not expect_failure and failed))
|
||||
(return_code, output) = run_command(run_cmd)
|
||||
run_failed = (return_code != 0)
|
||||
|
||||
surprise = ((expect_failure and not run_failed) or
|
||||
(not expect_failure and run_failed))
|
||||
if surprise == True:
|
||||
print "Test %s %s " % \
|
||||
(filename, "unexpectedly passed" if expect_failure else "failed")
|
||||
return surprise
|
||||
sys.stderr.write("Test %s %s (return code %d) \n" % \
|
||||
(filename, "unexpectedly passed" if expect_failure else "failed",
|
||||
return_code))
|
||||
if output != "":
|
||||
sys.stdout.write("%s\n" % output)
|
||||
if surprise == True:
|
||||
return (0, 1)
|
||||
else:
|
||||
return (0, 0)
|
||||
|
||||
|
||||
# pull tests to run from the given queue and run them. Multiple copies of
|
||||
# this function will be running in parallel across all of the CPU cores of
|
||||
# the system.
|
||||
def run_tasks_from_queue(queue):
|
||||
error_count = 0
|
||||
while True:
|
||||
filename = queue.get()
|
||||
if (filename == 'STOP'):
|
||||
sys.exit(error_count)
|
||||
def run_test(filename):
|
||||
global is_windows
|
||||
if is_windows:
|
||||
input_prefix = "../"
|
||||
else:
|
||||
input_prefix = ""
|
||||
|
||||
# is this a test to make sure an error is issued?
|
||||
want_error = (filename.find("tests_errors") != -1)
|
||||
if want_error == True:
|
||||
ispc_cmd = ispc_exe + " --werror --nowrap %s --arch=%s --target=%s" % \
|
||||
(input_prefix + filename, options.arch, options.target)
|
||||
(return_code, output) = run_command(ispc_cmd)
|
||||
got_error = (return_code != 0)
|
||||
|
||||
# is this a test to make sure an error is issued?
|
||||
want_error = (filename.find("tests_errors") != -1)
|
||||
if want_error == True:
|
||||
ispc_cmd = "ispc --werror --nowrap %s --arch=%s --target=%s" % \
|
||||
(filename, options.arch, options.target)
|
||||
sp = subprocess.Popen(shlex.split(ispc_cmd), stdin=None, stdout=subprocess.PIPE,
|
||||
stderr=subprocess.PIPE)
|
||||
output = sp.communicate()[1]
|
||||
got_error = (sp.returncode != 0)
|
||||
|
||||
# figure out the error message we're expecting
|
||||
file = open(filename, 'r')
|
||||
firstline = file.readline()
|
||||
firstline = string.replace(firstline, "//", "")
|
||||
firstline = string.lstrip(firstline)
|
||||
firstline = string.rstrip(firstline)
|
||||
file.close()
|
||||
|
||||
if (output.find(firstline) == -1):
|
||||
print "Didn't see expected error message \"%s\" from test %s.\nActual outout: %s" % \
|
||||
(firstline, filename, output)
|
||||
error_count += 1
|
||||
elif got_error == False:
|
||||
print "Unexpectedly no errors issued from test %s" % filename
|
||||
error_count += 1
|
||||
continue
|
||||
# figure out the error message we're expecting
|
||||
file = open(input_prefix + filename, 'r')
|
||||
firstline = file.readline()
|
||||
firstline = firstline.replace("//", "")
|
||||
firstline = firstline.lstrip()
|
||||
firstline = firstline.rstrip()
|
||||
file.close()
|
||||
|
||||
if (output.find(firstline) == -1):
|
||||
sys.stderr.write("Didn't see expected error message %s from test %s.\nActual output:\n%s\n" % \
|
||||
(firstline, filename, output))
|
||||
return (1, 0)
|
||||
elif got_error == False:
|
||||
sys.stderr.write("Unexpectedly no errors issued from test %s\n" % filename)
|
||||
return (1, 0)
|
||||
else:
|
||||
return (0, 0)
|
||||
else:
|
||||
# do we expect this test to fail?
|
||||
should_fail = (filename.find("failing_") != -1)
|
||||
|
||||
if options.static_exe == True:
|
||||
# if the user wants us to build a static executable to run for
|
||||
# this test, we need to figure out the signature of the test
|
||||
# function that this test has.
|
||||
sig2def = { "f_v(" : 0, "f_f(" : 1, "f_fu(" : 2, "f_fi(" : 3,
|
||||
"f_du(" : 4, "f_duf(" : 5, "f_di(" : 6 }
|
||||
file = open(filename, 'r')
|
||||
match = -1
|
||||
for line in file:
|
||||
# look for lines with 'export'...
|
||||
if line.find("export") == -1:
|
||||
continue
|
||||
# one of them should have a function with one of the
|
||||
# declarations in sig2def
|
||||
for pattern, ident in sig2def.items():
|
||||
if line.find(pattern) != -1:
|
||||
match = ident
|
||||
break
|
||||
file.close()
|
||||
if match == -1:
|
||||
print "Fatal error: unable to find function signature in test %s" % filename
|
||||
error_count += 1
|
||||
# We need to figure out the signature of the test
|
||||
# function that this test has.
|
||||
sig2def = { "f_v(" : 0, "f_f(" : 1, "f_fu(" : 2, "f_fi(" : 3,
|
||||
"f_du(" : 4, "f_duf(" : 5, "f_di(" : 6 }
|
||||
file = open(input_prefix + filename, 'r')
|
||||
match = -1
|
||||
for line in file:
|
||||
# look for lines with 'export'...
|
||||
if line.find("export") == -1:
|
||||
continue
|
||||
# one of them should have a function with one of the
|
||||
# declarations in sig2def
|
||||
for pattern, ident in list(sig2def.items()):
|
||||
if line.find(pattern) != -1:
|
||||
match = ident
|
||||
break
|
||||
file.close()
|
||||
if match == -1:
|
||||
sys.stderr.write("Fatal error: unable to find function signature " + \
|
||||
"in test %s\n" % filename)
|
||||
return (1, 0)
|
||||
else:
|
||||
is_generic_target = options.target.find("generic-") != -1
|
||||
if is_generic_target:
|
||||
obj_name = "%s.cpp" % filename
|
||||
|
||||
if is_windows:
|
||||
if not is_generic_target:
|
||||
obj_name = "%s%s.obj" % (input_prefix, filename)
|
||||
exe_name = "%s%s.exe" % (input_prefix, filename)
|
||||
|
||||
cc_cmd = "%s /I. /Iwinstuff /Zi /nologo /DTEST_SIG=%d %stest_static.cpp %s /Fe%s" % \
|
||||
(options.compiler_exe, match, input_prefix, obj_name, exe_name)
|
||||
if should_fail:
|
||||
cc_cmd += " /DEXPECT_FAILURE"
|
||||
else:
|
||||
obj_name = "%s.o" % filename
|
||||
if not is_generic_target:
|
||||
obj_name = "%s.o" % filename
|
||||
exe_name = "%s.run" % filename
|
||||
ispc_cmd = "ispc --woff %s -o %s --arch=%s --target=%s" % \
|
||||
(filename, obj_name, options.arch, options.target)
|
||||
if options.no_opt:
|
||||
ispc_cmd += " -O0"
|
||||
|
||||
if options.arch == 'x86':
|
||||
gcc_arch = '-m32'
|
||||
else:
|
||||
gcc_arch = '-m64'
|
||||
gcc_cmd = "g++ %s test_static.cpp -DTEST_SIG=%d %s.o -o %s" % \
|
||||
(gcc_arch, match, filename, exe_name)
|
||||
cc_cmd = "%s -O2 -msse4.2 -I. %s test_static.cpp -DTEST_SIG=%d %s -o %s" % \
|
||||
(options.compiler_exe, gcc_arch, match, obj_name, exe_name)
|
||||
if platform.system() == 'Darwin':
|
||||
gcc_cmd += ' -Wl,-no_pie'
|
||||
cc_cmd += ' -Wl,-no_pie'
|
||||
if should_fail:
|
||||
gcc_cmd += " -DEXPECT_FAILURE"
|
||||
|
||||
# compile the ispc code, make the executable, and run it...
|
||||
error_count += run_cmds([ispc_cmd, gcc_cmd, exe_name], filename, should_fail)
|
||||
cc_cmd += " -DEXPECT_FAILURE"
|
||||
|
||||
# clean up after running the test
|
||||
ispc_cmd = ispc_exe + " --woff %s -o %s --arch=%s --target=%s" % \
|
||||
(input_prefix+filename, obj_name, options.arch, options.target)
|
||||
if options.no_opt:
|
||||
ispc_cmd += " -O0"
|
||||
if is_generic_target:
|
||||
ispc_cmd += " --emit-c++ --c++-include-file=%s" % options.include_file
|
||||
|
||||
# compile the ispc code, make the executable, and run it...
|
||||
global valgrind_exe
|
||||
(compile_error, run_error) = run_cmds([ispc_cmd, cc_cmd],
|
||||
valgrind_exe + " " + exe_name, \
|
||||
filename, should_fail)
|
||||
|
||||
# clean up after running the test
|
||||
try:
|
||||
if not run_error:
|
||||
os.unlink(exe_name)
|
||||
if is_windows:
|
||||
os.unlink(filename + ".pdb")
|
||||
os.unlink(filename + ".ilk")
|
||||
os.unlink(obj_name)
|
||||
except:
|
||||
None
|
||||
|
||||
return (compile_error, run_error)
|
||||
|
||||
# pull tests to run from the given queue and run them. Multiple copies of
|
||||
# this function will be running in parallel across all of the CPU cores of
|
||||
# the system.
|
||||
def run_tasks_from_queue(queue, queue_ret):
|
||||
if is_windows:
|
||||
tmpdir = "tmp%d" % os.getpid()
|
||||
os.mkdir(tmpdir)
|
||||
os.chdir(tmpdir)
|
||||
else:
|
||||
olddir = ""
|
||||
|
||||
compile_error_files = [ ]
|
||||
run_error_files = [ ]
|
||||
while True:
|
||||
filename = queue.get()
|
||||
if (filename == 'STOP'):
|
||||
queue_ret.put((compile_error_files, run_error_files))
|
||||
if is_windows:
|
||||
try:
|
||||
os.unlink(exe_name)
|
||||
os.unlink(obj_name)
|
||||
os.remove("test_static.obj")
|
||||
os.remove("/vc100.pdb")
|
||||
os.chdir("..")
|
||||
os.rmdir(tmpdir)
|
||||
except:
|
||||
None
|
||||
else:
|
||||
# otherwise we'll use ispc_test + the LLVM JIT to run the test
|
||||
bitcode_file = "%s.bc" % filename
|
||||
compile_cmd = "ispc --woff --emit-llvm %s --target=%s -o %s" % \
|
||||
(filename, options.target, bitcode_file)
|
||||
if options.no_opt:
|
||||
compile_cmd += " -O0"
|
||||
test_cmd = "ispc_test %s" % bitcode_file
|
||||
|
||||
sys.exit(0)
|
||||
|
||||
error_count += run_cmds([compile_cmd, test_cmd], filename, should_fail)
|
||||
|
||||
try:
|
||||
os.unlink(bitcode_file)
|
||||
except:
|
||||
None
|
||||
|
||||
# If not for http://bugs.python.org/issue5261 on OSX, we'd like to do this:
|
||||
#with finished_tests_counter_lock:
|
||||
#update_progress(filename)
|
||||
# but instead we do this...
|
||||
finished_tests_mutex.lock(update_progress, filename)
|
||||
(compile_error, run_error) = run_test(filename)
|
||||
if compile_error != 0:
|
||||
compile_error_files += [ filename ]
|
||||
if run_error != 0:
|
||||
run_error_files += [ filename ]
|
||||
|
||||
with finished_tests_counter_lock:
|
||||
update_progress(filename)
|
||||
|
||||
task_threads = []
|
||||
|
||||
@@ -214,8 +309,12 @@ def sigint(signum, frame):
|
||||
sys.exit(1)
|
||||
|
||||
if __name__ == '__main__':
|
||||
nthreads = multiprocessing.cpu_count()
|
||||
total_tests = len(files)
|
||||
|
||||
compile_error_files = [ ]
|
||||
run_error_files = [ ]
|
||||
|
||||
nthreads = multiprocessing.cpu_count()
|
||||
print "Found %d CPUs. Running %d tests." % (nthreads, total_tests)
|
||||
|
||||
# put each of the test filenames into a queue
|
||||
@@ -224,6 +323,7 @@ if __name__ == '__main__':
|
||||
q.put(fn)
|
||||
for x in range(nthreads):
|
||||
q.put('STOP')
|
||||
qret = multiprocessing.Queue()
|
||||
|
||||
# need to catch sigint so that we can terminate all of the tasks if
|
||||
# we're interrupted
|
||||
@@ -231,17 +331,30 @@ if __name__ == '__main__':
|
||||
|
||||
# launch jobs to run tests
|
||||
for x in range(nthreads):
|
||||
t = multiprocessing.Process(target=run_tasks_from_queue, args=(q,))
|
||||
t = multiprocessing.Process(target=run_tasks_from_queue, args=(q,qret))
|
||||
task_threads.append(t)
|
||||
t.start()
|
||||
|
||||
# wait for them to all finish and then return the number that failed
|
||||
# (i.e. return 0 if all is ok)
|
||||
error_count = 0
|
||||
for t in task_threads:
|
||||
t.join()
|
||||
error_count += t.exitcode
|
||||
print
|
||||
if error_count > 0:
|
||||
print "%d / %d tests FAILED!" % (error_count, total_tests)
|
||||
sys.exit(error_count)
|
||||
|
||||
while not qret.empty():
|
||||
(c, r) = qret.get()
|
||||
compile_error_files += c
|
||||
run_error_files += r
|
||||
|
||||
if len(compile_error_files) > 0:
|
||||
compile_error_files.sort()
|
||||
sys.stdout.write("%d / %d tests FAILED compilation:\n" % (len(compile_error_files), total_tests))
|
||||
for f in compile_error_files:
|
||||
sys.stdout.write("\t%s\n" % f)
|
||||
if len(run_error_files) > 0:
|
||||
run_error_files.sort()
|
||||
sys.stdout.write("%d / %d tests FAILED execution:\n" % (len(run_error_files), total_tests))
|
||||
for f in run_error_files:
|
||||
sys.stdout.write("\t%s\n" % f)
|
||||
|
||||
sys.exit(len(compile_error_files) + len(run_error_files))
|
||||
|
||||
95
run_tests.sh
95
run_tests.sh
@@ -1,95 +0,0 @@
|
||||
#!/bin/bash
|
||||
|
||||
surprises=0
|
||||
verbose=false
|
||||
number=$(ls -1 tests/*.ispc|wc -l)
|
||||
counter=1
|
||||
target=sse4
|
||||
|
||||
while getopts ":vt:h" opt;do
|
||||
case $opt in
|
||||
v) verbose=true
|
||||
;;
|
||||
t) target=$OPTARG
|
||||
;;
|
||||
h) cat <<EOF
|
||||
usage: run_tests.sh [-v] [-t target] [filenames]
|
||||
-v # verbose output
|
||||
-t # specify compilation target (SSE4 is the default).
|
||||
[filenames] # (optional) files to run through testing infrastructure
|
||||
# if none are provided, all in tests/ will be run.
|
||||
EOF
|
||||
exit 1
|
||||
esac
|
||||
done
|
||||
|
||||
ISPC_ARCH=x86-64
|
||||
if [[ $OS == "Windows_NT" ]]; then
|
||||
ISPC_ARCH=x86
|
||||
fi
|
||||
ISPC_ARGS="--target=$target --arch=$ISPC_ARCH -O2 --woff"
|
||||
|
||||
shift $(( $OPTIND - 1 ))
|
||||
if [[ "$1" > 0 ]]; then
|
||||
while [[ "$1" > 0 ]]; do
|
||||
i=$1
|
||||
shift
|
||||
echo Running test $i
|
||||
|
||||
bc=${i%%ispc}bc
|
||||
ispc $ISPC_ARGS $i -o $bc --emit-llvm
|
||||
if [[ $? != 0 ]]; then
|
||||
surprises=1
|
||||
echo Test $i FAILED ispc compile
|
||||
echo
|
||||
else
|
||||
ispc_test $bc
|
||||
if [[ $? != 0 ]]; then
|
||||
surprises=1
|
||||
echo Test $i FAILED ispc_test
|
||||
echo
|
||||
fi
|
||||
fi
|
||||
/bin/rm -f $bc
|
||||
done
|
||||
else
|
||||
echo Running all correctness tests
|
||||
|
||||
for i in tests/*.ispc; do
|
||||
if $verbose; then
|
||||
echo -en "Running test $counter of $number.\r"
|
||||
fi
|
||||
(( counter++ ))
|
||||
bc=${i%%ispc}bc
|
||||
ispc $ISPC_ARGS $i -o $bc --emit-llvm
|
||||
if [[ $? != 0 ]]; then
|
||||
surprises=1
|
||||
echo Test $i FAILED ispc compile
|
||||
echo
|
||||
else
|
||||
ispc_test $bc
|
||||
if [[ $? != 0 ]]; then
|
||||
surprises=1
|
||||
echo Test $i FAILED ispc_test
|
||||
echo
|
||||
fi
|
||||
fi
|
||||
/bin/rm -f $bc
|
||||
done
|
||||
|
||||
echo -e "\nRunning failing tests"
|
||||
for i in failing_tests/*.ispc; do
|
||||
(ispc -O2 $i -woff -o - --emit-llvm | ispc_test -) 2>/dev/null 1>/dev/null
|
||||
if [[ $? == 0 ]]; then
|
||||
surprises=1
|
||||
echo Test $i UNEXPECTEDLY PASSED
|
||||
echo
|
||||
fi
|
||||
done
|
||||
fi
|
||||
|
||||
if [[ $surprises == 0 ]]; then
|
||||
echo No surprises.
|
||||
fi
|
||||
|
||||
exit $surprises
|
||||
337
stdlib.ispc
337
stdlib.ispc
@@ -38,6 +38,14 @@
|
||||
ispc code
|
||||
*/
|
||||
|
||||
#ifdef ISPC_TARGET_GENERIC
|
||||
#define IntMaskType bool
|
||||
#define UIntMaskType bool
|
||||
#else
|
||||
#define IntMaskType int32
|
||||
#define UIntMaskType unsigned int32
|
||||
#endif
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
// Low level primitives
|
||||
|
||||
@@ -86,15 +94,15 @@ static inline float broadcast(float v, uniform int i) {
|
||||
}
|
||||
|
||||
static inline int8 broadcast(int8 v, uniform int i) {
|
||||
return __broadcast_int8(v, i);
|
||||
return __broadcast_i8(v, i);
|
||||
}
|
||||
|
||||
static inline int16 broadcast(int16 v, uniform int i) {
|
||||
return __broadcast_int16(v, i);
|
||||
return __broadcast_i16(v, i);
|
||||
}
|
||||
|
||||
static inline int32 broadcast(int32 v, uniform int i) {
|
||||
return __broadcast_int32(v, i);
|
||||
return __broadcast_i32(v, i);
|
||||
}
|
||||
|
||||
static inline double broadcast(double v, uniform int i) {
|
||||
@@ -102,7 +110,7 @@ static inline double broadcast(double v, uniform int i) {
|
||||
}
|
||||
|
||||
static inline int64 broadcast(int64 v, uniform int i) {
|
||||
return __broadcast_int64(v, i);
|
||||
return __broadcast_i64(v, i);
|
||||
}
|
||||
|
||||
static inline float rotate(float v, uniform int i) {
|
||||
@@ -110,15 +118,15 @@ static inline float rotate(float v, uniform int i) {
|
||||
}
|
||||
|
||||
static inline int8 rotate(int8 v, uniform int i) {
|
||||
return __rotate_int8(v, i);
|
||||
return __rotate_i8(v, i);
|
||||
}
|
||||
|
||||
static inline int16 rotate(int16 v, uniform int i) {
|
||||
return __rotate_int16(v, i);
|
||||
return __rotate_i16(v, i);
|
||||
}
|
||||
|
||||
static inline int32 rotate(int32 v, uniform int i) {
|
||||
return __rotate_int32(v, i);
|
||||
return __rotate_i32(v, i);
|
||||
}
|
||||
|
||||
static inline double rotate(double v, uniform int i) {
|
||||
@@ -126,7 +134,7 @@ static inline double rotate(double v, uniform int i) {
|
||||
}
|
||||
|
||||
static inline int64 rotate(int64 v, uniform int i) {
|
||||
return __rotate_int64(v, i);
|
||||
return __rotate_i64(v, i);
|
||||
}
|
||||
|
||||
static inline float shuffle(float v, int i) {
|
||||
@@ -134,15 +142,15 @@ static inline float shuffle(float v, int i) {
|
||||
}
|
||||
|
||||
static inline int8 shuffle(int8 v, int i) {
|
||||
return __shuffle_int8(v, i);
|
||||
return __shuffle_i8(v, i);
|
||||
}
|
||||
|
||||
static inline int16 shuffle(int16 v, int i) {
|
||||
return __shuffle_int16(v, i);
|
||||
return __shuffle_i16(v, i);
|
||||
}
|
||||
|
||||
static inline int32 shuffle(int32 v, int i) {
|
||||
return __shuffle_int32(v, i);
|
||||
return __shuffle_i32(v, i);
|
||||
}
|
||||
|
||||
static inline double shuffle(double v, int i) {
|
||||
@@ -150,7 +158,7 @@ static inline double shuffle(double v, int i) {
|
||||
}
|
||||
|
||||
static inline int64 shuffle(int64 v, int i) {
|
||||
return __shuffle_int64(v, i);
|
||||
return __shuffle_i64(v, i);
|
||||
}
|
||||
|
||||
static inline float shuffle(float v0, float v1, int i) {
|
||||
@@ -158,15 +166,15 @@ static inline float shuffle(float v0, float v1, int i) {
|
||||
}
|
||||
|
||||
static inline int8 shuffle(int8 v0, int8 v1, int i) {
|
||||
return __shuffle2_int8(v0, v1, i);
|
||||
return __shuffle2_i8(v0, v1, i);
|
||||
}
|
||||
|
||||
static inline int16 shuffle(int16 v0, int16 v1, int i) {
|
||||
return __shuffle2_int16(v0, v1, i);
|
||||
return __shuffle2_i16(v0, v1, i);
|
||||
}
|
||||
|
||||
static inline int32 shuffle(int32 v0, int32 v1, int i) {
|
||||
return __shuffle2_int32(v0, v1, i);
|
||||
return __shuffle2_i32(v0, v1, i);
|
||||
}
|
||||
|
||||
static inline double shuffle(double v0, double v1, int i) {
|
||||
@@ -174,7 +182,7 @@ static inline double shuffle(double v0, double v1, int i) {
|
||||
}
|
||||
|
||||
static inline int64 shuffle(int64 v0, int64 v1, int i) {
|
||||
return __shuffle2_int64(v0, v1, i);
|
||||
return __shuffle2_i64(v0, v1, i);
|
||||
}
|
||||
|
||||
// x[i]
|
||||
@@ -274,13 +282,21 @@ static inline int32 sign_extend(bool v) {
|
||||
static inline uniform bool any(bool v) {
|
||||
// We only care about whether "any" is true for the active program instances,
|
||||
// so we have to make v with the current program mask.
|
||||
#ifdef ISPC_TARGET_GENERIC
|
||||
return __movmsk(v & __mask) != 0;
|
||||
#else
|
||||
return __movmsk(__sext_varying_bool(v) & __mask) != 0;
|
||||
#endif
|
||||
}
|
||||
|
||||
static inline uniform bool all(bool v) {
|
||||
// As with any(), we need to explicitly mask v with the current program mask
|
||||
// so we're only looking at the current lanes
|
||||
#ifdef ISPC_TARGET_GENERIC
|
||||
bool match = ((v & __mask) == __mask);
|
||||
#else
|
||||
int32 match = __sext_varying_bool((__sext_varying_bool(v) & __mask) == __mask);
|
||||
#endif
|
||||
return __movmsk(match) == (1 << programCount) - 1;
|
||||
}
|
||||
|
||||
@@ -296,19 +312,23 @@ static inline int popcnt(int v) {
|
||||
int r;
|
||||
for (uniform int i = 0; i < programCount; ++i)
|
||||
r = insert(r, i, popcnt(extract(v, i)));
|
||||
return (r & __mask);
|
||||
return __mask ? r : 0;
|
||||
}
|
||||
|
||||
static inline int popcnt(int64 v) {
|
||||
int r;
|
||||
for (uniform int i = 0; i < programCount; ++i)
|
||||
r = insert(r, i, popcnt(extract(v, i)));
|
||||
return (r & __mask);
|
||||
return __mask ? r : 0;
|
||||
}
|
||||
|
||||
static inline uniform int popcnt(bool v) {
|
||||
// As with any() and all(), only count across the active lanes
|
||||
#ifdef ISPC_TARGET_GENERIC
|
||||
return __popcnt_int32(__movmsk(v & __mask));
|
||||
#else
|
||||
return __popcnt_int32(__movmsk(__sext_varying_bool(v) & __mask));
|
||||
#endif
|
||||
}
|
||||
|
||||
static inline uniform int lanemask() {
|
||||
@@ -450,23 +470,27 @@ soa_to_aos4(float v0, float v1, float v2, float v3, uniform float a[]) {
|
||||
static inline void
|
||||
aos_to_soa3(uniform int32 a[], int32 * uniform v0, int32 * uniform v1,
|
||||
int32 * uniform v2) {
|
||||
__aos_to_soa3_int32(a, v0, v1, v2);
|
||||
aos_to_soa3((uniform float * uniform)a, (float * uniform)v0,
|
||||
(float * uniform)v1, (float * uniform)v2);
|
||||
}
|
||||
|
||||
static inline void
|
||||
soa_to_aos3(int32 v0, int32 v1, int32 v2, uniform int32 a[]) {
|
||||
__soa_to_aos3_int32(v0, v1, v2, a);
|
||||
soa_to_aos3(floatbits(v0), floatbits(v1), floatbits(v2),
|
||||
(uniform float * uniform)a);
|
||||
}
|
||||
|
||||
static inline void
|
||||
aos_to_soa4(uniform int32 a[], int32 * uniform v0, int32 * uniform v1,
|
||||
int32 * uniform v2, int32 * uniform v3) {
|
||||
__aos_to_soa4_int32(a, v0, v1, v2, v3);
|
||||
aos_to_soa4((uniform float * uniform)a, (float * uniform )v0,
|
||||
(float * uniform)v1, (float * uniform)v2, (float * uniform)v3);
|
||||
}
|
||||
|
||||
static inline void
|
||||
soa_to_aos4(int32 v0, int32 v1, int32 v2, int32 v3, uniform int32 a[]) {
|
||||
__soa_to_aos4_int32(v0, v1, v2, v3, a);
|
||||
soa_to_aos4(floatbits(v0), floatbits(v1), floatbits(v2), floatbits(v3),
|
||||
(uniform float * uniform)a);
|
||||
}
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
@@ -569,7 +593,7 @@ static inline uniform float reduce_max(float v) {
|
||||
|
||||
static inline uniform int reduce_add(int x) {
|
||||
// Zero out the values for lanes that aren't running
|
||||
return __reduce_add_int32(x & __mask);
|
||||
return __reduce_add_int32(__mask ? x : 0);
|
||||
}
|
||||
|
||||
static inline uniform int reduce_min(int v) {
|
||||
@@ -589,7 +613,7 @@ static inline uniform int reduce_max(int v) {
|
||||
static inline uniform unsigned int reduce_add(unsigned int x) {
|
||||
// Set values for non-running lanes to zero so they don't affect the
|
||||
// result.
|
||||
return __reduce_add_uint32(x & __mask);
|
||||
return __reduce_add_uint32(__mask ? x : 0);
|
||||
}
|
||||
|
||||
static inline uniform unsigned int reduce_min(unsigned int v) {
|
||||
@@ -627,7 +651,7 @@ static inline uniform double reduce_max(double v) {
|
||||
|
||||
static inline uniform int64 reduce_add(int64 x) {
|
||||
// Zero out the values for lanes that aren't running
|
||||
return __reduce_add_int64(x & (int64)(__mask));
|
||||
return __reduce_add_int64(__mask ? x : 0);
|
||||
}
|
||||
|
||||
static inline uniform int64 reduce_min(int64 v) {
|
||||
@@ -647,7 +671,7 @@ static inline uniform int64 reduce_max(int64 v) {
|
||||
static inline uniform unsigned int64 reduce_add(unsigned int64 x) {
|
||||
// Set values for non-running lanes to zero so they don't affect the
|
||||
// result.
|
||||
return __reduce_add_int64(x & (int64)(__mask));
|
||||
return __reduce_add_int64(__mask ? x : 0);
|
||||
}
|
||||
|
||||
static inline uniform unsigned int64 reduce_min(unsigned int64 v) {
|
||||
@@ -672,19 +696,19 @@ static inline uniform bool reduce_equal(TYPE v, uniform TYPE * uniform value) {
|
||||
return __reduce_equal_##FUNCTYPE(v, value, (MASKTYPE)__mask); \
|
||||
}
|
||||
|
||||
REDUCE_EQUAL(int32, int32, int32)
|
||||
REDUCE_EQUAL(unsigned int32, int32, unsigned int32)
|
||||
REDUCE_EQUAL(float, float, int32)
|
||||
REDUCE_EQUAL(int64, int64, int32)
|
||||
REDUCE_EQUAL(unsigned int64, int64, unsigned int32)
|
||||
REDUCE_EQUAL(double, double, int32)
|
||||
REDUCE_EQUAL(int32, int32, IntMaskType)
|
||||
REDUCE_EQUAL(unsigned int32, int32, UIntMaskType)
|
||||
REDUCE_EQUAL(float, float, IntMaskType)
|
||||
REDUCE_EQUAL(int64, int64, IntMaskType)
|
||||
REDUCE_EQUAL(unsigned int64, int64, UIntMaskType)
|
||||
REDUCE_EQUAL(double, double, IntMaskType)
|
||||
|
||||
static int32 exclusive_scan_add(int32 v) {
|
||||
return __exclusive_scan_add_i32(v, (int32)__mask);
|
||||
return __exclusive_scan_add_i32(v, (IntMaskType)__mask);
|
||||
}
|
||||
|
||||
static unsigned int32 exclusive_scan_add(unsigned int32 v) {
|
||||
return __exclusive_scan_add_i32(v, __mask);
|
||||
return __exclusive_scan_add_i32((int32)v, (IntMaskType)__mask);
|
||||
}
|
||||
|
||||
static float exclusive_scan_add(float v) {
|
||||
@@ -692,11 +716,11 @@ static float exclusive_scan_add(float v) {
|
||||
}
|
||||
|
||||
static int64 exclusive_scan_add(int64 v) {
|
||||
return __exclusive_scan_add_i64(v, (int32)__mask);
|
||||
return __exclusive_scan_add_i64(v, (IntMaskType)__mask);
|
||||
}
|
||||
|
||||
static unsigned int64 exclusive_scan_add(unsigned int64 v) {
|
||||
return __exclusive_scan_add_i64(v, __mask);
|
||||
return __exclusive_scan_add_i64(v, (UIntMaskType)__mask);
|
||||
}
|
||||
|
||||
static double exclusive_scan_add(double v) {
|
||||
@@ -704,35 +728,35 @@ static double exclusive_scan_add(double v) {
|
||||
}
|
||||
|
||||
static int32 exclusive_scan_and(int32 v) {
|
||||
return __exclusive_scan_and_i32(v, (int32)__mask);
|
||||
return __exclusive_scan_and_i32(v, (IntMaskType)__mask);
|
||||
}
|
||||
|
||||
static unsigned int32 exclusive_scan_and(unsigned int32 v) {
|
||||
return __exclusive_scan_and_i32(v, __mask);
|
||||
return __exclusive_scan_and_i32(v, (UIntMaskType)__mask);
|
||||
}
|
||||
|
||||
static int64 exclusive_scan_and(int64 v) {
|
||||
return __exclusive_scan_and_i64(v, (int32)__mask);
|
||||
return __exclusive_scan_and_i64(v, (IntMaskType)__mask);
|
||||
}
|
||||
|
||||
static unsigned int64 exclusive_scan_and(unsigned int64 v) {
|
||||
return __exclusive_scan_and_i64(v, __mask);
|
||||
return __exclusive_scan_and_i64(v, (UIntMaskType)__mask);
|
||||
}
|
||||
|
||||
static int32 exclusive_scan_or(int32 v) {
|
||||
return __exclusive_scan_or_i32(v, (int32)__mask);
|
||||
return __exclusive_scan_or_i32(v, (IntMaskType)__mask);
|
||||
}
|
||||
|
||||
static unsigned int32 exclusive_scan_or(unsigned int32 v) {
|
||||
return __exclusive_scan_or_i32(v, __mask);
|
||||
return __exclusive_scan_or_i32(v, (UIntMaskType)__mask);
|
||||
}
|
||||
|
||||
static int64 exclusive_scan_or(int64 v) {
|
||||
return __exclusive_scan_or_i64(v, (int32)__mask);
|
||||
return __exclusive_scan_or_i64(v, (IntMaskType)__mask);
|
||||
}
|
||||
|
||||
static unsigned int64 exclusive_scan_or(unsigned int64 v) {
|
||||
return __exclusive_scan_or_i64(v, __mask);
|
||||
return __exclusive_scan_or_i64(v, (UIntMaskType)__mask);
|
||||
}
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
@@ -741,23 +765,23 @@ static unsigned int64 exclusive_scan_or(unsigned int64 v) {
|
||||
static inline uniform int
|
||||
packed_load_active(uniform unsigned int * uniform a,
|
||||
unsigned int * uniform vals) {
|
||||
return __packed_load_active(a, vals, (unsigned int32)__mask);
|
||||
return __packed_load_active(a, vals, (UIntMaskType)__mask);
|
||||
}
|
||||
|
||||
static inline uniform int
|
||||
packed_store_active(uniform unsigned int * uniform a,
|
||||
unsigned int vals) {
|
||||
return __packed_store_active(a, vals, (unsigned int32)__mask);
|
||||
return __packed_store_active(a, vals, (UIntMaskType)__mask);
|
||||
}
|
||||
|
||||
static inline uniform int
|
||||
packed_load_active(uniform int * uniform a, int * uniform vals) {
|
||||
return __packed_load_active(a, vals, (int32)__mask);
|
||||
return __packed_load_active(a, vals, (IntMaskType)__mask);
|
||||
}
|
||||
|
||||
static inline uniform int
|
||||
packed_store_active(uniform int * uniform a, int vals) {
|
||||
return __packed_store_active(a, vals, (int32)__mask);
|
||||
return __packed_store_active(a, vals, (IntMaskType)__mask);
|
||||
}
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
@@ -784,8 +808,7 @@ static inline TA atomic_##OPA##_global(uniform TA * uniform ptr, TA value) { \
|
||||
static inline uniform TA atomic_##OPA##_global(uniform TA * uniform ptr, \
|
||||
uniform TA value) { \
|
||||
memory_barrier(); \
|
||||
uniform TA ret = __atomic_##OPB##_uniform_##TB##_global(ptr, value, \
|
||||
(MASKTYPE)__mask); \
|
||||
uniform TA ret = __atomic_##OPB##_uniform_##TB##_global(ptr, value); \
|
||||
memory_barrier(); \
|
||||
return ret; \
|
||||
} \
|
||||
@@ -800,22 +823,80 @@ static inline TA atomic_##OPA##_global(uniform TA * varying ptr, TA value) { \
|
||||
continue; \
|
||||
uniform TA * uniform p = ptrArray[i]; \
|
||||
uniform TA v = extract(value, i); \
|
||||
uniform TA r = __atomic_##OPB##_uniform_##TB##_global(p, v, \
|
||||
(MASKTYPE)__mask); \
|
||||
uniform TA r = __atomic_##OPB##_uniform_##TB##_global(p, v); \
|
||||
ret = insert(ret, i, r); \
|
||||
} \
|
||||
memory_barrier(); \
|
||||
return ret; \
|
||||
} \
|
||||
|
||||
#define DEFINE_ATOMIC_MINMAX_OP(TA,TB,OPA,OPB, MASKTYPE) \
|
||||
#define DEFINE_ATOMIC_SWAP(TA,TB) \
|
||||
static inline TA atomic_swap_global(uniform TA * uniform ptr, TA value) { \
|
||||
memory_barrier(); \
|
||||
uniform int i = 0; \
|
||||
TA ret[programCount]; \
|
||||
TA memVal; \
|
||||
uniform int lastSwap; \
|
||||
uniform int mask = lanemask(); \
|
||||
/* First, have the first running program instance (if any) perform \
|
||||
the swap with memory with its value of "value"; record the \
|
||||
value returned. */ \
|
||||
for (; i < programCount; ++i) { \
|
||||
if ((mask & (1 << i)) == 0) \
|
||||
continue; \
|
||||
memVal = __atomic_swap_uniform_##TB##_global(ptr, extract(value, i)); \
|
||||
lastSwap = i; \
|
||||
break; \
|
||||
} \
|
||||
/* Now, for all of the remaining running program instances, set the \
|
||||
return value of the last instance that did a swap with this \
|
||||
instance's value of "value"; this gives the same effect as if the \
|
||||
current instance had executed a hardware atomic swap right before \
|
||||
the last one that did a swap. */ \
|
||||
for (; i < programCount; ++i) { \
|
||||
if ((mask & (1 << i)) == 0) \
|
||||
continue; \
|
||||
ret[lastSwap] = extract(value, i); \
|
||||
lastSwap = i; \
|
||||
} \
|
||||
/* And the last instance that wanted to swap gets the value we \
|
||||
originally got back from memory... */ \
|
||||
ret[lastSwap] = memVal; \
|
||||
memory_barrier(); \
|
||||
return ret[programIndex]; \
|
||||
} \
|
||||
static inline uniform TA atomic_swap_global(uniform TA * uniform ptr, \
|
||||
uniform TA value) { \
|
||||
memory_barrier(); \
|
||||
uniform TA ret = __atomic_swap_uniform_##TB##_global(ptr, value); \
|
||||
memory_barrier(); \
|
||||
return ret; \
|
||||
} \
|
||||
static inline TA atomic_swap_global(uniform TA * varying ptr, TA value) { \
|
||||
uniform TA * uniform ptrArray[programCount]; \
|
||||
ptrArray[programIndex] = ptr; \
|
||||
memory_barrier(); \
|
||||
TA ret; \
|
||||
uniform int mask = lanemask(); \
|
||||
for (uniform int i = 0; i < programCount; ++i) { \
|
||||
if ((mask & (1 << i)) == 0) \
|
||||
continue; \
|
||||
uniform TA * uniform p = ptrArray[i]; \
|
||||
uniform TA v = extract(value, i); \
|
||||
uniform TA r = __atomic_swap_uniform_##TB##_global(p, v); \
|
||||
ret = insert(ret, i, r); \
|
||||
} \
|
||||
memory_barrier(); \
|
||||
return ret; \
|
||||
} \
|
||||
|
||||
#define DEFINE_ATOMIC_MINMAX_OP(TA,TB,OPA,OPB) \
|
||||
static inline TA atomic_##OPA##_global(uniform TA * uniform ptr, TA value) { \
|
||||
uniform TA oneval = reduce_##OPA(value); \
|
||||
TA ret; \
|
||||
if (lanemask() != 0) { \
|
||||
memory_barrier(); \
|
||||
ret = __atomic_##OPB##_uniform_##TB##_global(ptr, oneval, \
|
||||
(MASKTYPE)__mask); \
|
||||
ret = __atomic_##OPB##_uniform_##TB##_global(ptr, oneval); \
|
||||
memory_barrier(); \
|
||||
} \
|
||||
return ret; \
|
||||
@@ -823,8 +904,7 @@ static inline TA atomic_##OPA##_global(uniform TA * uniform ptr, TA value) { \
|
||||
static inline uniform TA atomic_##OPA##_global(uniform TA * uniform ptr, \
|
||||
uniform TA value) { \
|
||||
memory_barrier(); \
|
||||
uniform TA ret = __atomic_##OPB##_uniform_##TB##_global(ptr, value, \
|
||||
(MASKTYPE)__mask); \
|
||||
uniform TA ret = __atomic_##OPB##_uniform_##TB##_global(ptr, value); \
|
||||
memory_barrier(); \
|
||||
return ret; \
|
||||
} \
|
||||
@@ -840,59 +920,60 @@ static inline TA atomic_##OPA##_global(uniform TA * varying ptr, \
|
||||
continue; \
|
||||
uniform TA * uniform p = ptrArray[i]; \
|
||||
uniform TA v = extract(value, i); \
|
||||
uniform TA r = __atomic_##OPB##_uniform_##TB##_global(p, v, \
|
||||
(MASKTYPE)__mask); \
|
||||
uniform TA r = __atomic_##OPB##_uniform_##TB##_global(p, v); \
|
||||
ret = insert(ret, i, r); \
|
||||
} \
|
||||
memory_barrier(); \
|
||||
return ret; \
|
||||
}
|
||||
|
||||
DEFINE_ATOMIC_OP(int32,int32,add,add,int32)
|
||||
DEFINE_ATOMIC_OP(int32,int32,subtract,sub,int32)
|
||||
DEFINE_ATOMIC_MINMAX_OP(int32,int32,min,min,int32)
|
||||
DEFINE_ATOMIC_MINMAX_OP(int32,int32,max,max,int32)
|
||||
DEFINE_ATOMIC_OP(int32,int32,and,and,int32)
|
||||
DEFINE_ATOMIC_OP(int32,int32,or,or,int32)
|
||||
DEFINE_ATOMIC_OP(int32,int32,xor,xor,int32)
|
||||
DEFINE_ATOMIC_OP(int32,int32,swap,swap,int32)
|
||||
DEFINE_ATOMIC_OP(int32,int32,add,add,IntMaskType)
|
||||
DEFINE_ATOMIC_OP(int32,int32,subtract,sub,IntMaskType)
|
||||
DEFINE_ATOMIC_MINMAX_OP(int32,int32,min,min)
|
||||
DEFINE_ATOMIC_MINMAX_OP(int32,int32,max,max)
|
||||
DEFINE_ATOMIC_OP(int32,int32,and,and,IntMaskType)
|
||||
DEFINE_ATOMIC_OP(int32,int32,or,or,IntMaskType)
|
||||
DEFINE_ATOMIC_OP(int32,int32,xor,xor,IntMaskType)
|
||||
DEFINE_ATOMIC_SWAP(int32,int32)
|
||||
|
||||
// For everything but atomic min and max, we can use the same
|
||||
// implementations for unsigned as for signed.
|
||||
DEFINE_ATOMIC_OP(unsigned int32,int32,add,add,unsigned int32)
|
||||
DEFINE_ATOMIC_OP(unsigned int32,int32,subtract,sub,unsigned int32)
|
||||
DEFINE_ATOMIC_MINMAX_OP(unsigned int32,uint32,min,umin,unsigned int32)
|
||||
DEFINE_ATOMIC_MINMAX_OP(unsigned int32,uint32,max,umax,unsigned int32)
|
||||
DEFINE_ATOMIC_OP(unsigned int32,int32,and,and,unsigned int32)
|
||||
DEFINE_ATOMIC_OP(unsigned int32,int32,or,or,unsigned int32)
|
||||
DEFINE_ATOMIC_OP(unsigned int32,int32,xor,xor,unsigned int32)
|
||||
DEFINE_ATOMIC_OP(unsigned int32,int32,swap,swap,unsigned int32)
|
||||
DEFINE_ATOMIC_OP(unsigned int32,int32,add,add,UIntMaskType)
|
||||
DEFINE_ATOMIC_OP(unsigned int32,int32,subtract,sub,UIntMaskType)
|
||||
DEFINE_ATOMIC_MINMAX_OP(unsigned int32,uint32,min,umin)
|
||||
DEFINE_ATOMIC_MINMAX_OP(unsigned int32,uint32,max,umax)
|
||||
DEFINE_ATOMIC_OP(unsigned int32,int32,and,and,UIntMaskType)
|
||||
DEFINE_ATOMIC_OP(unsigned int32,int32,or,or,UIntMaskType)
|
||||
DEFINE_ATOMIC_OP(unsigned int32,int32,xor,xor,UIntMaskType)
|
||||
DEFINE_ATOMIC_SWAP(unsigned int32,int32)
|
||||
|
||||
DEFINE_ATOMIC_OP(float,float,swap,swap,int32)
|
||||
DEFINE_ATOMIC_SWAP(float,float)
|
||||
|
||||
DEFINE_ATOMIC_OP(int64,int64,add,add,int32)
|
||||
DEFINE_ATOMIC_OP(int64,int64,subtract,sub,int32)
|
||||
DEFINE_ATOMIC_MINMAX_OP(int64,int64,min,min,int32)
|
||||
DEFINE_ATOMIC_MINMAX_OP(int64,int64,max,max,int32)
|
||||
DEFINE_ATOMIC_OP(int64,int64,and,and,int32)
|
||||
DEFINE_ATOMIC_OP(int64,int64,or,or,int32)
|
||||
DEFINE_ATOMIC_OP(int64,int64,xor,xor,int32)
|
||||
DEFINE_ATOMIC_OP(int64,int64,swap,swap,int32)
|
||||
DEFINE_ATOMIC_OP(int64,int64,add,add,IntMaskType)
|
||||
DEFINE_ATOMIC_OP(int64,int64,subtract,sub,IntMaskType)
|
||||
DEFINE_ATOMIC_MINMAX_OP(int64,int64,min,min)
|
||||
DEFINE_ATOMIC_MINMAX_OP(int64,int64,max,max)
|
||||
DEFINE_ATOMIC_OP(int64,int64,and,and,IntMaskType)
|
||||
DEFINE_ATOMIC_OP(int64,int64,or,or,IntMaskType)
|
||||
DEFINE_ATOMIC_OP(int64,int64,xor,xor,IntMaskType)
|
||||
DEFINE_ATOMIC_SWAP(int64,int64)
|
||||
|
||||
// For everything but atomic min and max, we can use the same
|
||||
// implementations for unsigned as for signed.
|
||||
DEFINE_ATOMIC_OP(unsigned int64,int64,add,add,unsigned int32)
|
||||
DEFINE_ATOMIC_OP(unsigned int64,int64,subtract,sub,unsigned int32)
|
||||
DEFINE_ATOMIC_MINMAX_OP(unsigned int64,uint64,min,umin,unsigned int32)
|
||||
DEFINE_ATOMIC_MINMAX_OP(unsigned int64,uint64,max,umax,unsigned int32)
|
||||
DEFINE_ATOMIC_OP(unsigned int64,int64,and,and,unsigned int32)
|
||||
DEFINE_ATOMIC_OP(unsigned int64,int64,or,or,unsigned int32)
|
||||
DEFINE_ATOMIC_OP(unsigned int64,int64,xor,xor,unsigned int32)
|
||||
DEFINE_ATOMIC_OP(unsigned int64,int64,swap,swap,unsigned int32)
|
||||
DEFINE_ATOMIC_OP(unsigned int64,int64,add,add,UIntMaskType)
|
||||
DEFINE_ATOMIC_OP(unsigned int64,int64,subtract,sub,UIntMaskType)
|
||||
DEFINE_ATOMIC_MINMAX_OP(unsigned int64,uint64,min,umin)
|
||||
DEFINE_ATOMIC_MINMAX_OP(unsigned int64,uint64,max,umax)
|
||||
DEFINE_ATOMIC_OP(unsigned int64,int64,and,and,UIntMaskType)
|
||||
DEFINE_ATOMIC_OP(unsigned int64,int64,or,or,UIntMaskType)
|
||||
DEFINE_ATOMIC_OP(unsigned int64,int64,xor,xor,UIntMaskType)
|
||||
DEFINE_ATOMIC_SWAP(unsigned int64,int64)
|
||||
|
||||
DEFINE_ATOMIC_OP(double,double,swap,swap,int32)
|
||||
DEFINE_ATOMIC_SWAP(double,double)
|
||||
|
||||
#undef DEFINE_ATOMIC_OP
|
||||
#undef DEFINE_ATOMIC_MINMAX_OP
|
||||
#undef DEFINE_ATOMIC_SWAP
|
||||
|
||||
#define ATOMIC_DECL_CMPXCHG(TA, TB, MASKTYPE) \
|
||||
static inline TA atomic_compare_exchange_global( \
|
||||
@@ -907,18 +988,17 @@ static inline uniform TA atomic_compare_exchange_global( \
|
||||
uniform TA * uniform ptr, uniform TA oldval, uniform TA newval) { \
|
||||
memory_barrier(); \
|
||||
uniform TA ret = \
|
||||
__atomic_compare_exchange_uniform_##TB##_global(ptr, oldval, newval, \
|
||||
(MASKTYPE)__mask); \
|
||||
__atomic_compare_exchange_uniform_##TB##_global(ptr, oldval, newval); \
|
||||
memory_barrier(); \
|
||||
return ret; \
|
||||
}
|
||||
|
||||
ATOMIC_DECL_CMPXCHG(int32, int32, int32)
|
||||
ATOMIC_DECL_CMPXCHG(unsigned int32, int32, unsigned int32)
|
||||
ATOMIC_DECL_CMPXCHG(float, float, int32)
|
||||
ATOMIC_DECL_CMPXCHG(int64, int64, int32)
|
||||
ATOMIC_DECL_CMPXCHG(unsigned int64, int64, unsigned int32)
|
||||
ATOMIC_DECL_CMPXCHG(double, double, int32)
|
||||
ATOMIC_DECL_CMPXCHG(int32, int32, IntMaskType)
|
||||
ATOMIC_DECL_CMPXCHG(unsigned int32, int32, UIntMaskType)
|
||||
ATOMIC_DECL_CMPXCHG(float, float, IntMaskType)
|
||||
ATOMIC_DECL_CMPXCHG(int64, int64, IntMaskType)
|
||||
ATOMIC_DECL_CMPXCHG(unsigned int64, int64, UIntMaskType)
|
||||
ATOMIC_DECL_CMPXCHG(double, double, IntMaskType)
|
||||
|
||||
#undef ATOMIC_DECL_CMPXCHG
|
||||
|
||||
@@ -3071,16 +3151,15 @@ static inline unsigned int random(RNGState * uniform state)
|
||||
{
|
||||
unsigned int b;
|
||||
|
||||
// FIXME: state->z1, etc..
|
||||
b = (((*state).z1 << 6) ^ (*state).z1) >> 13;
|
||||
(*state).z1 = (((*state).z1 & 4294967294U) << 18) ^ b;
|
||||
b = (((*state).z2 << 2) ^ (*state).z2) >> 27;
|
||||
(*state).z2 = (((*state).z2 & 4294967288U) << 2) ^ b;
|
||||
b = (((*state).z3 << 13) ^ (*state).z3) >> 21;
|
||||
(*state).z3 = (((*state).z3 & 4294967280U) << 7) ^ b;
|
||||
b = (((*state).z4 << 3) ^ (*state).z4) >> 12;
|
||||
(*state).z4 = (((*state).z4 & 4294967168U) << 13) ^ b;
|
||||
return ((*state).z1 ^ (*state).z2 ^ (*state).z3 ^ (*state).z4);
|
||||
b = ((state->z1 << 6) ^ state->z1) >> 13;
|
||||
state->z1 = ((state->z1 & 4294967294U) << 18) ^ b;
|
||||
b = ((state->z2 << 2) ^ state->z2) >> 27;
|
||||
state->z2 = ((state->z2 & 4294967288U) << 2) ^ b;
|
||||
b = ((state->z3 << 13) ^ state->z3) >> 21;
|
||||
state->z3 = ((state->z3 & 4294967280U) << 7) ^ b;
|
||||
b = ((state->z4 << 3) ^ state->z4) >> 12;
|
||||
state->z4 = ((state->z4 & 4294967168U) << 13) ^ b;
|
||||
return (state->z1 ^ state->z2 ^ state->z3 ^ state->z4);
|
||||
}
|
||||
|
||||
static inline float frandom(RNGState * uniform state)
|
||||
@@ -3096,30 +3175,30 @@ static inline uniform unsigned int __seed4(RNGState * uniform state,
|
||||
uniform unsigned int c1 = 0xf0f0f0f0;
|
||||
uniform unsigned int c2 = 0x0f0f0f0f;
|
||||
|
||||
(*state).z1 = insert((*state).z1, start + 0, seed);
|
||||
(*state).z1 = insert((*state).z1, start + 1, seed ^ c1);
|
||||
(*state).z1 = insert((*state).z1, start + 2, (seed << 3) ^ c1);
|
||||
(*state).z1 = insert((*state).z1, start + 3, (seed << 2) ^ c2);
|
||||
state->z1 = insert(state->z1, start + 0, seed);
|
||||
state->z1 = insert(state->z1, start + 1, seed ^ c1);
|
||||
state->z1 = insert(state->z1, start + 2, (seed << 3) ^ c1);
|
||||
state->z1 = insert(state->z1, start + 3, (seed << 2) ^ c2);
|
||||
|
||||
seed += 131;
|
||||
(*state).z2 = insert((*state).z2, start + 0, seed);
|
||||
(*state).z2 = insert((*state).z2, start + 1, seed ^ c1);
|
||||
(*state).z2 = insert((*state).z2, start + 2, (seed << 3) ^ c1);
|
||||
(*state).z2 = insert((*state).z2, start + 3, (seed << 2) ^ c2);
|
||||
state->z2 = insert(state->z2, start + 0, seed);
|
||||
state->z2 = insert(state->z2, start + 1, seed ^ c1);
|
||||
state->z2 = insert(state->z2, start + 2, (seed << 3) ^ c1);
|
||||
state->z2 = insert(state->z2, start + 3, (seed << 2) ^ c2);
|
||||
|
||||
seed ^= extract((*state).z2, 2);
|
||||
(*state).z3 = insert((*state).z3, start + 0, seed);
|
||||
(*state).z3 = insert((*state).z3, start + 1, seed ^ c1);
|
||||
(*state).z3 = insert((*state).z3, start + 2, (seed << 3) ^ c1);
|
||||
(*state).z3 = insert((*state).z3, start + 3, (seed << 2) ^ c2);
|
||||
seed ^= extract(state->z2, 2);
|
||||
state->z3 = insert(state->z3, start + 0, seed);
|
||||
state->z3 = insert(state->z3, start + 1, seed ^ c1);
|
||||
state->z3 = insert(state->z3, start + 2, (seed << 3) ^ c1);
|
||||
state->z3 = insert(state->z3, start + 3, (seed << 2) ^ c2);
|
||||
|
||||
seed <<= 4;
|
||||
seed += 3;
|
||||
seed ^= extract((*state).z1, 3);
|
||||
(*state).z4 = insert((*state).z4, start + 0, seed);
|
||||
(*state).z4 = insert((*state).z4, start + 1, seed ^ c1);
|
||||
(*state).z4 = insert((*state).z4, start + 2, (seed << 3) ^ c1);
|
||||
(*state).z4 = insert((*state).z4, start + 3, (seed << 2) ^ c2);
|
||||
seed ^= extract(state->z1, 3);
|
||||
state->z4 = insert(state->z4, start + 0, seed);
|
||||
state->z4 = insert(state->z4, start + 1, seed ^ c1);
|
||||
state->z4 = insert(state->z4, start + 2, (seed << 3) ^ c1);
|
||||
state->z4 = insert(state->z4, start + 3, (seed << 2) ^ c2);
|
||||
|
||||
return seed;
|
||||
}
|
||||
|
||||
@@ -2,11 +2,17 @@
|
||||
|
||||
import sys
|
||||
|
||||
print "char stdlib_code[] = { "
|
||||
t=str(sys.argv[1])
|
||||
|
||||
for line in sys.stdin:
|
||||
for c in line:
|
||||
print ord(c)
|
||||
print ", "
|
||||
sys.stdout.write("char stdlib_" + t + "_code[] = {\n")
|
||||
|
||||
print "0 };"
|
||||
width = 16
|
||||
data = sys.stdin.read()
|
||||
for i in range(0, len(data), 1):
|
||||
sys.stdout.write("0x%0.2X, " % ord(data[i:i+1]))
|
||||
|
||||
if i%width == (width-1):
|
||||
sys.stdout.write("\n")
|
||||
|
||||
sys.stdout.write("0x00 };\n\n")
|
||||
|
||||
|
||||
112
stmt.h
112
stmt.h
@@ -60,8 +60,10 @@ public:
|
||||
virtual void Print(int indent) const = 0;
|
||||
|
||||
// Redeclare these methods with Stmt * return values, rather than
|
||||
// ASTNode *s, as in the original ASTNode declarations of them.
|
||||
virtual Stmt *Optimize() = 0;
|
||||
// ASTNode *s, as in the original ASTNode declarations of them. We'll
|
||||
// also provide a default implementation of Optimize(), since most
|
||||
// Stmts don't have anything to do here.
|
||||
virtual Stmt *Optimize();
|
||||
virtual Stmt *TypeCheck() = 0;
|
||||
};
|
||||
|
||||
@@ -74,7 +76,6 @@ public:
|
||||
void EmitCode(FunctionEmitContext *ctx) const;
|
||||
void Print(int indent) const;
|
||||
|
||||
Stmt *Optimize();
|
||||
Stmt *TypeCheck();
|
||||
int EstimateCost() const;
|
||||
|
||||
@@ -117,7 +118,6 @@ public:
|
||||
void EmitCode(FunctionEmitContext *ctx) const;
|
||||
void Print(int indent) const;
|
||||
|
||||
Stmt *Optimize();
|
||||
Stmt *TypeCheck();
|
||||
int EstimateCost() const;
|
||||
|
||||
@@ -158,7 +158,6 @@ public:
|
||||
void EmitCode(FunctionEmitContext *ctx) const;
|
||||
void Print(int indent) const;
|
||||
|
||||
Stmt *Optimize();
|
||||
Stmt *TypeCheck();
|
||||
int EstimateCost() const;
|
||||
|
||||
@@ -179,7 +178,6 @@ public:
|
||||
void EmitCode(FunctionEmitContext *ctx) const;
|
||||
void Print(int indent) const;
|
||||
|
||||
Stmt *Optimize();
|
||||
Stmt *TypeCheck();
|
||||
int EstimateCost() const;
|
||||
|
||||
@@ -206,7 +204,6 @@ public:
|
||||
void EmitCode(FunctionEmitContext *ctx) const;
|
||||
void Print(int indent) const;
|
||||
|
||||
Stmt *Optimize();
|
||||
Stmt *TypeCheck();
|
||||
int EstimateCost() const;
|
||||
|
||||
@@ -228,7 +225,6 @@ public:
|
||||
void EmitCode(FunctionEmitContext *ctx) const;
|
||||
void Print(int indent) const;
|
||||
|
||||
Stmt *Optimize();
|
||||
Stmt *TypeCheck();
|
||||
int EstimateCost() const;
|
||||
|
||||
@@ -253,7 +249,6 @@ public:
|
||||
void EmitCode(FunctionEmitContext *ctx) const;
|
||||
void Print(int indent) const;
|
||||
|
||||
Stmt *Optimize();
|
||||
Stmt *TypeCheck();
|
||||
int EstimateCost() const;
|
||||
|
||||
@@ -275,7 +270,6 @@ public:
|
||||
void EmitCode(FunctionEmitContext *ctx) const;
|
||||
void Print(int indent) const;
|
||||
|
||||
Stmt *Optimize();
|
||||
Stmt *TypeCheck();
|
||||
int EstimateCost() const;
|
||||
|
||||
@@ -288,6 +282,97 @@ public:
|
||||
};
|
||||
|
||||
|
||||
/** Statement corresponding to a "case" label in the program. In addition
|
||||
to the value associated with the "case", this statement also stores the
|
||||
statements following it. */
|
||||
class CaseStmt : public Stmt {
|
||||
public:
|
||||
CaseStmt(int value, Stmt *stmt, SourcePos pos);
|
||||
|
||||
void EmitCode(FunctionEmitContext *ctx) const;
|
||||
void Print(int indent) const;
|
||||
|
||||
Stmt *TypeCheck();
|
||||
int EstimateCost() const;
|
||||
|
||||
/** Integer value after the "case" statement */
|
||||
const int value;
|
||||
Stmt *stmts;
|
||||
};
|
||||
|
||||
|
||||
/** Statement for a "default" label (as would be found inside a "switch"
|
||||
statement). */
|
||||
class DefaultStmt : public Stmt {
|
||||
public:
|
||||
DefaultStmt(Stmt *stmt, SourcePos pos);
|
||||
|
||||
void EmitCode(FunctionEmitContext *ctx) const;
|
||||
void Print(int indent) const;
|
||||
|
||||
Stmt *TypeCheck();
|
||||
int EstimateCost() const;
|
||||
|
||||
Stmt *stmts;
|
||||
};
|
||||
|
||||
|
||||
/** A "switch" statement in the program. */
|
||||
class SwitchStmt : public Stmt {
|
||||
public:
|
||||
SwitchStmt(Expr *expr, Stmt *stmts, SourcePos pos);
|
||||
|
||||
void EmitCode(FunctionEmitContext *ctx) const;
|
||||
void Print(int indent) const;
|
||||
|
||||
Stmt *TypeCheck();
|
||||
int EstimateCost() const;
|
||||
|
||||
/** Expression that is used to determine which label to jump to. */
|
||||
Expr *expr;
|
||||
/** Statement block after the "switch" expression. */
|
||||
Stmt *stmts;
|
||||
};
|
||||
|
||||
|
||||
/** A "goto" in an ispc program. */
|
||||
class GotoStmt : public Stmt {
|
||||
public:
|
||||
GotoStmt(const char *label, SourcePos gotoPos, SourcePos idPos);
|
||||
|
||||
void EmitCode(FunctionEmitContext *ctx) const;
|
||||
void Print(int indent) const;
|
||||
|
||||
Stmt *Optimize();
|
||||
Stmt *TypeCheck();
|
||||
int EstimateCost() const;
|
||||
|
||||
/** Name of the label to jump to when the goto is executed. */
|
||||
std::string label;
|
||||
SourcePos identifierPos;
|
||||
};
|
||||
|
||||
|
||||
/** Statement corresponding to a label (as would be used as a goto target)
|
||||
in the program. */
|
||||
class LabeledStmt : public Stmt {
|
||||
public:
|
||||
LabeledStmt(const char *label, Stmt *stmt, SourcePos p);
|
||||
|
||||
void EmitCode(FunctionEmitContext *ctx) const;
|
||||
void Print(int indent) const;
|
||||
|
||||
Stmt *Optimize();
|
||||
Stmt *TypeCheck();
|
||||
int EstimateCost() const;
|
||||
|
||||
/** Name of the label. */
|
||||
std::string name;
|
||||
/** Statements following the label. */
|
||||
Stmt *stmt;
|
||||
};
|
||||
|
||||
|
||||
/** @brief Representation of a list of statements in the program.
|
||||
*/
|
||||
class StmtList : public Stmt {
|
||||
@@ -297,14 +382,11 @@ public:
|
||||
void EmitCode(FunctionEmitContext *ctx) const;
|
||||
void Print(int indent) const;
|
||||
|
||||
Stmt *Optimize();
|
||||
Stmt *TypeCheck();
|
||||
int EstimateCost() const;
|
||||
|
||||
void Add(Stmt *s) { if (s) stmts.push_back(s); }
|
||||
const std::vector<Stmt *> &GetStatements() { return stmts; }
|
||||
|
||||
private:
|
||||
std::vector<Stmt *> stmts;
|
||||
};
|
||||
|
||||
@@ -325,7 +407,6 @@ public:
|
||||
void EmitCode(FunctionEmitContext *ctx) const;
|
||||
void Print(int indent) const;
|
||||
|
||||
Stmt *Optimize();
|
||||
Stmt *TypeCheck();
|
||||
int EstimateCost() const;
|
||||
|
||||
@@ -341,7 +422,7 @@ public:
|
||||
|
||||
Like print() above, since we don't have strings as first-class types in
|
||||
the language, we need to do some gymnastics to support it. Like
|
||||
assert() in C, assert checks the given condition and prints an error
|
||||
assert() in C, assert() checks the given condition and prints an error
|
||||
and calls abort if the condition fails. For varying conditions, the
|
||||
assert triggers if it's true for any of the program instances.
|
||||
*/
|
||||
@@ -352,7 +433,6 @@ public:
|
||||
void EmitCode(FunctionEmitContext *ctx) const;
|
||||
void Print(int indent) const;
|
||||
|
||||
Stmt *Optimize();
|
||||
Stmt *TypeCheck();
|
||||
int EstimateCost() const;
|
||||
|
||||
|
||||
81
sym.cpp
81
sym.cpp
@@ -72,8 +72,7 @@ SymbolTable::SymbolTable() {
|
||||
|
||||
SymbolTable::~SymbolTable() {
|
||||
// Otherwise we have mismatched push/pop scopes
|
||||
assert(variables.size() == 1 && functions.size() == 1 &&
|
||||
types.size() == 1);
|
||||
Assert(variables.size() == 1 && types.size() == 1);
|
||||
PopScope();
|
||||
}
|
||||
|
||||
@@ -81,22 +80,17 @@ SymbolTable::~SymbolTable() {
|
||||
void
|
||||
SymbolTable::PushScope() {
|
||||
variables.push_back(new SymbolMapType);
|
||||
functions.push_back(new FunctionMapType);
|
||||
types.push_back(new TypeMapType);
|
||||
}
|
||||
|
||||
|
||||
void
|
||||
SymbolTable::PopScope() {
|
||||
assert(variables.size() > 1);
|
||||
Assert(variables.size() > 1);
|
||||
delete variables.back();
|
||||
variables.pop_back();
|
||||
|
||||
assert(functions.size() > 1);
|
||||
delete functions.back();
|
||||
functions.pop_back();
|
||||
|
||||
assert(types.size() > 1);
|
||||
Assert(types.size() > 1);
|
||||
delete types.back();
|
||||
types.pop_back();
|
||||
}
|
||||
@@ -104,7 +98,7 @@ SymbolTable::PopScope() {
|
||||
|
||||
bool
|
||||
SymbolTable::AddVariable(Symbol *symbol) {
|
||||
assert(symbol != NULL);
|
||||
Assert(symbol != NULL);
|
||||
|
||||
// Check to see if a symbol of the same name has already been declared.
|
||||
for (int i = (int)variables.size() - 1; i >= 0; --i) {
|
||||
@@ -154,13 +148,13 @@ SymbolTable::LookupVariable(const char *name) {
|
||||
bool
|
||||
SymbolTable::AddFunction(Symbol *symbol) {
|
||||
const FunctionType *ft = dynamic_cast<const FunctionType *>(symbol->type);
|
||||
assert(ft != NULL);
|
||||
Assert(ft != NULL);
|
||||
if (LookupFunction(symbol->name.c_str(), ft) != NULL)
|
||||
// A function of the same name and type has already been added to
|
||||
// the symbol table
|
||||
return false;
|
||||
|
||||
std::vector<Symbol *> &funOverloads = (*functions.back())[symbol->name];
|
||||
std::vector<Symbol *> &funOverloads = functions[symbol->name];
|
||||
funOverloads.push_back(symbol);
|
||||
return true;
|
||||
}
|
||||
@@ -168,17 +162,14 @@ SymbolTable::AddFunction(Symbol *symbol) {
|
||||
|
||||
bool
|
||||
SymbolTable::LookupFunction(const char *name, std::vector<Symbol *> *matches) {
|
||||
for (int i = (int)functions.size() - 1; i >= 0; --i) {
|
||||
FunctionMapType &fm = *(functions[i]);
|
||||
FunctionMapType::iterator iter = fm.find(name);
|
||||
if (iter != fm.end()) {
|
||||
if (matches == NULL)
|
||||
return true;
|
||||
else {
|
||||
const std::vector<Symbol *> &funcs = iter->second;
|
||||
for (int j = 0; j < (int)funcs.size(); ++j)
|
||||
matches->push_back(funcs[j]);
|
||||
}
|
||||
FunctionMapType::iterator iter = functions.find(name);
|
||||
if (iter != functions.end()) {
|
||||
if (matches == NULL)
|
||||
return true;
|
||||
else {
|
||||
const std::vector<Symbol *> &funcs = iter->second;
|
||||
for (int j = 0; j < (int)funcs.size(); ++j)
|
||||
matches->push_back(funcs[j]);
|
||||
}
|
||||
}
|
||||
return matches ? (matches->size() > 0) : false;
|
||||
@@ -187,15 +178,12 @@ SymbolTable::LookupFunction(const char *name, std::vector<Symbol *> *matches) {
|
||||
|
||||
Symbol *
|
||||
SymbolTable::LookupFunction(const char *name, const FunctionType *type) {
|
||||
for (int i = (int)functions.size() - 1; i >= 0; --i) {
|
||||
FunctionMapType &fm = *(functions[i]);
|
||||
FunctionMapType::iterator iter = fm.find(name);
|
||||
if (iter != fm.end()) {
|
||||
std::vector<Symbol *> funcs = iter->second;
|
||||
for (int j = 0; j < (int)funcs.size(); ++j) {
|
||||
if (Type::Equal(funcs[j]->type, type))
|
||||
return funcs[j];
|
||||
}
|
||||
FunctionMapType::iterator iter = functions.find(name);
|
||||
if (iter != functions.end()) {
|
||||
std::vector<Symbol *> funcs = iter->second;
|
||||
for (int j = 0; j < (int)funcs.size(); ++j) {
|
||||
if (Type::Equal(funcs[j]->type, type))
|
||||
return funcs[j];
|
||||
}
|
||||
}
|
||||
return NULL;
|
||||
@@ -261,14 +249,11 @@ SymbolTable::ClosestVariableOrFunctionMatch(const char *str) const {
|
||||
}
|
||||
}
|
||||
|
||||
for (int i = 0; i < (int)functions.size(); ++i) {
|
||||
const FunctionMapType &fm = *(functions[i]);
|
||||
FunctionMapType::const_iterator iter;
|
||||
for (iter = fm.begin(); iter != fm.end(); ++iter) {
|
||||
int dist = StringEditDistance(str, iter->first, maxDelta+1);
|
||||
if (dist <= maxDelta)
|
||||
matches[dist].push_back(iter->first);
|
||||
}
|
||||
FunctionMapType::const_iterator iter;
|
||||
for (iter = functions.begin(); iter != functions.end(); ++iter) {
|
||||
int dist = StringEditDistance(str, iter->first, maxDelta+1);
|
||||
if (dist <= maxDelta)
|
||||
matches[dist].push_back(iter->first);
|
||||
}
|
||||
|
||||
// Now, return the first entry of matches[] that is non-empty, if any.
|
||||
@@ -346,15 +331,13 @@ SymbolTable::Print() {
|
||||
}
|
||||
|
||||
fprintf(stderr, "Functions:\n----------------\n");
|
||||
for (int i = 0; i < (int)functions.size(); ++i) {
|
||||
FunctionMapType::iterator fiter = functions[i]->begin();
|
||||
while (fiter != functions[i]->end()) {
|
||||
fprintf(stderr, "%s\n", fiter->first.c_str());
|
||||
std::vector<Symbol *> &syms = fiter->second;
|
||||
for (unsigned int j = 0; j < syms.size(); ++j)
|
||||
fprintf(stderr, " %s\n", syms[j]->type->GetString().c_str());
|
||||
++fiter;
|
||||
}
|
||||
FunctionMapType::iterator fiter = functions.begin();
|
||||
while (fiter != functions.end()) {
|
||||
fprintf(stderr, "%s\n", fiter->first.c_str());
|
||||
std::vector<Symbol *> &syms = fiter->second;
|
||||
for (unsigned int j = 0; j < syms.size(); ++j)
|
||||
fprintf(stderr, " %s\n", syms[j]->type->GetString().c_str());
|
||||
++fiter;
|
||||
}
|
||||
|
||||
depth = 0;
|
||||
|
||||
26
sym.h
26
sym.h
@@ -257,12 +257,13 @@ private:
|
||||
typedef std::map<std::string, Symbol *> SymbolMapType;
|
||||
std::vector<SymbolMapType *> variables;
|
||||
|
||||
/** Function declarations are also scoped., A STL \c vector is used to
|
||||
store the function symbols for a given name since, due to function
|
||||
overloading, a name can have multiple function symbols associated
|
||||
with it. */
|
||||
/** Function declarations are *not* scoped. (C99, for example, allows
|
||||
an implementation to maintain function declarations in a single
|
||||
namespace.) A STL \c vector is used to store the function symbols
|
||||
for a given name since, due to function overloading, a name can
|
||||
have multiple function symbols associated with it. */
|
||||
typedef std::map<std::string, std::vector<Symbol *> > FunctionMapType;
|
||||
std::vector<FunctionMapType *> functions;
|
||||
FunctionMapType functions;
|
||||
|
||||
/** Type definitions can also be scoped. A new \c TypeMapType
|
||||
is added to the back of the \c types \c vector each time a new scope
|
||||
@@ -278,15 +279,12 @@ SymbolTable::GetMatchingFunctions(Predicate pred,
|
||||
std::vector<Symbol *> *matches) const {
|
||||
// Iterate through all function symbols and apply the given predicate.
|
||||
// If it returns true, add the Symbol * to the provided vector.
|
||||
for (unsigned int i = 0; i < functions.size(); ++i) {
|
||||
FunctionMapType &fm = *(functions[i]);
|
||||
FunctionMapType::const_iterator iter;
|
||||
for (iter = fm.begin(); iter != fm.end(); ++iter) {
|
||||
const std::vector<Symbol *> &syms = iter->second;
|
||||
for (unsigned int j = 0; j < syms.size(); ++j) {
|
||||
if (pred(syms[j]))
|
||||
matches->push_back(syms[j]);
|
||||
}
|
||||
FunctionMapType::const_iterator iter;
|
||||
for (iter = functions.begin(); iter != functions.end(); ++iter) {
|
||||
const std::vector<Symbol *> &syms = iter->second;
|
||||
for (unsigned int j = 0; j < syms.size(); ++j) {
|
||||
if (pred(syms[j]))
|
||||
matches->push_back(syms[j]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -39,9 +39,13 @@
|
||||
#define ISPC_IS_APPLE
|
||||
#endif
|
||||
|
||||
#ifdef ISPC_IS_WINDOWS
|
||||
#include <windows.h>
|
||||
#endif // ISPC_IS_WINDOWS
|
||||
|
||||
#include <assert.h>
|
||||
#include <string.h>
|
||||
#include <stdio.h>
|
||||
#include <assert.h>
|
||||
#include <stdint.h>
|
||||
#ifdef ISPC_IS_LINUX
|
||||
#include <malloc.h>
|
||||
|
||||
17
tests/atomics-swap.ispc
Normal file
17
tests/atomics-swap.ispc
Normal file
@@ -0,0 +1,17 @@
|
||||
|
||||
export uniform int width() { return programCount; }
|
||||
|
||||
uniform int32 s = 1234;
|
||||
|
||||
export void f_f(uniform float RET[], uniform float aFOO[]) {
|
||||
float a = aFOO[programIndex];
|
||||
float b = 0;
|
||||
if (programIndex & 1) {
|
||||
b = atomic_swap_global(&s, programIndex);
|
||||
}
|
||||
RET[programIndex] = reduce_add(b) + s;
|
||||
}
|
||||
|
||||
export void result(uniform float RET[]) {
|
||||
RET[programIndex] = 1234 + reduce_add(programIndex & 1 ? programIndex : 0);
|
||||
}
|
||||
18
tests/atomics-varyingptr-1.ispc
Normal file
18
tests/atomics-varyingptr-1.ispc
Normal file
@@ -0,0 +1,18 @@
|
||||
|
||||
export uniform int width() { return programCount; }
|
||||
|
||||
uniform unsigned int32 s[programCount];
|
||||
|
||||
export void f_f(uniform float RET[], uniform float aFOO[]) {
|
||||
float a = aFOO[programIndex];
|
||||
float b = 0;
|
||||
float delta = 1;
|
||||
if (programIndex < 2)
|
||||
atomic_add_global(&s[programIndex], delta);
|
||||
RET[programIndex] = s[programIndex];
|
||||
}
|
||||
|
||||
export void result(uniform float RET[]) {
|
||||
RET[programIndex] = 0;
|
||||
RET[0] = RET[1] = 1;
|
||||
}
|
||||
16
tests/atomics-varyingptr-2.ispc
Normal file
16
tests/atomics-varyingptr-2.ispc
Normal file
@@ -0,0 +1,16 @@
|
||||
|
||||
export uniform int width() { return programCount; }
|
||||
|
||||
uniform unsigned int32 s[programCount];
|
||||
|
||||
export void f_f(uniform float RET[], uniform float aFOO[]) {
|
||||
float a = aFOO[programIndex];
|
||||
float b = 0;
|
||||
float delta = 1;
|
||||
atomic_add_global(&s[programCount-1-programIndex], programIndex);
|
||||
RET[programIndex] = s[programIndex];
|
||||
}
|
||||
|
||||
export void result(uniform float RET[]) {
|
||||
RET[programIndex] = programCount-1-programIndex;
|
||||
}
|
||||
18
tests/atomics-varyingptr-3.ispc
Normal file
18
tests/atomics-varyingptr-3.ispc
Normal file
@@ -0,0 +1,18 @@
|
||||
|
||||
export uniform int width() { return programCount; }
|
||||
|
||||
uniform unsigned int32 s[programCount];
|
||||
|
||||
export void f_f(uniform float RET[], uniform float aFOO[]) {
|
||||
for (uniform int i = 0; i < programCount; ++i)
|
||||
s[i] = 1234;
|
||||
float a = aFOO[programIndex];
|
||||
float b = 0;
|
||||
float delta = 1;
|
||||
a = atomic_max_global(&s[programIndex], programIndex);
|
||||
RET[programIndex] = a;
|
||||
}
|
||||
|
||||
export void result(uniform float RET[]) {
|
||||
RET[programIndex] = 1234;
|
||||
}
|
||||
15
tests/atomics-varyingptr-4.ispc
Normal file
15
tests/atomics-varyingptr-4.ispc
Normal file
@@ -0,0 +1,15 @@
|
||||
|
||||
export uniform int width() { return programCount; }
|
||||
|
||||
uniform int32 s[programCount];
|
||||
|
||||
export void f_f(uniform float RET[], uniform float aFOO[]) {
|
||||
for (uniform int i = 0; i < programCount; ++i)
|
||||
s[i] = -1234;
|
||||
atomic_max_global(&s[programIndex], programIndex);
|
||||
RET[programIndex] = s[programIndex];
|
||||
}
|
||||
|
||||
export void result(uniform float RET[]) {
|
||||
RET[programIndex] = programIndex;
|
||||
}
|
||||
31
tests/gather-struct-vector.ispc
Normal file
31
tests/gather-struct-vector.ispc
Normal file
@@ -0,0 +1,31 @@
|
||||
|
||||
struct Ray {
|
||||
float<3> v;
|
||||
};
|
||||
|
||||
export uniform int width() { return programCount; }
|
||||
|
||||
|
||||
export void f_f(uniform float RET[], uniform float aFOO[]) {
|
||||
Ray r[programCount];
|
||||
for (uniform int i = 0; i < programCount; ++i) {
|
||||
r[i].v.x = 100*i + programIndex;
|
||||
r[i].v.y = 200*i + 2*programIndex;
|
||||
r[i].v.z = 300*i + 3*programIndex;
|
||||
}
|
||||
|
||||
Ray *rp = &r[programIndex/2];
|
||||
RET[programIndex] = rp->v.z;
|
||||
}
|
||||
|
||||
export void result(uniform float RET[]) {
|
||||
uniform int d0 = 0;
|
||||
uniform int d1 = 0;
|
||||
for (uniform int i = 0; i < programCount; i += 2) {
|
||||
RET[i] = d0+d1;
|
||||
d1 += 3;
|
||||
RET[i+1] = d0+d1;
|
||||
d0 += 300;
|
||||
d1 += 3;
|
||||
}
|
||||
}
|
||||
17
tests/goto-1.ispc
Normal file
17
tests/goto-1.ispc
Normal file
@@ -0,0 +1,17 @@
|
||||
|
||||
export uniform int width() { return programCount; }
|
||||
|
||||
|
||||
export void f_f(uniform float RET[], uniform float aFOO[]) {
|
||||
float a = aFOO[programIndex];
|
||||
float b = 0.; b = a;
|
||||
RET[programIndex] = a+b;
|
||||
goto skip;
|
||||
RET[programIndex] = 0;
|
||||
skip:
|
||||
;
|
||||
}
|
||||
|
||||
export void result(uniform float RET[]) {
|
||||
RET[programIndex] = 2 + 2*programIndex;
|
||||
}
|
||||
18
tests/goto-2.ispc
Normal file
18
tests/goto-2.ispc
Normal file
@@ -0,0 +1,18 @@
|
||||
|
||||
export uniform int width() { return programCount; }
|
||||
|
||||
|
||||
export void f_f(uniform float RET[], uniform float aFOO[]) {
|
||||
float a = aFOO[programIndex];
|
||||
float b = 0.; b = a;
|
||||
RET[programIndex] = a+b;
|
||||
if (all(a != 0))
|
||||
goto skip;
|
||||
RET[programIndex] = 0;
|
||||
skip:
|
||||
;
|
||||
}
|
||||
|
||||
export void result(uniform float RET[]) {
|
||||
RET[programIndex] = 2 + 2*programIndex;
|
||||
}
|
||||
18
tests/goto-3.ispc
Normal file
18
tests/goto-3.ispc
Normal file
@@ -0,0 +1,18 @@
|
||||
|
||||
export uniform int width() { return programCount; }
|
||||
|
||||
|
||||
export void f_f(uniform float RET[], uniform float aFOO[]) {
|
||||
float a = aFOO[programIndex];
|
||||
float b = 0.; b = a;
|
||||
RET[programIndex] = a+b;
|
||||
if (all(a == 0))
|
||||
goto skip;
|
||||
RET[programIndex] = 0;
|
||||
skip:
|
||||
;
|
||||
}
|
||||
|
||||
export void result(uniform float RET[]) {
|
||||
RET[programIndex] = 0;
|
||||
}
|
||||
19
tests/goto-4.ispc
Normal file
19
tests/goto-4.ispc
Normal file
@@ -0,0 +1,19 @@
|
||||
|
||||
export uniform int width() { return programCount; }
|
||||
|
||||
|
||||
export void f_f(uniform float RET[], uniform float aFOO[]) {
|
||||
float a = aFOO[programIndex];
|
||||
float b = 0.; b = a;
|
||||
RET[programIndex] = 0;
|
||||
encore:
|
||||
++RET[programIndex];
|
||||
if (any(a != 0)) {
|
||||
a = max(a-1, 0);
|
||||
goto encore;
|
||||
}
|
||||
}
|
||||
|
||||
export void result(uniform float RET[]) {
|
||||
RET[programIndex] = programCount+1;
|
||||
}
|
||||
13
tests/kilo-mega-giga-1.ispc
Normal file
13
tests/kilo-mega-giga-1.ispc
Normal file
@@ -0,0 +1,13 @@
|
||||
|
||||
export uniform int width() { return programCount; }
|
||||
|
||||
|
||||
export void f_f(uniform float RET[], uniform float aFOO[]) {
|
||||
float a = aFOO[programIndex];
|
||||
a *= 1k;
|
||||
RET[programIndex] = a;
|
||||
}
|
||||
|
||||
export void result(uniform float RET[]) {
|
||||
RET[programIndex] = 1024*(programIndex+1);
|
||||
}
|
||||
12
tests/kilo-mega-giga-2.ispc
Normal file
12
tests/kilo-mega-giga-2.ispc
Normal file
@@ -0,0 +1,12 @@
|
||||
|
||||
export uniform int width() { return programCount; }
|
||||
|
||||
|
||||
export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) {
|
||||
int a = b + 2M;
|
||||
RET[programIndex] = a;
|
||||
}
|
||||
|
||||
export void result(uniform float RET[]) {
|
||||
RET[programIndex] = 2*1024*1024 + 5;
|
||||
}
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user