Compare commits
18 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
6cf4d7e216 | ||
|
|
865e430b56 | ||
|
|
990bee5a86 | ||
|
|
b84167dddd | ||
|
|
f39d31174e | ||
|
|
39542f420a | ||
|
|
d340dcbfcc | ||
|
|
e5bc6cd67c | ||
|
|
40bd133dec | ||
|
|
2ced56736e | ||
|
|
bf74a3360f | ||
|
|
aaafdf80f2 | ||
|
|
6086d3597c | ||
|
|
a3fbb098ad | ||
|
|
38d4ecccf4 | ||
|
|
af435e52c1 | ||
|
|
8b7522e98b | ||
|
|
bffb380677 |
4
.gitignore
vendored
4
.gitignore
vendored
@@ -1,2 +1,6 @@
|
|||||||
*.pyc
|
*.pyc
|
||||||
*~
|
*~
|
||||||
|
depend
|
||||||
|
ispc
|
||||||
|
ispc_test
|
||||||
|
objs
|
||||||
|
|||||||
11
Makefile
11
Makefile
@@ -43,7 +43,7 @@ OBJS=$(addprefix objs/, $(CXX_SRC:.cpp=.o) $(STDLIB_SRC:.ll=.o) stdlib-c.o stdli
|
|||||||
|
|
||||||
default: ispc ispc_test
|
default: ispc ispc_test
|
||||||
|
|
||||||
.PHONY: dirs clean depend doxygen
|
.PHONY: dirs clean depend doxygen print_llvm_src
|
||||||
.PRECIOUS: objs/stdlib-%.cpp
|
.PRECIOUS: objs/stdlib-%.cpp
|
||||||
|
|
||||||
depend: $(CXX_SRC) $(HEADERS)
|
depend: $(CXX_SRC) $(HEADERS)
|
||||||
@@ -56,6 +56,9 @@ dirs:
|
|||||||
@echo Creating objs/ directory
|
@echo Creating objs/ directory
|
||||||
@/bin/mkdir -p objs
|
@/bin/mkdir -p objs
|
||||||
|
|
||||||
|
print_llvm_src:
|
||||||
|
@echo Using LLVM `llvm-config --version` from `llvm-config --libdir`
|
||||||
|
|
||||||
clean:
|
clean:
|
||||||
/bin/rm -rf objs ispc ispc_test
|
/bin/rm -rf objs ispc ispc_test
|
||||||
|
|
||||||
@@ -63,7 +66,7 @@ doxygen:
|
|||||||
/bin/rm -rf docs/doxygen
|
/bin/rm -rf docs/doxygen
|
||||||
doxygen doxygen.cfg
|
doxygen doxygen.cfg
|
||||||
|
|
||||||
ispc: dirs $(OBJS)
|
ispc: print_llvm_src dirs $(OBJS)
|
||||||
@echo Creating ispc executable
|
@echo Creating ispc executable
|
||||||
@$(CXX) $(LDFLAGS) -o $@ $(OBJS) $(LLVM_LIBS)
|
@$(CXX) $(LDFLAGS) -o $@ $(OBJS) $(LLVM_LIBS)
|
||||||
|
|
||||||
@@ -87,7 +90,7 @@ objs/lex.cpp: lex.ll
|
|||||||
@echo Running flex on $<
|
@echo Running flex on $<
|
||||||
@$(LEX) -o $@ $<
|
@$(LEX) -o $@ $<
|
||||||
|
|
||||||
objs/lex.o: objs/lex.cpp $(HEADERS)
|
objs/lex.o: objs/lex.cpp $(HEADERS) objs/parse.cc
|
||||||
@echo Compiling $<
|
@echo Compiling $<
|
||||||
@$(CXX) $(CXXFLAGS) -o $@ -c $<
|
@$(CXX) $(CXXFLAGS) -o $@ -c $<
|
||||||
|
|
||||||
@@ -111,7 +114,7 @@ objs/stdlib-c.o: objs/stdlib-c.cpp
|
|||||||
|
|
||||||
objs/stdlib_ispc.cpp: stdlib.ispc
|
objs/stdlib_ispc.cpp: stdlib.ispc
|
||||||
@echo Creating C++ source from $<
|
@echo Creating C++ source from $<
|
||||||
@$(CPP) -DISPC=1 -DPI=3.1415936535 $< | ./stdlib2cpp.py > $@
|
@$(CPP) -DISPC=1 -DPI=3.1415926536 $< | ./stdlib2cpp.py > $@
|
||||||
|
|
||||||
objs/stdlib_ispc.o: objs/stdlib_ispc.cpp
|
objs/stdlib_ispc.o: objs/stdlib_ispc.cpp
|
||||||
@echo Compiling $<
|
@echo Compiling $<
|
||||||
|
|||||||
40
ctx.cpp
40
ctx.cpp
@@ -1315,8 +1315,21 @@ FunctionEmitContext::LoadInst(llvm::Value *lvalue, const Type *type,
|
|||||||
|
|
||||||
if (llvm::isa<const llvm::PointerType>(lvalue->getType())) {
|
if (llvm::isa<const llvm::PointerType>(lvalue->getType())) {
|
||||||
// If the lvalue is a straight up regular pointer, then just issue
|
// If the lvalue is a straight up regular pointer, then just issue
|
||||||
// a regular load
|
// a regular load. First figure out the alignment; in general we
|
||||||
llvm::Instruction *inst = new llvm::LoadInst(lvalue, name ? name : "load", bblock);
|
// can just assume the natural alignment (0 here), but for varying
|
||||||
|
// atomic types, we need to make sure that the compiler emits
|
||||||
|
// unaligned vector loads, so we specify a reduced alignment here.
|
||||||
|
int align = 0;
|
||||||
|
const AtomicType *atomicType = dynamic_cast<const AtomicType *>(type);
|
||||||
|
if (atomicType != NULL && atomicType->IsVaryingType())
|
||||||
|
// We actually just want to align to the vector element
|
||||||
|
// alignment, but can't easily get that here, so just tell LLVM
|
||||||
|
// it's totally unaligned. (This shouldn't make any difference
|
||||||
|
// vs the proper alignment in practice.)
|
||||||
|
align = 1;
|
||||||
|
llvm::Instruction *inst = new llvm::LoadInst(lvalue, name ? name : "load",
|
||||||
|
false /* not volatile */,
|
||||||
|
align, bblock);
|
||||||
AddDebugPos(inst);
|
AddDebugPos(inst);
|
||||||
return inst;
|
return inst;
|
||||||
}
|
}
|
||||||
@@ -1437,7 +1450,7 @@ FunctionEmitContext::addGSMetadata(llvm::Instruction *inst, SourcePos pos) {
|
|||||||
|
|
||||||
llvm::Value *line = LLVMInt32(pos.first_line);
|
llvm::Value *line = LLVMInt32(pos.first_line);
|
||||||
#ifdef LLVM_2_8
|
#ifdef LLVM_2_8
|
||||||
md = llvm::MDNode::get(*g->ctx, &first_line, 1);
|
md = llvm::MDNode::get(*g->ctx, &line, 1);
|
||||||
#else
|
#else
|
||||||
md = llvm::MDNode::get(*g->ctx, line);
|
md = llvm::MDNode::get(*g->ctx, line);
|
||||||
#endif
|
#endif
|
||||||
@@ -1445,7 +1458,7 @@ FunctionEmitContext::addGSMetadata(llvm::Instruction *inst, SourcePos pos) {
|
|||||||
|
|
||||||
llvm::Value *column = LLVMInt32(pos.first_column);
|
llvm::Value *column = LLVMInt32(pos.first_column);
|
||||||
#ifdef LLVM_2_8
|
#ifdef LLVM_2_8
|
||||||
md = llvm::MDNode::get(*g->ctx, &first_column, 1);
|
md = llvm::MDNode::get(*g->ctx, &column, 1);
|
||||||
#else
|
#else
|
||||||
md = llvm::MDNode::get(*g->ctx, column);
|
md = llvm::MDNode::get(*g->ctx, column);
|
||||||
#endif
|
#endif
|
||||||
@@ -1644,7 +1657,16 @@ FunctionEmitContext::StoreInst(llvm::Value *rvalue, llvm::Value *lvalue,
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
llvm::Instruction *inst = new llvm::StoreInst(rvalue, lvalue, name, bblock);
|
llvm::Instruction *inst;
|
||||||
|
if (llvm::isa<llvm::VectorType>(rvalue->getType()))
|
||||||
|
// Specify an unaligned store, since we don't know that the lvalue
|
||||||
|
// will in fact be aligned to a vector width here. (Actually
|
||||||
|
// should be aligned to the alignment of the vector element type...)
|
||||||
|
inst = new llvm::StoreInst(rvalue, lvalue, false /* not volatile */,
|
||||||
|
1, bblock);
|
||||||
|
else
|
||||||
|
inst = new llvm::StoreInst(rvalue, lvalue, bblock);
|
||||||
|
|
||||||
AddDebugPos(inst);
|
AddDebugPos(inst);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -1661,8 +1683,8 @@ FunctionEmitContext::StoreInst(llvm::Value *rvalue, llvm::Value *lvalue,
|
|||||||
|
|
||||||
// Figure out what kind of store we're doing here
|
// Figure out what kind of store we're doing here
|
||||||
if (rvalueType->IsUniformType()) {
|
if (rvalueType->IsUniformType()) {
|
||||||
// The easy case; a regular store
|
// The easy case; a regular store, natural alignment is fine
|
||||||
llvm::Instruction *si = new llvm::StoreInst(rvalue, lvalue, name, bblock);
|
llvm::Instruction *si = new llvm::StoreInst(rvalue, lvalue, bblock);
|
||||||
AddDebugPos(si);
|
AddDebugPos(si);
|
||||||
}
|
}
|
||||||
else if (llvm::isa<const llvm::ArrayType>(lvalue->getType()))
|
else if (llvm::isa<const llvm::ArrayType>(lvalue->getType()))
|
||||||
@@ -1672,9 +1694,7 @@ FunctionEmitContext::StoreInst(llvm::Value *rvalue, llvm::Value *lvalue,
|
|||||||
else if (storeMask == LLVMMaskAllOn) {
|
else if (storeMask == LLVMMaskAllOn) {
|
||||||
// Otherwise it is a masked store unless we can determine that the
|
// Otherwise it is a masked store unless we can determine that the
|
||||||
// mask is all on...
|
// mask is all on...
|
||||||
llvm::Instruction *si =
|
StoreInst(rvalue, lvalue, name);
|
||||||
new llvm::StoreInst(rvalue, lvalue, name, bblock);
|
|
||||||
AddDebugPos(si);
|
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
maskedStore(rvalue, lvalue, rvalueType, storeMask);
|
maskedStore(rvalue, lvalue, rvalueType, storeMask);
|
||||||
|
|||||||
@@ -1970,7 +1970,7 @@ Data Layout
|
|||||||
|
|
||||||
In general, ``ispc`` tries to ensure that ``struct`` s and other complex
|
In general, ``ispc`` tries to ensure that ``struct`` s and other complex
|
||||||
datatypes are laid out in the same way in memory as they are in C/C++.
|
datatypes are laid out in the same way in memory as they are in C/C++.
|
||||||
Matching alignment is important for easy interoperability between C/C++
|
Matching structure layout is important for easy interoperability between C/C++
|
||||||
code and ``ispc`` code.
|
code and ``ispc`` code.
|
||||||
|
|
||||||
The main complexity in sharing data between ``ispc`` and C/C++ often comes
|
The main complexity in sharing data between ``ispc`` and C/C++ often comes
|
||||||
@@ -2023,11 +2023,6 @@ It can pass ``array`` to a ``ispc`` function defined as:
|
|||||||
|
|
||||||
export void foo(uniform float array[], uniform int count)
|
export void foo(uniform float array[], uniform int count)
|
||||||
|
|
||||||
(Though the pointer must be aligned to the compilation target's natural
|
|
||||||
vector width; see the discussion of alignment restrictions in `Data
|
|
||||||
Alignment and Aliasing`_ and the aligned allocation routines in
|
|
||||||
``examples/options/options.cpp`` for example.)
|
|
||||||
|
|
||||||
Similarly, ``struct`` s from the application can have embedded pointers.
|
Similarly, ``struct`` s from the application can have embedded pointers.
|
||||||
This is handled with similar ``[]`` syntax:
|
This is handled with similar ``[]`` syntax:
|
||||||
|
|
||||||
@@ -2062,55 +2057,20 @@ vector types from C/C++ application code if possible.
|
|||||||
Data Alignment and Aliasing
|
Data Alignment and Aliasing
|
||||||
---------------------------
|
---------------------------
|
||||||
|
|
||||||
There are two important constraints that must be adhered to when passing
|
There are two important constraints that must be adhered to when
|
||||||
pointers from the application to ``ispc`` programs.
|
passing pointers from the application to ``ispc`` programs.
|
||||||
|
|
||||||
The first constraint is alignment: any pointers from the host program that
|
The first is that it is required that it be valid to read memory at the
|
||||||
are passed to ``ispc`` must be aligned to natural vector alignment of
|
first element of any array that is passed to ``ispc``. In practice, this
|
||||||
system--for example, 16 byte alignment on a target that supports Intel®
|
should just happen naturally, but it does mean that it is illegal to pass a
|
||||||
SSE, 32-byte on an Intel® AVX target. If this constraint isn't met, the
|
``NULL`` pointer as a parameter to a ``ispc`` function called from the
|
||||||
program may abort at runtime with an unaligned memory access error.
|
application.
|
||||||
|
|
||||||
For example, in a ``ispc`` function with the following declaration:
|
The second constraint is that pointers and references in ``ispc`` programs
|
||||||
|
must not alias. The ``ispc`` compiler assumes that different pointers
|
||||||
::
|
can't end up pointing to the same memory location, either due to having the
|
||||||
|
same initial value, or through array indexing in the program as it
|
||||||
export void foo(uniform float in[], uniform float out[],
|
executed.
|
||||||
int count);
|
|
||||||
|
|
||||||
If the application is passing stack-allocated arrays for ``in`` and
|
|
||||||
``out``, the C/C++ compiler must be told to align these arrays.
|
|
||||||
|
|
||||||
::
|
|
||||||
|
|
||||||
// MSVC, SSE target
|
|
||||||
__declspec(align(16)) float in[16], out[16];
|
|
||||||
foo(in, out, 16);
|
|
||||||
|
|
||||||
With the gcc/clang compilers, the syntax for providing alignment is
|
|
||||||
slightly different:
|
|
||||||
|
|
||||||
::
|
|
||||||
|
|
||||||
float x[16] __attribute__ ((__align__(16)));
|
|
||||||
foo(in, out, 16);
|
|
||||||
|
|
||||||
If the data being passed is dynamically allocated, the appropriate system
|
|
||||||
aligned memory allocation routine should be used to allocate it (for
|
|
||||||
example, ``_aligned_malloc()`` with Windows\*, ``memalign()`` with
|
|
||||||
Linux\*; see the ``AllocAligned()`` function in ``examples/rt/rt.cpp`` for
|
|
||||||
an example.)
|
|
||||||
|
|
||||||
It is also required that it be valid to read memory at the first element of
|
|
||||||
any array that is passed to ``ispc``. In practice, this should just
|
|
||||||
happen naturally, but it does mean that it is illegal to pass a ``NULL``
|
|
||||||
pointer as a parameter to a ``ispc`` function called from the application.
|
|
||||||
|
|
||||||
The second key constraint is that pointers and references in ``ispc``
|
|
||||||
programs must not alias. The ``ispc`` compiler assumes that different
|
|
||||||
pointers can't end up pointing to the same memory location, either due to
|
|
||||||
having the same initial value, or through array indexing in the program as
|
|
||||||
it executed.
|
|
||||||
|
|
||||||
This aliasing constraint also applies to ``reference`` parameters to
|
This aliasing constraint also applies to ``reference`` parameters to
|
||||||
functions. Given a function like:
|
functions. Given a function like:
|
||||||
@@ -2127,8 +2087,8 @@ another case of aliasing, and if the caller calls the function as ``func(x,
|
|||||||
x)``, it's not guaranteed that the ``if`` test will evaluate to true, due
|
x)``, it's not guaranteed that the ``if`` test will evaluate to true, due
|
||||||
to the compiler's requirement of no aliasing.
|
to the compiler's requirement of no aliasing.
|
||||||
|
|
||||||
(In the future, ``ispc`` will have the ability to work with unaligned
|
(In the future, ``ispc`` will have a mechanism to indicate that pointers
|
||||||
memory as well as have a mechanism to indicate that pointers may alias.)
|
may alias.)
|
||||||
|
|
||||||
Using ISPC Effectively
|
Using ISPC Effectively
|
||||||
======================
|
======================
|
||||||
|
|||||||
@@ -1,8 +1,8 @@
|
|||||||
|
|
||||||
CXX=g++
|
CXX=g++ -m64
|
||||||
CXXFLAGS=-Iobjs/ -O3 -Wall
|
CXXFLAGS=-Iobjs/ -O3 -Wall
|
||||||
ISPC=ispc
|
ISPC=ispc
|
||||||
ISPCFLAGS=-O2 --fast-math
|
ISPCFLAGS=-O2 --fast-math --arch=x86-64
|
||||||
|
|
||||||
default: ao
|
default: ao
|
||||||
|
|
||||||
|
|||||||
@@ -103,24 +103,6 @@ savePPM(const char *fname, int w, int h)
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
// Allocate memory with 64-byte alignment.
|
|
||||||
float *
|
|
||||||
AllocAligned(int size) {
|
|
||||||
#if defined(_WIN32) || defined(_WIN64)
|
|
||||||
return (float *)_aligned_malloc(size, 64);
|
|
||||||
#elif defined (__APPLE__)
|
|
||||||
// Allocate excess memory to ensure an aligned pointer can be returned
|
|
||||||
void *mem = malloc(size + (64-1) + sizeof(void*));
|
|
||||||
char *amem = ((char*)mem) + sizeof(void*);
|
|
||||||
amem += 64 - (reinterpret_cast<uint64_t>(amem) & (64 - 1));
|
|
||||||
((void**)amem)[-1] = mem;
|
|
||||||
return (float *)amem;
|
|
||||||
#else
|
|
||||||
return (float *)memalign(64, size);
|
|
||||||
#endif
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
int main(int argc, char **argv)
|
int main(int argc, char **argv)
|
||||||
{
|
{
|
||||||
if (argc != 4) {
|
if (argc != 4) {
|
||||||
@@ -136,8 +118,8 @@ int main(int argc, char **argv)
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Allocate space for output images
|
// Allocate space for output images
|
||||||
img = (unsigned char *)AllocAligned(width * height * 3);
|
img = new unsigned char[width * height * 3];
|
||||||
fimg = (float *)AllocAligned(sizeof(float) * width * height * 3);
|
fimg = new float[width * height * 3];
|
||||||
|
|
||||||
//
|
//
|
||||||
// Run the ispc path, test_iterations times, and report the minimum
|
// Run the ispc path, test_iterations times, and report the minimum
|
||||||
|
|||||||
@@ -1,8 +1,8 @@
|
|||||||
|
|
||||||
CXX=g++
|
CXX=g++ -m64
|
||||||
CXXFLAGS=-Iobjs/ -g3 -Wall
|
CXXFLAGS=-Iobjs/ -g3 -Wall
|
||||||
ISPC=ispc
|
ISPC=ispc
|
||||||
ISPCFLAGS=-O2 --fast-math --instrument
|
ISPCFLAGS=-O2 --fast-math --instrument --arch=x86-64
|
||||||
|
|
||||||
default: ao
|
default: ao
|
||||||
|
|
||||||
|
|||||||
@@ -102,24 +102,6 @@ savePPM(const char *fname, int w, int h)
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
// Allocate memory with 64-byte alignment.
|
|
||||||
float *
|
|
||||||
AllocAligned(int size) {
|
|
||||||
#if defined(_WIN32) || defined(_WIN64)
|
|
||||||
return (float *)_aligned_malloc(size, 64);
|
|
||||||
#elif defined (__APPLE__)
|
|
||||||
// Allocate excess memory to ensure an aligned pointer can be returned
|
|
||||||
void *mem = malloc(size + (64-1) + sizeof(void*));
|
|
||||||
char *amem = ((char*)mem) + sizeof(void*);
|
|
||||||
amem += 64 - (reinterpret_cast<uint64_t>(amem) & (64 - 1));
|
|
||||||
((void**)amem)[-1] = mem;
|
|
||||||
return (float *)amem;
|
|
||||||
#else
|
|
||||||
return (float *)memalign(64, size);
|
|
||||||
#endif
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
int main(int argc, char **argv)
|
int main(int argc, char **argv)
|
||||||
{
|
{
|
||||||
if (argc != 4) {
|
if (argc != 4) {
|
||||||
@@ -135,8 +117,8 @@ int main(int argc, char **argv)
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Allocate space for output images
|
// Allocate space for output images
|
||||||
img = (unsigned char *)AllocAligned(width * height * 3);
|
img = new unsigned char[width * height * 3];
|
||||||
fimg = (float *)AllocAligned(sizeof(float) * width * height * 3);
|
fimg = new float[width * height * 3];
|
||||||
|
|
||||||
ao_ispc(width, height, NSUBSAMPLES, fimg);
|
ao_ispc(width, height, NSUBSAMPLES, fimg);
|
||||||
|
|
||||||
|
|||||||
3
examples/mandelbrot/.gitignore
vendored
Normal file
3
examples/mandelbrot/.gitignore
vendored
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
mandelbrot
|
||||||
|
*.ppm
|
||||||
|
objs
|
||||||
@@ -1,8 +1,8 @@
|
|||||||
|
|
||||||
CXX=g++
|
CXX=g++ -m64
|
||||||
CXXFLAGS=-Iobjs/ -O3 -Wall
|
CXXFLAGS=-Iobjs/ -O3 -Wall
|
||||||
ISPC=ispc
|
ISPC=ispc
|
||||||
ISPCFLAGS=-O2 --target=sse4x2
|
ISPCFLAGS=-O2 --target=sse4x2 --arch=x86-64
|
||||||
|
|
||||||
default: mandelbrot
|
default: mandelbrot
|
||||||
|
|
||||||
|
|||||||
@@ -11,10 +11,10 @@ endif
|
|||||||
|
|
||||||
TASK_OBJ=$(addprefix objs/, $(TASK_CXX:.cpp=.o))
|
TASK_OBJ=$(addprefix objs/, $(TASK_CXX:.cpp=.o))
|
||||||
|
|
||||||
CXX=g++
|
CXX=g++ -m64
|
||||||
CXXFLAGS=-Iobjs/ -O3 -Wall
|
CXXFLAGS=-Iobjs/ -O3 -Wall
|
||||||
ISPC=ispc
|
ISPC=ispc
|
||||||
ISPCFLAGS=-O2 --target=sse4x2
|
ISPCFLAGS=-O2 --target=sse4x2 --arch=x86-64
|
||||||
|
|
||||||
default: mandelbrot
|
default: mandelbrot
|
||||||
|
|
||||||
|
|||||||
@@ -1,8 +1,8 @@
|
|||||||
|
|
||||||
CXX=g++
|
CXX=g++ -m64
|
||||||
CXXFLAGS=-Iobjs/ -g -Wall
|
CXXFLAGS=-Iobjs/ -g -Wall
|
||||||
ISPC=ispc
|
ISPC=ispc
|
||||||
ISPCFLAGS=-O2 --target=sse4x2
|
ISPCFLAGS=-O2 --target=sse4x2 --arch=x86-64
|
||||||
|
|
||||||
default: options
|
default: options
|
||||||
|
|
||||||
|
|||||||
@@ -37,9 +37,6 @@
|
|||||||
#include <assert.h>
|
#include <assert.h>
|
||||||
#include <math.h>
|
#include <math.h>
|
||||||
#include <algorithm>
|
#include <algorithm>
|
||||||
#ifndef __APPLE__
|
|
||||||
#include <malloc.h>
|
|
||||||
#endif // !__APPLE__
|
|
||||||
using std::max;
|
using std::max;
|
||||||
|
|
||||||
#include "options_defs.h"
|
#include "options_defs.h"
|
||||||
@@ -48,23 +45,6 @@ using std::max;
|
|||||||
#include "options_ispc.h"
|
#include "options_ispc.h"
|
||||||
using namespace ispc;
|
using namespace ispc;
|
||||||
|
|
||||||
// Allocate memory with 64-byte alignment.
|
|
||||||
float *AllocFloats(int count) {
|
|
||||||
int size = count * sizeof(float);
|
|
||||||
#if defined(_WIN32) || defined(_WIN64)
|
|
||||||
return (float *)_aligned_malloc(size, 64);
|
|
||||||
#elif defined (__APPLE__)
|
|
||||||
// Allocate excess memory to ensure an aligned pointer can be returned
|
|
||||||
void *mem = malloc(size + (64-1) + sizeof(void*));
|
|
||||||
char *amem = ((char*)mem) + sizeof(void*);
|
|
||||||
amem += 64 - (reinterpret_cast<uint64_t>(amem) & (64 - 1));
|
|
||||||
((void**)amem)[-1] = mem;
|
|
||||||
return (float *)amem;
|
|
||||||
#else
|
|
||||||
return (float *)memalign(64, size);
|
|
||||||
#endif
|
|
||||||
}
|
|
||||||
|
|
||||||
extern void black_scholes_serial(float Sa[], float Xa[], float Ta[],
|
extern void black_scholes_serial(float Sa[], float Xa[], float Ta[],
|
||||||
float ra[], float va[],
|
float ra[], float va[],
|
||||||
float result[], int count);
|
float result[], int count);
|
||||||
@@ -76,12 +56,12 @@ extern void binomial_put_serial(float Sa[], float Xa[], float Ta[],
|
|||||||
int main() {
|
int main() {
|
||||||
// Pointers passed to ispc code must have alignment of the target's
|
// Pointers passed to ispc code must have alignment of the target's
|
||||||
// vector width at minimum.
|
// vector width at minimum.
|
||||||
float *S = AllocFloats(N_OPTIONS);
|
float *S = new float[N_OPTIONS];
|
||||||
float *X = AllocFloats(N_OPTIONS);
|
float *X = new float[N_OPTIONS];
|
||||||
float *T = AllocFloats(N_OPTIONS);
|
float *T = new float[N_OPTIONS];
|
||||||
float *r = AllocFloats(N_OPTIONS);
|
float *r = new float[N_OPTIONS];
|
||||||
float *v = AllocFloats(N_OPTIONS);
|
float *v = new float[N_OPTIONS];
|
||||||
float *result = AllocFloats(N_OPTIONS);
|
float *result = new float[N_OPTIONS];
|
||||||
|
|
||||||
for (int i = 0; i < N_OPTIONS; ++i) {
|
for (int i = 0; i < N_OPTIONS; ++i) {
|
||||||
S[i] = 100; // stock price
|
S[i] = 100; // stock price
|
||||||
|
|||||||
@@ -1,8 +1,8 @@
|
|||||||
|
|
||||||
CXX=g++
|
CXX=g++ -m64
|
||||||
CXXFLAGS=-Iobjs/ -O3 -Wall
|
CXXFLAGS=-Iobjs/ -O3 -Wall
|
||||||
ISPC=ispc
|
ISPC=ispc
|
||||||
ISPCFLAGS=-O2 --target=sse4x2
|
ISPCFLAGS=-O2 --target=sse4x2 --arch=x86-64
|
||||||
|
|
||||||
default: rt
|
default: rt
|
||||||
|
|
||||||
|
|||||||
@@ -43,9 +43,6 @@
|
|||||||
#include <algorithm>
|
#include <algorithm>
|
||||||
#include <assert.h>
|
#include <assert.h>
|
||||||
#include <sys/types.h>
|
#include <sys/types.h>
|
||||||
#ifndef __APPLE__
|
|
||||||
#include <malloc.h>
|
|
||||||
#endif
|
|
||||||
#include "../timing.h"
|
#include "../timing.h"
|
||||||
#include "rt_ispc.h"
|
#include "rt_ispc.h"
|
||||||
|
|
||||||
@@ -53,23 +50,6 @@ using namespace ispc;
|
|||||||
|
|
||||||
typedef unsigned int uint;
|
typedef unsigned int uint;
|
||||||
|
|
||||||
template <typename T>
|
|
||||||
T *AllocAligned(int count) {
|
|
||||||
int size = count * sizeof(T);
|
|
||||||
#if defined(_WIN32) || defined(_WIN64)
|
|
||||||
return (T *)_aligned_malloc(size, 64);
|
|
||||||
#elif defined (__APPLE__)
|
|
||||||
// Allocate excess memory to ensure an aligned pointer can be returned
|
|
||||||
void *mem = malloc(size + (64-1) + sizeof(void*));
|
|
||||||
char *amem = ((char*)mem) + sizeof(void*);
|
|
||||||
amem += 64 - (reinterpret_cast<uint64_t>(amem) & (64 - 1));
|
|
||||||
((void**)amem)[-1] = mem;
|
|
||||||
return (T *)amem;
|
|
||||||
#else
|
|
||||||
return (T *)memalign(64, size);
|
|
||||||
#endif
|
|
||||||
}
|
|
||||||
|
|
||||||
extern void raytrace_serial(int width, int height, const float raster2camera[4][4],
|
extern void raytrace_serial(int width, int height, const float raster2camera[4][4],
|
||||||
const float camera2world[4][4], float image[],
|
const float camera2world[4][4], float image[],
|
||||||
int id[], const LinearBVHNode nodes[],
|
int id[], const LinearBVHNode nodes[],
|
||||||
@@ -161,7 +141,7 @@ int main(int argc, char *argv[]) {
|
|||||||
uint nNodes;
|
uint nNodes;
|
||||||
READ(nNodes, 1);
|
READ(nNodes, 1);
|
||||||
|
|
||||||
LinearBVHNode *nodes = AllocAligned<LinearBVHNode>(nNodes);
|
LinearBVHNode *nodes = new LinearBVHNode[nNodes];
|
||||||
for (unsigned int i = 0; i < nNodes; ++i) {
|
for (unsigned int i = 0; i < nNodes; ++i) {
|
||||||
// Each node is 6x floats for a box, then an integer for an offset
|
// Each node is 6x floats for a box, then an integer for an offset
|
||||||
// to the second child node, then an integer that encodes the type
|
// to the second child node, then an integer that encodes the type
|
||||||
@@ -181,7 +161,7 @@ int main(int argc, char *argv[]) {
|
|||||||
// And then read the triangles
|
// And then read the triangles
|
||||||
uint nTris;
|
uint nTris;
|
||||||
READ(nTris, 1);
|
READ(nTris, 1);
|
||||||
Triangle *triangles = AllocAligned<Triangle>(nTris);
|
Triangle *triangles = new Triangle[nTris];
|
||||||
for (uint i = 0; i < nTris; ++i) {
|
for (uint i = 0; i < nTris; ++i) {
|
||||||
// 9x floats for the 3 vertices
|
// 9x floats for the 3 vertices
|
||||||
float v[9];
|
float v[9];
|
||||||
|
|||||||
2
examples/simple/.gitignore
vendored
Normal file
2
examples/simple/.gitignore
vendored
Normal file
@@ -0,0 +1,2 @@
|
|||||||
|
simple
|
||||||
|
objs
|
||||||
@@ -1,8 +1,8 @@
|
|||||||
|
|
||||||
CXX=g++
|
CXX=g++ -m64
|
||||||
CXXFLAGS=-Iobjs/ -O3 -Wall
|
CXXFLAGS=-Iobjs/ -O3 -Wall
|
||||||
ISPC=ispc
|
ISPC=ispc
|
||||||
ISPCFLAGS=-O2
|
ISPCFLAGS=-O2 --arch=x86-64
|
||||||
|
|
||||||
default: simple
|
default: simple
|
||||||
|
|
||||||
|
|||||||
@@ -38,15 +38,7 @@
|
|||||||
using namespace ispc;
|
using namespace ispc;
|
||||||
|
|
||||||
int main() {
|
int main() {
|
||||||
// Pointers passed to ispc-compiled code are currently required to have
|
float vin[16], vout[16];
|
||||||
// alignment equal to the target's native vector size. Here we align
|
|
||||||
// to 32 bytes to be safe for both SSE and AVX targets.
|
|
||||||
#ifdef _MSC_VER
|
|
||||||
__declspec(align(32)) float vin[16], vout[16];
|
|
||||||
#else
|
|
||||||
float vin[16] __attribute__((aligned(32)));
|
|
||||||
float vout[16] __attribute__((aligned(32)));
|
|
||||||
#endif
|
|
||||||
|
|
||||||
// Initialize input buffer
|
// Initialize input buffer
|
||||||
for (int i = 0; i < 16; ++i)
|
for (int i = 0; i < 16; ++i)
|
||||||
|
|||||||
3
expr.cpp
3
expr.cpp
@@ -380,7 +380,7 @@ lLLVMConstantValue(const Type *type, llvm::LLVMContext *ctx, double value) {
|
|||||||
std::vector<llvm::Constant *> vals;
|
std::vector<llvm::Constant *> vals;
|
||||||
for (unsigned int i = 0; i < lvt->getNumElements(); ++i)
|
for (unsigned int i = 0; i < lvt->getNumElements(); ++i)
|
||||||
vals.push_back(constElement);
|
vals.push_back(constElement);
|
||||||
return llvm::ConstantVector::get(lvt, vals);
|
return llvm::ConstantVector::get(vals);
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
const llvm::ArrayType *lat =
|
const llvm::ArrayType *lat =
|
||||||
@@ -3203,6 +3203,7 @@ ConstExpr::ConstExpr(ConstExpr *old, double *v)
|
|||||||
break;
|
break;
|
||||||
case AtomicType::TYPE_INT64:
|
case AtomicType::TYPE_INT64:
|
||||||
case AtomicType::TYPE_UINT64:
|
case AtomicType::TYPE_UINT64:
|
||||||
|
// For now, this should never be reached
|
||||||
FATAL("fixme; we need another constructor so that we're not trying to pass "
|
FATAL("fixme; we need another constructor so that we're not trying to pass "
|
||||||
"double values to init an int64 type...");
|
"double values to init an int64 type...");
|
||||||
default:
|
default:
|
||||||
|
|||||||
@@ -1,4 +1,4 @@
|
|||||||
<?xml version="1.0" encoding="utf-8"?>
|
<?xml version="1.0" encoding="utf-8"?>
|
||||||
<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
|
<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
|
||||||
<ItemGroup Label="ProjectConfigurations">
|
<ItemGroup Label="ProjectConfigurations">
|
||||||
<ProjectConfiguration Include="Debug|Win32">
|
<ProjectConfiguration Include="Debug|Win32">
|
||||||
@@ -179,7 +179,7 @@
|
|||||||
<PrecompiledHeader>NotUsing</PrecompiledHeader>
|
<PrecompiledHeader>NotUsing</PrecompiledHeader>
|
||||||
<WarningLevel>Level3</WarningLevel>
|
<WarningLevel>Level3</WarningLevel>
|
||||||
<Optimization>Disabled</Optimization>
|
<Optimization>Disabled</Optimization>
|
||||||
<PreprocessorDefinitions>NOMINMAX</PreprocessorDefinitions>
|
<PreprocessorDefinitions>NOMINMAX;LLVM_2_9</PreprocessorDefinitions>
|
||||||
<AdditionalIncludeDirectories>$(LLVM_INSTALL_DIR)\include;.;.\winstuff;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
|
<AdditionalIncludeDirectories>$(LLVM_INSTALL_DIR)\include;.;.\winstuff;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
|
||||||
<DisableSpecificWarnings>4146;4800;4996;4355;4624</DisableSpecificWarnings>
|
<DisableSpecificWarnings>4146;4800;4996;4355;4624</DisableSpecificWarnings>
|
||||||
</ClCompile>
|
</ClCompile>
|
||||||
@@ -197,7 +197,7 @@
|
|||||||
<Optimization>MaxSpeed</Optimization>
|
<Optimization>MaxSpeed</Optimization>
|
||||||
<FunctionLevelLinking>true</FunctionLevelLinking>
|
<FunctionLevelLinking>true</FunctionLevelLinking>
|
||||||
<IntrinsicFunctions>true</IntrinsicFunctions>
|
<IntrinsicFunctions>true</IntrinsicFunctions>
|
||||||
<PreprocessorDefinitions>NOMINMAX</PreprocessorDefinitions>
|
<PreprocessorDefinitions>NOMINMAX;LLVM_2_9</PreprocessorDefinitions>
|
||||||
<AdditionalIncludeDirectories>$(LLVM_INSTALL_DIR)\include;.;.\winstuff;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
|
<AdditionalIncludeDirectories>$(LLVM_INSTALL_DIR)\include;.;.\winstuff;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
|
||||||
<DisableSpecificWarnings>4146;4800;4996;4355;4624</DisableSpecificWarnings>
|
<DisableSpecificWarnings>4146;4800;4996;4355;4624</DisableSpecificWarnings>
|
||||||
</ClCompile>
|
</ClCompile>
|
||||||
|
|||||||
32
llvmutil.cpp
32
llvmutil.cpp
@@ -116,7 +116,7 @@ InitLLVMUtil(llvm::LLVMContext *ctx, Target target) {
|
|||||||
|
|
||||||
for (int i = 0; i < target.vectorWidth; ++i)
|
for (int i = 0; i < target.vectorWidth; ++i)
|
||||||
maskOnes.push_back(onMask);
|
maskOnes.push_back(onMask);
|
||||||
LLVMMaskAllOn = llvm::ConstantVector::get(LLVMTypes::MaskType, maskOnes);
|
LLVMMaskAllOn = llvm::ConstantVector::get(maskOnes);
|
||||||
|
|
||||||
std::vector<llvm::Constant *> maskZeros;
|
std::vector<llvm::Constant *> maskZeros;
|
||||||
llvm::Constant *offMask = NULL;
|
llvm::Constant *offMask = NULL;
|
||||||
@@ -125,7 +125,7 @@ InitLLVMUtil(llvm::LLVMContext *ctx, Target target) {
|
|||||||
|
|
||||||
for (int i = 0; i < target.vectorWidth; ++i)
|
for (int i = 0; i < target.vectorWidth; ++i)
|
||||||
maskZeros.push_back(offMask);
|
maskZeros.push_back(offMask);
|
||||||
LLVMMaskAllOff = llvm::ConstantVector::get(LLVMTypes::MaskType, maskZeros);
|
LLVMMaskAllOff = llvm::ConstantVector::get(maskZeros);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@@ -174,7 +174,7 @@ LLVMInt32Vector(int32_t ival) {
|
|||||||
std::vector<llvm::Constant *> vals;
|
std::vector<llvm::Constant *> vals;
|
||||||
for (int i = 0; i < g->target.vectorWidth; ++i)
|
for (int i = 0; i < g->target.vectorWidth; ++i)
|
||||||
vals.push_back(v);
|
vals.push_back(v);
|
||||||
return llvm::ConstantVector::get(LLVMTypes::Int32VectorType, vals);
|
return llvm::ConstantVector::get(vals);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@@ -183,7 +183,7 @@ LLVMInt32Vector(const int32_t *ivec) {
|
|||||||
std::vector<llvm::Constant *> vals;
|
std::vector<llvm::Constant *> vals;
|
||||||
for (int i = 0; i < g->target.vectorWidth; ++i)
|
for (int i = 0; i < g->target.vectorWidth; ++i)
|
||||||
vals.push_back(LLVMInt32(ivec[i]));
|
vals.push_back(LLVMInt32(ivec[i]));
|
||||||
return llvm::ConstantVector::get(LLVMTypes::Int32VectorType, vals);
|
return llvm::ConstantVector::get(vals);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@@ -193,7 +193,7 @@ LLVMUInt32Vector(uint32_t ival) {
|
|||||||
std::vector<llvm::Constant *> vals;
|
std::vector<llvm::Constant *> vals;
|
||||||
for (int i = 0; i < g->target.vectorWidth; ++i)
|
for (int i = 0; i < g->target.vectorWidth; ++i)
|
||||||
vals.push_back(v);
|
vals.push_back(v);
|
||||||
return llvm::ConstantVector::get(LLVMTypes::Int32VectorType, vals);
|
return llvm::ConstantVector::get(vals);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@@ -202,7 +202,7 @@ LLVMUInt32Vector(const uint32_t *ivec) {
|
|||||||
std::vector<llvm::Constant *> vals;
|
std::vector<llvm::Constant *> vals;
|
||||||
for (int i = 0; i < g->target.vectorWidth; ++i)
|
for (int i = 0; i < g->target.vectorWidth; ++i)
|
||||||
vals.push_back(LLVMUInt32(ivec[i]));
|
vals.push_back(LLVMUInt32(ivec[i]));
|
||||||
return llvm::ConstantVector::get(LLVMTypes::Int32VectorType, vals);
|
return llvm::ConstantVector::get(vals);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@@ -212,7 +212,7 @@ LLVMFloatVector(float fval) {
|
|||||||
std::vector<llvm::Constant *> vals;
|
std::vector<llvm::Constant *> vals;
|
||||||
for (int i = 0; i < g->target.vectorWidth; ++i)
|
for (int i = 0; i < g->target.vectorWidth; ++i)
|
||||||
vals.push_back(v);
|
vals.push_back(v);
|
||||||
return llvm::ConstantVector::get(LLVMTypes::FloatVectorType, vals);
|
return llvm::ConstantVector::get(vals);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@@ -221,7 +221,7 @@ LLVMFloatVector(const float *fvec) {
|
|||||||
std::vector<llvm::Constant *> vals;
|
std::vector<llvm::Constant *> vals;
|
||||||
for (int i = 0; i < g->target.vectorWidth; ++i)
|
for (int i = 0; i < g->target.vectorWidth; ++i)
|
||||||
vals.push_back(LLVMFloat(fvec[i]));
|
vals.push_back(LLVMFloat(fvec[i]));
|
||||||
return llvm::ConstantVector::get(LLVMTypes::FloatVectorType, vals);
|
return llvm::ConstantVector::get(vals);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@@ -231,7 +231,7 @@ LLVMDoubleVector(double dval) {
|
|||||||
std::vector<llvm::Constant *> vals;
|
std::vector<llvm::Constant *> vals;
|
||||||
for (int i = 0; i < g->target.vectorWidth; ++i)
|
for (int i = 0; i < g->target.vectorWidth; ++i)
|
||||||
vals.push_back(v);
|
vals.push_back(v);
|
||||||
return llvm::ConstantVector::get(LLVMTypes::DoubleVectorType, vals);
|
return llvm::ConstantVector::get(vals);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@@ -240,7 +240,7 @@ LLVMDoubleVector(const double *dvec) {
|
|||||||
std::vector<llvm::Constant *> vals;
|
std::vector<llvm::Constant *> vals;
|
||||||
for (int i = 0; i < g->target.vectorWidth; ++i)
|
for (int i = 0; i < g->target.vectorWidth; ++i)
|
||||||
vals.push_back(LLVMDouble(dvec[i]));
|
vals.push_back(LLVMDouble(dvec[i]));
|
||||||
return llvm::ConstantVector::get(LLVMTypes::DoubleVectorType, vals);
|
return llvm::ConstantVector::get(vals);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@@ -250,7 +250,7 @@ LLVMInt64Vector(int64_t ival) {
|
|||||||
std::vector<llvm::Constant *> vals;
|
std::vector<llvm::Constant *> vals;
|
||||||
for (int i = 0; i < g->target.vectorWidth; ++i)
|
for (int i = 0; i < g->target.vectorWidth; ++i)
|
||||||
vals.push_back(v);
|
vals.push_back(v);
|
||||||
return llvm::ConstantVector::get(LLVMTypes::Int64VectorType, vals);
|
return llvm::ConstantVector::get(vals);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@@ -259,7 +259,7 @@ LLVMInt64Vector(const int64_t *ivec) {
|
|||||||
std::vector<llvm::Constant *> vals;
|
std::vector<llvm::Constant *> vals;
|
||||||
for (int i = 0; i < g->target.vectorWidth; ++i)
|
for (int i = 0; i < g->target.vectorWidth; ++i)
|
||||||
vals.push_back(LLVMInt64(ivec[i]));
|
vals.push_back(LLVMInt64(ivec[i]));
|
||||||
return llvm::ConstantVector::get(LLVMTypes::Int64VectorType, vals);
|
return llvm::ConstantVector::get(vals);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@@ -269,7 +269,7 @@ LLVMUInt64Vector(uint64_t ival) {
|
|||||||
std::vector<llvm::Constant *> vals;
|
std::vector<llvm::Constant *> vals;
|
||||||
for (int i = 0; i < g->target.vectorWidth; ++i)
|
for (int i = 0; i < g->target.vectorWidth; ++i)
|
||||||
vals.push_back(v);
|
vals.push_back(v);
|
||||||
return llvm::ConstantVector::get(LLVMTypes::Int64VectorType, vals);
|
return llvm::ConstantVector::get(vals);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@@ -278,7 +278,7 @@ LLVMUInt64Vector(const uint64_t *ivec) {
|
|||||||
std::vector<llvm::Constant *> vals;
|
std::vector<llvm::Constant *> vals;
|
||||||
for (int i = 0; i < g->target.vectorWidth; ++i)
|
for (int i = 0; i < g->target.vectorWidth; ++i)
|
||||||
vals.push_back(LLVMUInt64(ivec[i]));
|
vals.push_back(LLVMUInt64(ivec[i]));
|
||||||
return llvm::ConstantVector::get(LLVMTypes::Int64VectorType, vals);
|
return llvm::ConstantVector::get(vals);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@@ -297,7 +297,7 @@ LLVMBoolVector(bool b) {
|
|||||||
std::vector<llvm::Constant *> vals;
|
std::vector<llvm::Constant *> vals;
|
||||||
for (int i = 0; i < g->target.vectorWidth; ++i)
|
for (int i = 0; i < g->target.vectorWidth; ++i)
|
||||||
vals.push_back(v);
|
vals.push_back(v);
|
||||||
return llvm::ConstantVector::get(LLVMTypes::BoolVectorType, vals);
|
return llvm::ConstantVector::get(vals);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@@ -317,7 +317,7 @@ LLVMBoolVector(const bool *bvec) {
|
|||||||
|
|
||||||
vals.push_back(v);
|
vals.push_back(v);
|
||||||
}
|
}
|
||||||
return llvm::ConstantVector::get(LLVMTypes::BoolVectorType, vals);
|
return llvm::ConstantVector::get(vals);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
1
main.cpp
1
main.cpp
@@ -38,6 +38,7 @@
|
|||||||
#include "ispc.h"
|
#include "ispc.h"
|
||||||
#include "module.h"
|
#include "module.h"
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
|
#include <stdlib.h>
|
||||||
#include <llvm/Support/PrettyStackTrace.h>
|
#include <llvm/Support/PrettyStackTrace.h>
|
||||||
#ifdef LLVM_2_8
|
#ifdef LLVM_2_8
|
||||||
#include <llvm/System/Signals.h>
|
#include <llvm/System/Signals.h>
|
||||||
|
|||||||
13
module.cpp
13
module.cpp
@@ -82,7 +82,9 @@
|
|||||||
#ifndef LLVM_2_8
|
#ifndef LLVM_2_8
|
||||||
#include <llvm/Support/ToolOutputFile.h>
|
#include <llvm/Support/ToolOutputFile.h>
|
||||||
#include <llvm/Support/Host.h>
|
#include <llvm/Support/Host.h>
|
||||||
#endif // !LLVM_2_8
|
#else // !LLVM_2_8
|
||||||
|
#include <llvm/System/Host.h>
|
||||||
|
#endif // LLVM_2_8
|
||||||
#include <llvm/Assembly/PrintModulePass.h>
|
#include <llvm/Assembly/PrintModulePass.h>
|
||||||
#include <llvm/Support/raw_ostream.h>
|
#include <llvm/Support/raw_ostream.h>
|
||||||
#include <llvm/Bitcode/ReaderWriter.h>
|
#include <llvm/Bitcode/ReaderWriter.h>
|
||||||
@@ -184,7 +186,7 @@ Module::CompileFile() {
|
|||||||
FATAL("Need to implement code to run the preprocessor for windows");
|
FATAL("Need to implement code to run the preprocessor for windows");
|
||||||
#else // ISPC_IS_WINDOWS
|
#else // ISPC_IS_WINDOWS
|
||||||
char *cmd = NULL;
|
char *cmd = NULL;
|
||||||
if (asprintf(&cmd, "/usr/bin/cpp -DISPC=1 -DPI=3.1415936535 %s %s",
|
if (asprintf(&cmd, "/usr/bin/cpp -DISPC=1 -DPI=3.1415926536 %s %s",
|
||||||
cppDefs.c_str(), filename ? filename : "-") == -1) {
|
cppDefs.c_str(), filename ? filename : "-") == -1) {
|
||||||
fprintf(stderr, "Unable to allocate memory in asprintf()?!\n");
|
fprintf(stderr, "Unable to allocate memory in asprintf()?!\n");
|
||||||
exit(1);
|
exit(1);
|
||||||
@@ -663,19 +665,12 @@ lEmitFunctionCode(FunctionEmitContext *ctx, llvm::Function *function,
|
|||||||
lCopyInTaskParameter(i, structParamPtr, decl, ctx);
|
lCopyInTaskParameter(i, structParamPtr, decl, ctx);
|
||||||
|
|
||||||
// Copy in the mask as well.
|
// Copy in the mask as well.
|
||||||
// FIXME: we may probably to check the mask at runtime and emit an
|
|
||||||
// 'all on' code path if it is all on, since that should be a
|
|
||||||
// common case.
|
|
||||||
#if 1
|
|
||||||
int nArgs = decl->functionArgs ? decl->functionArgs->size() : 0;
|
int nArgs = decl->functionArgs ? decl->functionArgs->size() : 0;
|
||||||
// The mask is the last parameter in the argument structure
|
// The mask is the last parameter in the argument structure
|
||||||
llvm::Value *ptr = ctx->GetElementPtrInst(structParamPtr, 0, nArgs,
|
llvm::Value *ptr = ctx->GetElementPtrInst(structParamPtr, 0, nArgs,
|
||||||
"task_struct_mask");
|
"task_struct_mask");
|
||||||
llvm::Value *ptrval = ctx->LoadInst(ptr, NULL, "mask");
|
llvm::Value *ptrval = ctx->LoadInst(ptr, NULL, "mask");
|
||||||
ctx->SetEntryMask(ptrval);
|
ctx->SetEntryMask(ptrval);
|
||||||
#else
|
|
||||||
Warning(funSym->pos, "Running task with all-on mask to start.");
|
|
||||||
#endif
|
|
||||||
|
|
||||||
// Copy threadIndex and threadCount into stack-allocated storage so
|
// Copy threadIndex and threadCount into stack-allocated storage so
|
||||||
// that their symbols point to something reasonable.
|
// that their symbols point to something reasonable.
|
||||||
|
|||||||
11
opt.cpp
11
opt.cpp
@@ -1131,10 +1131,17 @@ MaskedStoreOptPass::runOnBasicBlock(llvm::BasicBlock &bb) {
|
|||||||
}
|
}
|
||||||
else if (maskAsInt == allOnMask) {
|
else if (maskAsInt == allOnMask) {
|
||||||
// The mask is all on, so turn this into a regular store
|
// The mask is all on, so turn this into a regular store
|
||||||
const llvm::Type *ptrType = llvm::PointerType::get(rvalue->getType(), 0);
|
const llvm::Type *rvalueType = rvalue->getType();
|
||||||
|
const llvm::Type *ptrType = llvm::PointerType::get(rvalueType, 0);
|
||||||
|
// Need to update this when int8/int16 are added
|
||||||
|
int align = (called == pms32Func || called == pms64Func ||
|
||||||
|
called == msb32Func) ? 4 : 8;
|
||||||
|
|
||||||
lvalue = new llvm::BitCastInst(lvalue, ptrType, "lvalue_to_ptr_type", callInst);
|
lvalue = new llvm::BitCastInst(lvalue, ptrType, "lvalue_to_ptr_type", callInst);
|
||||||
lCopyMetadata(lvalue, callInst);
|
lCopyMetadata(lvalue, callInst);
|
||||||
llvm::Instruction *store = new llvm::StoreInst(rvalue, lvalue);
|
llvm::Instruction *store =
|
||||||
|
new llvm::StoreInst(rvalue, lvalue, false /* not volatile */,
|
||||||
|
align);
|
||||||
lCopyMetadata(store, callInst);
|
lCopyMetadata(store, callInst);
|
||||||
llvm::ReplaceInstWithInst(callInst, store);
|
llvm::ReplaceInstWithInst(callInst, store);
|
||||||
|
|
||||||
|
|||||||
@@ -513,14 +513,14 @@ declare <8 x float> @llvm.x86.avx.blendvps(<8 x float>, <8 x float>,
|
|||||||
define void @__masked_store_blend_32(<8 x i32>* nocapture, <8 x i32>,
|
define void @__masked_store_blend_32(<8 x i32>* nocapture, <8 x i32>,
|
||||||
<8 x i32>) nounwind alwaysinline {
|
<8 x i32>) nounwind alwaysinline {
|
||||||
%mask_as_float = bitcast <8 x i32> %2 to <8 x float>
|
%mask_as_float = bitcast <8 x i32> %2 to <8 x float>
|
||||||
%oldValue = load <8 x i32>* %0
|
%oldValue = load <8 x i32>* %0, align 4
|
||||||
%oldAsFloat = bitcast <8 x i32> %oldValue to <8 x float>
|
%oldAsFloat = bitcast <8 x i32> %oldValue to <8 x float>
|
||||||
%newAsFloat = bitcast <8 x i32> %1 to <8 x float>
|
%newAsFloat = bitcast <8 x i32> %1 to <8 x float>
|
||||||
%blend = call <8 x float> @llvm.x86.avx.blendvps(<8 x float> %oldAsFloat,
|
%blend = call <8 x float> @llvm.x86.avx.blendvps(<8 x float> %oldAsFloat,
|
||||||
<8 x float> %newAsFloat,
|
<8 x float> %newAsFloat,
|
||||||
<8 x float> %mask_as_float)
|
<8 x float> %mask_as_float)
|
||||||
%blendAsInt = bitcast <8 x float> %blend to <8 x i32>
|
%blendAsInt = bitcast <8 x float> %blend to <8 x i32>
|
||||||
store <8 x i32> %blendAsInt, <8 x i32>* %0
|
store <8 x i32> %blendAsInt, <8 x i32>* %0, align 4
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -278,15 +278,15 @@ define internal float @__reduce_add_float(<4 x float> %v) nounwind readonly alwa
|
|||||||
|
|
||||||
define void @__masked_store_blend_32(<4 x i32>* nocapture, <4 x i32>,
|
define void @__masked_store_blend_32(<4 x i32>* nocapture, <4 x i32>,
|
||||||
<4 x i32> %mask) nounwind alwaysinline {
|
<4 x i32> %mask) nounwind alwaysinline {
|
||||||
%val = load <4 x i32> * %0
|
%val = load <4 x i32> * %0, align 4
|
||||||
%newval = call <4 x i32> @__vselect_i32(<4 x i32> %val, <4 x i32> %1, <4 x i32> %mask)
|
%newval = call <4 x i32> @__vselect_i32(<4 x i32> %val, <4 x i32> %1, <4 x i32> %mask)
|
||||||
store <4 x i32> %newval, <4 x i32> * %0
|
store <4 x i32> %newval, <4 x i32> * %0, align 4
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
define void @__masked_store_blend_64(<4 x i64>* nocapture %ptr, <4 x i64> %new,
|
define void @__masked_store_blend_64(<4 x i64>* nocapture %ptr, <4 x i64> %new,
|
||||||
<4 x i32> %mask) nounwind alwaysinline {
|
<4 x i32> %mask) nounwind alwaysinline {
|
||||||
%oldValue = load <4 x i64>* %ptr
|
%oldValue = load <4 x i64>* %ptr, align 8
|
||||||
|
|
||||||
; Do 4x64-bit blends by doing two <4 x i32> blends, where the <4 x i32> values
|
; Do 4x64-bit blends by doing two <4 x i32> blends, where the <4 x i32> values
|
||||||
; are actually bitcast <2 x i64> values
|
; are actually bitcast <2 x i64> values
|
||||||
@@ -322,7 +322,7 @@ define void @__masked_store_blend_64(<4 x i64>* nocapture %ptr, <4 x i64> %new,
|
|||||||
; reconstruct the final <4 x i64> vector
|
; reconstruct the final <4 x i64> vector
|
||||||
%final = shufflevector <2 x i64> %result01, <2 x i64> %result23,
|
%final = shufflevector <2 x i64> %result01, <2 x i64> %result23,
|
||||||
<4 x i32> <i32 0, i32 1, i32 2, i32 3>
|
<4 x i32> <i32 0, i32 1, i32 2, i32 3>
|
||||||
store <4 x i64> %final, <4 x i64> * %ptr
|
store <4 x i64> %final, <4 x i64> * %ptr, align 8
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -188,21 +188,21 @@ declare <4 x float> @llvm.x86.sse41.blendvps(<4 x float>, <4 x float>,
|
|||||||
define void @__masked_store_blend_32(<4 x i32>* nocapture, <4 x i32>,
|
define void @__masked_store_blend_32(<4 x i32>* nocapture, <4 x i32>,
|
||||||
<4 x i32> %mask) nounwind alwaysinline {
|
<4 x i32> %mask) nounwind alwaysinline {
|
||||||
%mask_as_float = bitcast <4 x i32> %mask to <4 x float>
|
%mask_as_float = bitcast <4 x i32> %mask to <4 x float>
|
||||||
%oldValue = load <4 x i32>* %0
|
%oldValue = load <4 x i32>* %0, align 4
|
||||||
%oldAsFloat = bitcast <4 x i32> %oldValue to <4 x float>
|
%oldAsFloat = bitcast <4 x i32> %oldValue to <4 x float>
|
||||||
%newAsFloat = bitcast <4 x i32> %1 to <4 x float>
|
%newAsFloat = bitcast <4 x i32> %1 to <4 x float>
|
||||||
%blend = call <4 x float> @llvm.x86.sse41.blendvps(<4 x float> %oldAsFloat,
|
%blend = call <4 x float> @llvm.x86.sse41.blendvps(<4 x float> %oldAsFloat,
|
||||||
<4 x float> %newAsFloat,
|
<4 x float> %newAsFloat,
|
||||||
<4 x float> %mask_as_float)
|
<4 x float> %mask_as_float)
|
||||||
%blendAsInt = bitcast <4 x float> %blend to <4 x i32>
|
%blendAsInt = bitcast <4 x float> %blend to <4 x i32>
|
||||||
store <4 x i32> %blendAsInt, <4 x i32>* %0
|
store <4 x i32> %blendAsInt, <4 x i32>* %0, align 4
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
define void @__masked_store_blend_64(<4 x i64>* nocapture %ptr, <4 x i64> %new,
|
define void @__masked_store_blend_64(<4 x i64>* nocapture %ptr, <4 x i64> %new,
|
||||||
<4 x i32> %i32mask) nounwind alwaysinline {
|
<4 x i32> %i32mask) nounwind alwaysinline {
|
||||||
%oldValue = load <4 x i64>* %ptr
|
%oldValue = load <4 x i64>* %ptr, align 8
|
||||||
%mask = bitcast <4 x i32> %i32mask to <4 x float>
|
%mask = bitcast <4 x i32> %i32mask to <4 x float>
|
||||||
|
|
||||||
; Do 4x64-bit blends by doing two <4 x i32> blends, where the <4 x i32> values
|
; Do 4x64-bit blends by doing two <4 x i32> blends, where the <4 x i32> values
|
||||||
@@ -243,6 +243,6 @@ define void @__masked_store_blend_64(<4 x i64>* nocapture %ptr, <4 x i64> %new,
|
|||||||
; reconstruct the final <4 x i64> vector
|
; reconstruct the final <4 x i64> vector
|
||||||
%final = shufflevector <2 x i64> %result01, <2 x i64> %result23,
|
%final = shufflevector <2 x i64> %result01, <2 x i64> %result23,
|
||||||
<4 x i32> <i32 0, i32 1, i32 2, i32 3>
|
<4 x i32> <i32 0, i32 1, i32 2, i32 3>
|
||||||
store <4 x i64> %final, <4 x i64> * %ptr
|
store <4 x i64> %final, <4 x i64> * %ptr, align 8
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -566,7 +566,7 @@ define void @__masked_store_blend_32(<8 x i32>* nocapture, <8 x i32>,
|
|||||||
<4 x i32> <i32 0, i32 1, i32 2, i32 3>
|
<4 x i32> <i32 0, i32 1, i32 2, i32 3>
|
||||||
%mask_b = shufflevector <8 x float> %mask_as_float, <8 x float> undef,
|
%mask_b = shufflevector <8 x float> %mask_as_float, <8 x float> undef,
|
||||||
<4 x i32> <i32 4, i32 5, i32 6, i32 7>
|
<4 x i32> <i32 4, i32 5, i32 6, i32 7>
|
||||||
%oldValue = load <8 x i32>* %0
|
%oldValue = load <8 x i32>* %0, align 4
|
||||||
%oldAsFloat = bitcast <8 x i32> %oldValue to <8 x float>
|
%oldAsFloat = bitcast <8 x i32> %oldValue to <8 x float>
|
||||||
%newAsFloat = bitcast <8 x i32> %1 to <8 x float>
|
%newAsFloat = bitcast <8 x i32> %1 to <8 x float>
|
||||||
%old_a = shufflevector <8 x float> %oldAsFloat, <8 x float> undef,
|
%old_a = shufflevector <8 x float> %oldAsFloat, <8 x float> undef,
|
||||||
@@ -584,7 +584,7 @@ define void @__masked_store_blend_32(<8 x i32>* nocapture, <8 x i32>,
|
|||||||
%blend = shufflevector <4 x float> %blend_a, <4 x float> %blend_b,
|
%blend = shufflevector <4 x float> %blend_a, <4 x float> %blend_b,
|
||||||
<8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
|
<8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
|
||||||
%blendAsInt = bitcast <8 x float> %blend to <8 x i32>
|
%blendAsInt = bitcast <8 x float> %blend to <8 x i32>
|
||||||
store <8 x i32> %blendAsInt, <8 x i32>* %0
|
store <8 x i32> %blendAsInt, <8 x i32>* %0, align 4
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -595,7 +595,7 @@ define void @__masked_store_blend_64(<8 x i64>* nocapture %ptr, <8 x i64> %new,
|
|||||||
|
|
||||||
%mask_as_float = bitcast <8 x i32> %mask to <8 x float>
|
%mask_as_float = bitcast <8 x i32> %mask to <8 x float>
|
||||||
|
|
||||||
%old = load <8 x i64>* %ptr
|
%old = load <8 x i64>* %ptr, align 8
|
||||||
|
|
||||||
; set up the first two 64-bit values
|
; set up the first two 64-bit values
|
||||||
%old01 = shufflevector <8 x i64> %old, <8 x i64> undef, <2 x i32> <i32 0, i32 1>
|
%old01 = shufflevector <8 x i64> %old, <8 x i64> undef, <2 x i32> <i32 0, i32 1>
|
||||||
@@ -651,7 +651,7 @@ define void @__masked_store_blend_64(<8 x i64>* nocapture %ptr, <8 x i64> %new,
|
|||||||
<4 x i32> <i32 0, i32 1, i32 2, i32 3>
|
<4 x i32> <i32 0, i32 1, i32 2, i32 3>
|
||||||
%final = shufflevector <4 x i64> %final0123, <4 x i64> %final4567,
|
%final = shufflevector <4 x i64> %final0123, <4 x i64> %final4567,
|
||||||
<8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
|
<8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
|
||||||
store <8 x i64> %final, <8 x i64> * %ptr
|
store <8 x i64> %final, <8 x i64> * %ptr, align 8
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -1087,8 +1087,8 @@ static inline float atan2(float y, float x) {
|
|||||||
}
|
}
|
||||||
else if (__math_lib == __math_lib_ispc ||
|
else if (__math_lib == __math_lib_ispc ||
|
||||||
__math_lib == __math_lib_ispc_fast) {
|
__math_lib == __math_lib_ispc_fast) {
|
||||||
const float pi_vec = 3.1415927410125732421875;
|
const float pi_vec = 3.1415926536;
|
||||||
const float pi_over_two_vec = 1.57079637050628662109375;
|
const float pi_over_two_vec = 1.5707963267;
|
||||||
// atan2(y, x) =
|
// atan2(y, x) =
|
||||||
//
|
//
|
||||||
// atan2(y > 0, x = +-0) -> Pi/2
|
// atan2(y > 0, x = +-0) -> Pi/2
|
||||||
|
|||||||
10
stdlib.m4
10
stdlib.m4
@@ -452,7 +452,7 @@ define internal <$1 x i32> @__load_uint16([0 x i32] *, i32 %offset) nounwind alw
|
|||||||
%ptr16 = bitcast [0 x i32] *%0 to i16 *
|
%ptr16 = bitcast [0 x i32] *%0 to i16 *
|
||||||
%ptr = getelementptr i16 * %ptr16, i32 %offset
|
%ptr = getelementptr i16 * %ptr16, i32 %offset
|
||||||
%ptr64 = bitcast i16 * %ptr to i`'eval(16*$1) *
|
%ptr64 = bitcast i16 * %ptr to i`'eval(16*$1) *
|
||||||
%val = load i`'eval(16*$1) * %ptr64, align 1
|
%val = load i`'eval(16*$1) * %ptr64, align 2
|
||||||
|
|
||||||
%vval = bitcast i`'eval(16*$1) %val to <$1 x i16>
|
%vval = bitcast i`'eval(16*$1) %val to <$1 x i16>
|
||||||
; unsigned, so use zero-extent...
|
; unsigned, so use zero-extent...
|
||||||
@@ -479,7 +479,7 @@ define internal void @__store_uint8([0 x i32] *, i32 %offset, <$1 x i32> %val32,
|
|||||||
%oldmasked = and i`'eval(8*$1) %old, %notmask
|
%oldmasked = and i`'eval(8*$1) %old, %notmask
|
||||||
%newmasked = and i`'eval(8*$1) %val64, %mask64
|
%newmasked = and i`'eval(8*$1) %val64, %mask64
|
||||||
%final = or i`'eval(8*$1) %oldmasked, %newmasked
|
%final = or i`'eval(8*$1) %oldmasked, %newmasked
|
||||||
store i`'eval(8*$1) %final, i`'eval(8*$1) * %ptr64
|
store i`'eval(8*$1) %final, i`'eval(8*$1) * %ptr64, align 1
|
||||||
|
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
@@ -498,11 +498,11 @@ define internal void @__store_uint16([0 x i32] *, i32 %offset, <$1 x i32> %val32
|
|||||||
%ptr64 = bitcast i16 * %ptr to i`'eval(16*$1) *
|
%ptr64 = bitcast i16 * %ptr to i`'eval(16*$1) *
|
||||||
|
|
||||||
;; as above, use mask to do blending with logical ops...
|
;; as above, use mask to do blending with logical ops...
|
||||||
%old = load i`'eval(16*$1) * %ptr64, align 1
|
%old = load i`'eval(16*$1) * %ptr64, align 2
|
||||||
%oldmasked = and i`'eval(16*$1) %old, %notmask
|
%oldmasked = and i`'eval(16*$1) %old, %notmask
|
||||||
%newmasked = and i`'eval(16*$1) %val64, %mask64
|
%newmasked = and i`'eval(16*$1) %val64, %mask64
|
||||||
%final = or i`'eval(16*$1) %oldmasked, %newmasked
|
%final = or i`'eval(16*$1) %oldmasked, %newmasked
|
||||||
store i`'eval(16*$1) %final, i`'eval(16*$1) * %ptr64
|
store i`'eval(16*$1) %final, i`'eval(16*$1) * %ptr64, align 2
|
||||||
|
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
@@ -544,7 +544,7 @@ all_on:
|
|||||||
;; vector load
|
;; vector load
|
||||||
%vecptr = bitcast i32 *%startptr to <$1 x i32> *
|
%vecptr = bitcast i32 *%startptr to <$1 x i32> *
|
||||||
%vec_load = load <$1 x i32> *%vecptr, align 4
|
%vec_load = load <$1 x i32> *%vecptr, align 4
|
||||||
store <$1 x i32> %vec_load, <$1 x i32> * %val_ptr
|
store <$1 x i32> %vec_load, <$1 x i32> * %val_ptr, align 4
|
||||||
ret i32 $1
|
ret i32 $1
|
||||||
|
|
||||||
not_all_on:
|
not_all_on:
|
||||||
|
|||||||
Reference in New Issue
Block a user