Finished updating alignment issues for vector types; don't assume pointers
are aligned to the natural vector width.
This commit is contained in:
38
ctx.cpp
38
ctx.cpp
@@ -1315,8 +1315,21 @@ FunctionEmitContext::LoadInst(llvm::Value *lvalue, const Type *type,
|
||||
|
||||
if (llvm::isa<const llvm::PointerType>(lvalue->getType())) {
|
||||
// If the lvalue is a straight up regular pointer, then just issue
|
||||
// a regular load
|
||||
llvm::Instruction *inst = new llvm::LoadInst(lvalue, name ? name : "load", bblock);
|
||||
// a regular load. First figure out the alignment; in general we
|
||||
// can just assume the natural alignment (0 here), but for varying
|
||||
// atomic types, we need to make sure that the compiler emits
|
||||
// unaligned vector loads, so we specify a reduced alignment here.
|
||||
int align = 0;
|
||||
const AtomicType *atomicType = dynamic_cast<const AtomicType *>(type);
|
||||
if (atomicType != NULL && atomicType->IsVaryingType())
|
||||
// We actually just want to align to the vector element
|
||||
// alignment, but can't easily get that here, so just tell LLVM
|
||||
// it's totally unaligned. (This shouldn't make any difference
|
||||
// vs the proper alignment in practice.)
|
||||
align = 1;
|
||||
llvm::Instruction *inst = new llvm::LoadInst(lvalue, name ? name : "load",
|
||||
false /* not volatile */,
|
||||
align, bblock);
|
||||
AddDebugPos(inst);
|
||||
return inst;
|
||||
}
|
||||
@@ -1644,8 +1657,16 @@ FunctionEmitContext::StoreInst(llvm::Value *rvalue, llvm::Value *lvalue,
|
||||
return;
|
||||
}
|
||||
|
||||
llvm::Instruction *inst = new llvm::StoreInst(rvalue, lvalue, false /* not volatile */,
|
||||
4, bblock);
|
||||
llvm::Instruction *inst;
|
||||
if (llvm::isa<llvm::VectorType>(rvalue->getType()))
|
||||
// Specify an unaligned store, since we don't know that the lvalue
|
||||
// will in fact be aligned to a vector width here. (Actually
|
||||
// should be aligned to the alignment of the vector element type...)
|
||||
inst = new llvm::StoreInst(rvalue, lvalue, false /* not volatile */,
|
||||
1, bblock);
|
||||
else
|
||||
inst = new llvm::StoreInst(rvalue, lvalue, bblock);
|
||||
|
||||
AddDebugPos(inst);
|
||||
}
|
||||
|
||||
@@ -1662,9 +1683,8 @@ FunctionEmitContext::StoreInst(llvm::Value *rvalue, llvm::Value *lvalue,
|
||||
|
||||
// Figure out what kind of store we're doing here
|
||||
if (rvalueType->IsUniformType()) {
|
||||
// The easy case; a regular store
|
||||
llvm::Instruction *si = new llvm::StoreInst(rvalue, lvalue, false /* not volatile */,
|
||||
4, bblock);
|
||||
// The easy case; a regular store, natural alignment is fine
|
||||
llvm::Instruction *si = new llvm::StoreInst(rvalue, lvalue, bblock);
|
||||
AddDebugPos(si);
|
||||
}
|
||||
else if (llvm::isa<const llvm::ArrayType>(lvalue->getType()))
|
||||
@@ -1674,9 +1694,7 @@ FunctionEmitContext::StoreInst(llvm::Value *rvalue, llvm::Value *lvalue,
|
||||
else if (storeMask == LLVMMaskAllOn) {
|
||||
// Otherwise it is a masked store unless we can determine that the
|
||||
// mask is all on...
|
||||
llvm::Instruction *si =
|
||||
new llvm::StoreInst(rvalue, lvalue, false /*not volatile*/, 4, bblock);
|
||||
AddDebugPos(si);
|
||||
StoreInst(rvalue, lvalue, name);
|
||||
}
|
||||
else
|
||||
maskedStore(rvalue, lvalue, rvalueType, storeMask);
|
||||
|
||||
@@ -1970,7 +1970,7 @@ Data Layout
|
||||
|
||||
In general, ``ispc`` tries to ensure that ``struct`` s and other complex
|
||||
datatypes are laid out in the same way in memory as they are in C/C++.
|
||||
Matching alignment is important for easy interoperability between C/C++
|
||||
Matching structure layout is important for easy interoperability between C/C++
|
||||
code and ``ispc`` code.
|
||||
|
||||
The main complexity in sharing data between ``ispc`` and C/C++ often comes
|
||||
@@ -2023,11 +2023,6 @@ It can pass ``array`` to a ``ispc`` function defined as:
|
||||
|
||||
export void foo(uniform float array[], uniform int count)
|
||||
|
||||
(Though the pointer must be aligned to the compilation target's natural
|
||||
vector width; see the discussion of alignment restrictions in `Data
|
||||
Alignment and Aliasing`_ and the aligned allocation routines in
|
||||
``examples/options/options.cpp`` for example.)
|
||||
|
||||
Similarly, ``struct`` s from the application can have embedded pointers.
|
||||
This is handled with similar ``[]`` syntax:
|
||||
|
||||
@@ -2062,55 +2057,20 @@ vector types from C/C++ application code if possible.
|
||||
Data Alignment and Aliasing
|
||||
---------------------------
|
||||
|
||||
There are two important constraints that must be adhered to when passing
|
||||
pointers from the application to ``ispc`` programs.
|
||||
There are two important constraints that must be adhered to when
|
||||
passing pointers from the application to ``ispc`` programs.
|
||||
|
||||
The first constraint is alignment: any pointers from the host program that
|
||||
are passed to ``ispc`` must be aligned to the natural vector alignment of the
|
||||
system--for example, 16 byte alignment on a target that supports Intel®
|
||||
SSE, 32-byte on an Intel® AVX target. If this constraint isn't met, the
|
||||
program may abort at runtime with an unaligned memory access error.
|
||||
The first is that it is required that it be valid to read memory at the
|
||||
first element of any array that is passed to ``ispc``. In practice, this
|
||||
should just happen naturally, but it does mean that it is illegal to pass a
|
||||
``NULL`` pointer as a parameter to an ``ispc`` function called from the
|
||||
application.
|
||||
|
||||
For example, in an ``ispc`` function with the following declaration:
|
||||
|
||||
::
|
||||
|
||||
export void foo(uniform float in[], uniform float out[],
|
||||
int count);
|
||||
|
||||
If the application is passing stack-allocated arrays for ``in`` and
|
||||
``out``, the C/C++ compiler must be told to align these arrays.
|
||||
|
||||
::
|
||||
|
||||
// MSVC, SSE target
|
||||
__declspec(align(16)) float in[16], out[16];
|
||||
foo(in, out, 16);
|
||||
|
||||
With the gcc/clang compilers, the syntax for providing alignment is
|
||||
slightly different:
|
||||
|
||||
::
|
||||
|
||||
float x[16] __attribute__ ((__align__(16)));
|
||||
foo(in, out, 16);
|
||||
|
||||
If the data being passed is dynamically allocated, the appropriate system
|
||||
aligned memory allocation routine should be used to allocate it (for
|
||||
example, ``_aligned_malloc()`` with Windows\*, ``memalign()`` with
|
||||
Linux\*; see the ``AllocAligned()`` function in ``examples/rt/rt.cpp`` for
|
||||
an example.)
|
||||
|
||||
It is also required that it be valid to read memory at the first element of
|
||||
any array that is passed to ``ispc``. In practice, this should just
|
||||
happen naturally, but it does mean that it is illegal to pass a ``NULL``
|
||||
pointer as a parameter to an ``ispc`` function called from the application.
|
||||
|
||||
The second key constraint is that pointers and references in ``ispc``
|
||||
programs must not alias. The ``ispc`` compiler assumes that different
|
||||
pointers can't end up pointing to the same memory location, either due to
|
||||
having the same initial value, or through array indexing in the program as
|
||||
it executes.
|
||||
The second constraint is that pointers and references in ``ispc`` programs
|
||||
must not alias. The ``ispc`` compiler assumes that different pointers
|
||||
can't end up pointing to the same memory location, either due to having the
|
||||
same initial value, or through array indexing in the program as it
|
||||
executes.
|
||||
|
||||
This aliasing constraint also applies to ``reference`` parameters to
|
||||
functions. Given a function like:
|
||||
@@ -2127,8 +2087,8 @@ another case of aliasing, and if the caller calls the function as ``func(x,
|
||||
x)``, it's not guaranteed that the ``if`` test will evaluate to true, due
|
||||
to the compiler's requirement of no aliasing.
|
||||
|
||||
(In the future, ``ispc`` will have the ability to work with unaligned
|
||||
memory as well as have a mechanism to indicate that pointers may alias.)
|
||||
(In the future, ``ispc`` will have a mechanism to indicate that pointers
|
||||
may alias.)
|
||||
|
||||
Using ISPC Effectively
|
||||
======================
|
||||
|
||||
@@ -38,15 +38,7 @@
|
||||
using namespace ispc;
|
||||
|
||||
int main() {
|
||||
// Pointers passed to ispc-compiled code are currently required to have
|
||||
// alignment equal to the target's native vector size. Here we align
|
||||
// to 32 bytes to be safe for both SSE and AVX targets.
|
||||
#ifdef _MSC_VER
|
||||
__declspec(align(32)) float vin[16], vout[16];
|
||||
#else
|
||||
float vin[16] __attribute__((aligned(32)));
|
||||
float vout[16] __attribute__((aligned(32)));
|
||||
#endif
|
||||
float vin[16], vout[16];
|
||||
|
||||
// Initialize input buffer
|
||||
for (int i = 0; i < 16; ++i)
|
||||
|
||||
Reference in New Issue
Block a user