Finished updating alignment issues for vector types; don't assume pointers
are aligned to the natural vector width.
This commit is contained in:
38
ctx.cpp
38
ctx.cpp
@@ -1315,8 +1315,21 @@ FunctionEmitContext::LoadInst(llvm::Value *lvalue, const Type *type,
|
|||||||
|
|
||||||
if (llvm::isa<const llvm::PointerType>(lvalue->getType())) {
|
if (llvm::isa<const llvm::PointerType>(lvalue->getType())) {
|
||||||
// If the lvalue is a straight up regular pointer, then just issue
|
// If the lvalue is a straight up regular pointer, then just issue
|
||||||
// a regular load
|
// a regular load. First figure out the alignment; in general we
|
||||||
llvm::Instruction *inst = new llvm::LoadInst(lvalue, name ? name : "load", bblock);
|
// can just assume the natural alignment (0 here), but for varying
|
||||||
|
// atomic types, we need to make sure that the compiler emits
|
||||||
|
// unaligned vector loads, so we specify a reduced alignment here.
|
||||||
|
int align = 0;
|
||||||
|
const AtomicType *atomicType = dynamic_cast<const AtomicType *>(type);
|
||||||
|
if (atomicType != NULL && atomicType->IsVaryingType())
|
||||||
|
// We actually just want to align to the vector element
|
||||||
|
// alignment, but can't easily get that here, so just tell LLVM
|
||||||
|
// it's totally unaligned. (This shouldn't make any difference
|
||||||
|
// vs the proper alignment in practice.)
|
||||||
|
align = 1;
|
||||||
|
llvm::Instruction *inst = new llvm::LoadInst(lvalue, name ? name : "load",
|
||||||
|
false /* not volatile */,
|
||||||
|
align, bblock);
|
||||||
AddDebugPos(inst);
|
AddDebugPos(inst);
|
||||||
return inst;
|
return inst;
|
||||||
}
|
}
|
||||||
@@ -1644,8 +1657,16 @@ FunctionEmitContext::StoreInst(llvm::Value *rvalue, llvm::Value *lvalue,
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
llvm::Instruction *inst = new llvm::StoreInst(rvalue, lvalue, false /* not volatile */,
|
llvm::Instruction *inst;
|
||||||
4, bblock);
|
if (llvm::isa<llvm::VectorType>(rvalue->getType()))
|
||||||
|
// Specify an unaligned store, since we don't know that the lvalue
|
||||||
|
// will in fact be aligned to a vector width here. (Actually
|
||||||
|
// should be aligned to the alignment of the vector element type...)
|
||||||
|
inst = new llvm::StoreInst(rvalue, lvalue, false /* not volatile */,
|
||||||
|
1, bblock);
|
||||||
|
else
|
||||||
|
inst = new llvm::StoreInst(rvalue, lvalue, bblock);
|
||||||
|
|
||||||
AddDebugPos(inst);
|
AddDebugPos(inst);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -1662,9 +1683,8 @@ FunctionEmitContext::StoreInst(llvm::Value *rvalue, llvm::Value *lvalue,
|
|||||||
|
|
||||||
// Figure out what kind of store we're doing here
|
// Figure out what kind of store we're doing here
|
||||||
if (rvalueType->IsUniformType()) {
|
if (rvalueType->IsUniformType()) {
|
||||||
// The easy case; a regular store
|
// The easy case; a regular store, natural alignment is fine
|
||||||
llvm::Instruction *si = new llvm::StoreInst(rvalue, lvalue, false /* not volatile */,
|
llvm::Instruction *si = new llvm::StoreInst(rvalue, lvalue, bblock);
|
||||||
4, bblock);
|
|
||||||
AddDebugPos(si);
|
AddDebugPos(si);
|
||||||
}
|
}
|
||||||
else if (llvm::isa<const llvm::ArrayType>(lvalue->getType()))
|
else if (llvm::isa<const llvm::ArrayType>(lvalue->getType()))
|
||||||
@@ -1674,9 +1694,7 @@ FunctionEmitContext::StoreInst(llvm::Value *rvalue, llvm::Value *lvalue,
|
|||||||
else if (storeMask == LLVMMaskAllOn) {
|
else if (storeMask == LLVMMaskAllOn) {
|
||||||
// Otherwise it is a masked store unless we can determine that the
|
// Otherwise it is a masked store unless we can determine that the
|
||||||
// mask is all on...
|
// mask is all on...
|
||||||
llvm::Instruction *si =
|
StoreInst(rvalue, lvalue, name);
|
||||||
new llvm::StoreInst(rvalue, lvalue, false /*not volatile*/, 4, bblock);
|
|
||||||
AddDebugPos(si);
|
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
maskedStore(rvalue, lvalue, rvalueType, storeMask);
|
maskedStore(rvalue, lvalue, rvalueType, storeMask);
|
||||||
|
|||||||
@@ -1970,7 +1970,7 @@ Data Layout
|
|||||||
|
|
||||||
In general, ``ispc`` tries to ensure that ``struct`` s and other complex
|
In general, ``ispc`` tries to ensure that ``struct`` s and other complex
|
||||||
datatypes are laid out in the same way in memory as they are in C/C++.
|
datatypes are laid out in the same way in memory as they are in C/C++.
|
||||||
Matching alignment is important for easy interoperability between C/C++
|
Matching structure layout is important for easy interoperability between C/C++
|
||||||
code and ``ispc`` code.
|
code and ``ispc`` code.
|
||||||
|
|
||||||
The main complexity in sharing data between ``ispc`` and C/C++ often comes
|
The main complexity in sharing data between ``ispc`` and C/C++ often comes
|
||||||
@@ -2023,11 +2023,6 @@ It can pass ``array`` to a ``ispc`` function defined as:
|
|||||||
|
|
||||||
export void foo(uniform float array[], uniform int count)
|
export void foo(uniform float array[], uniform int count)
|
||||||
|
|
||||||
(Though the pointer must be aligned to the compilation target's natural
|
|
||||||
vector width; see the discussion of alignment restrictions in `Data
|
|
||||||
Alignment and Aliasing`_ and the aligned allocation routines in
|
|
||||||
``examples/options/options.cpp`` for example.)
|
|
||||||
|
|
||||||
Similarly, ``struct`` s from the application can have embedded pointers.
|
Similarly, ``struct`` s from the application can have embedded pointers.
|
||||||
This is handled with similar ``[]`` syntax:
|
This is handled with similar ``[]`` syntax:
|
||||||
|
|
||||||
@@ -2062,55 +2057,20 @@ vector types from C/C++ application code if possible.
|
|||||||
Data Alignment and Aliasing
|
Data Alignment and Aliasing
|
||||||
---------------------------
|
---------------------------
|
||||||
|
|
||||||
There are two important constraints that must be adhered to when passing
|
There are two important constraints that must be adhered to when
|
||||||
pointers from the application to ``ispc`` programs.
|
passing pointers from the application to ``ispc`` programs.
|
||||||
|
|
||||||
The first constraint is alignment: any pointers from the host program that
|
The first constraint is that it must be valid to read memory at the
|
||||||
are passed to ``ispc`` must be aligned to natural vector alignment of
|
first element of any array that is passed to ``ispc``. In practice, this
|
||||||
system--for example, 16 byte alignment on a target that supports Intel®
|
should just happen naturally, but it does mean that it is illegal to pass a
|
||||||
SSE, 32-byte on an Intel® AVX target. If this constraint isn't met, the
|
``NULL`` pointer as a parameter to an ``ispc`` function called from the
|
||||||
program may abort at runtime with an unaligned memory access error.
|
application.
|
||||||
|
|
||||||
For example, in a ``ispc`` function with the following declaration:
|
The second constraint is that pointers and references in ``ispc`` programs
|
||||||
|
must not alias. The ``ispc`` compiler assumes that different pointers
|
||||||
::
|
can't end up pointing to the same memory location, either due to having the
|
||||||
|
same initial value, or through array indexing in the program as it
|
||||||
export void foo(uniform float in[], uniform float out[],
|
executes.
|
||||||
int count);
|
|
||||||
|
|
||||||
If the application is passing stack-allocated arrays for ``in`` and
|
|
||||||
``out``, these C/C++ compiler must be told to align these arrays.
|
|
||||||
|
|
||||||
::
|
|
||||||
|
|
||||||
// MSVC, SSE target
|
|
||||||
__declspec(align(16)) float in[16], out[16];
|
|
||||||
foo(in, out, 16);
|
|
||||||
|
|
||||||
With the gcc/clang compilers, the syntax for providing alignment is
|
|
||||||
slightly different:
|
|
||||||
|
|
||||||
::
|
|
||||||
|
|
||||||
float x[16] __attribute__ ((__align__(16)));
|
|
||||||
foo(in, out, 16);
|
|
||||||
|
|
||||||
If the data being passed is dynamically allocated, the appropriate system
|
|
||||||
aligned memory allocation routine should be used to allocate it (for
|
|
||||||
example, ``_aligned_malloc()`` with Windows\*, ``memalign()`` with
|
|
||||||
Linux\*; see the ``AllocAligned()`` function in ``examples/rt/rt.cpp`` for
|
|
||||||
an example.)
|
|
||||||
|
|
||||||
It is also required that it be valid to read memory at the first element of
|
|
||||||
any array that is passed to ``ispc``. In practice, this should just
|
|
||||||
happen naturally, but it does mean that it is illegal to pass a ``NULL``
|
|
||||||
pointer as a parameter to a ``ispc`` function called from the application.
|
|
||||||
|
|
||||||
The second key constraint is that pointers and references in ``ispc``
|
|
||||||
programs must not alias. The ``ispc`` compiler assumes that different
|
|
||||||
pointers can't end up pointing to the same memory location, either due to
|
|
||||||
having the same initial value, or through array indexing in the program as
|
|
||||||
it executed.
|
|
||||||
|
|
||||||
This aliasing constraint also applies to ``reference`` parameters to
|
This aliasing constraint also applies to ``reference`` parameters to
|
||||||
functions. Given a function like:
|
functions. Given a function like:
|
||||||
@@ -2127,8 +2087,8 @@ another case of aliasing, and if the caller calls the function as ``func(x,
|
|||||||
x)``, it's not guaranteed that the ``if`` test will evaluate to true, due
|
x)``, it's not guaranteed that the ``if`` test will evaluate to true, due
|
||||||
to the compiler's requirement of no aliasing.
|
to the compiler's requirement of no aliasing.
|
||||||
|
|
||||||
(In the future, ``ispc`` will have the ability to work with unaligned
|
(In the future, ``ispc`` will have a mechanism to indicate that pointers
|
||||||
memory as well as have a mechanism to indicate that pointers may alias.)
|
may alias.)
|
||||||
|
|
||||||
Using ISPC Effectively
|
Using ISPC Effectively
|
||||||
======================
|
======================
|
||||||
|
|||||||
@@ -38,15 +38,7 @@
|
|||||||
using namespace ispc;
|
using namespace ispc;
|
||||||
|
|
||||||
int main() {
|
int main() {
|
||||||
// Pointers passed to ispc-compiled code are currently required to have
|
float vin[16], vout[16];
|
||||||
// alignment equal to the target's native vector size. Here we align
|
|
||||||
// to 32 bytes to be safe for both SSE and AVX targets.
|
|
||||||
#ifdef _MSC_VER
|
|
||||||
__declspec(align(32)) float vin[16], vout[16];
|
|
||||||
#else
|
|
||||||
float vin[16] __attribute__((aligned(32)));
|
|
||||||
float vout[16] __attribute__((aligned(32)));
|
|
||||||
#endif
|
|
||||||
|
|
||||||
// Initialize input buffer
|
// Initialize input buffer
|
||||||
for (int i = 0; i < 16; ++i)
|
for (int i = 0; i < 16; ++i)
|
||||||
|
|||||||
Reference in New Issue
Block a user