From 865e430b56e0e926d516fbff83261d4eb95343b4 Mon Sep 17 00:00:00 2001 From: Matt Pharr Date: Thu, 23 Jun 2011 18:51:15 -0700 Subject: [PATCH] Finished updating alignment issues for vector types; don't assume pointers are aligned to the natural vector width. --- ctx.cpp | 38 +++++++++++++++------ docs/ispc.txt | 70 ++++++++------------------------------ examples/simple/simple.cpp | 10 +----- 3 files changed, 44 insertions(+), 74 deletions(-) diff --git a/ctx.cpp b/ctx.cpp index 9c72fd3d..12199dcb 100644 --- a/ctx.cpp +++ b/ctx.cpp @@ -1315,8 +1315,21 @@ FunctionEmitContext::LoadInst(llvm::Value *lvalue, const Type *type, if (llvm::isa(lvalue->getType())) { // If the lvalue is a straight up regular pointer, then just issue - // a regular load - llvm::Instruction *inst = new llvm::LoadInst(lvalue, name ? name : "load", bblock); + // a regular load. First figure out the alignment; in general we + // can just assume the natural alignment (0 here), but for varying + // atomic types, we need to make sure that the compiler emits + // unaligned vector loads, so we specify a reduced alignment here. + int align = 0; + const AtomicType *atomicType = dynamic_cast(type); + if (atomicType != NULL && atomicType->IsVaryingType()) + // We actually just want to align to the vector element + // alignment, but can't easily get that here, so just tell LLVM + // it's totally unaligned. (This shouldn't make any difference + // vs the proper alignment in practice.) + align = 1; + llvm::Instruction *inst = new llvm::LoadInst(lvalue, name ? 
name : "load", + false /* not volatile */, + align, bblock); AddDebugPos(inst); return inst; } @@ -1644,8 +1657,16 @@ FunctionEmitContext::StoreInst(llvm::Value *rvalue, llvm::Value *lvalue, return; } - llvm::Instruction *inst = new llvm::StoreInst(rvalue, lvalue, false /* not volatile */, - 4, bblock); + llvm::Instruction *inst; + if (llvm::isa(rvalue->getType())) + // Specify an unaligned store, since we don't know that the lvalue + // will in fact be aligned to a vector width here. (Actually + // should be aligned to the alignment of the vector element type...) + inst = new llvm::StoreInst(rvalue, lvalue, false /* not volatile */, + 1, bblock); + else + inst = new llvm::StoreInst(rvalue, lvalue, bblock); + AddDebugPos(inst); } @@ -1662,9 +1683,8 @@ FunctionEmitContext::StoreInst(llvm::Value *rvalue, llvm::Value *lvalue, // Figure out what kind of store we're doing here if (rvalueType->IsUniformType()) { - // The easy case; a regular store - llvm::Instruction *si = new llvm::StoreInst(rvalue, lvalue, false /* not volatile */, - 4, bblock); + // The easy case; a regular store, natural alignment is fine + llvm::Instruction *si = new llvm::StoreInst(rvalue, lvalue, bblock); AddDebugPos(si); } else if (llvm::isa(lvalue->getType())) @@ -1674,9 +1694,7 @@ FunctionEmitContext::StoreInst(llvm::Value *rvalue, llvm::Value *lvalue, else if (storeMask == LLVMMaskAllOn) { // Otherwise it is a masked store unless we can determine that the // mask is all on... - llvm::Instruction *si = - new llvm::StoreInst(rvalue, lvalue, false /*not volatile*/, 4, bblock); - AddDebugPos(si); + StoreInst(rvalue, lvalue, name); } else maskedStore(rvalue, lvalue, rvalueType, storeMask); diff --git a/docs/ispc.txt b/docs/ispc.txt index 76f13595..8292f478 100644 --- a/docs/ispc.txt +++ b/docs/ispc.txt @@ -1970,7 +1970,7 @@ Data Layout In general, ``ispc`` tries to ensure that ``struct`` s and other complex datatypes are laid out in the same way in memory as they are in C/C++. 
-Matching alignment is important for easy interoperability between C/C++ +Matching structure layout is important for easy interoperability between C/C++ code and ``ispc`` code. The main complexity in sharing data between ``ispc`` and C/C++ often comes @@ -2023,11 +2023,6 @@ It can pass ``array`` to a ``ispc`` function defined as: export void foo(uniform float array[], uniform int count) -(Though the pointer must be aligned to the compilation target's natural -vector width; see the discussion of alignment restrictions in `Data -Alignment and Aliasing`_ and the aligned allocation routines in -``examples/options/options.cpp`` for example.) - Similarly, ``struct`` s from the application can have embedded pointers. This is handled with similar ``[]`` syntax: @@ -2062,55 +2057,20 @@ vector types from C/C++ application code if possible. Data Alignment and Aliasing --------------------------- -There are two important constraints that must be adhered to when passing -pointers from the application to ``ispc`` programs. +There are two important constraints that must be adhered to when +passing pointers from the application to ``ispc`` programs. -The first constraint is alignment: any pointers from the host program that -are passed to ``ispc`` must be aligned to natural vector alignment of -system--for example, 16 byte alignment on a target that supports Intel® -SSE, 32-byte on an Intel® AVX target. If this constraint isn't met, the -program may abort at runtime with an unaligned memory access error. +The first is that it is required that it be valid to read memory at the +first element of any array that is passed to ``ispc``. In practice, this +should just happen naturally, but it does mean that it is illegal to pass a +``NULL`` pointer as a parameter to a ``ispc`` function called from the +application. 
-For example, in a ``ispc`` function with the following declaration: - -:: - - export void foo(uniform float in[], uniform float out[], - int count); - -If the application is passing stack-allocated arrays for ``in`` and -``out``, these C/C++ compiler must be told to align these arrays. - -:: - - // MSVC, SSE target - __declspec(align(16)) float in[16], out[16]; - foo(in, out, 16); - -With the gcc/clang compilers, the syntax for providing alignment is -slightly different: - -:: - - float x[16] __attribute__ ((__align__(16))); - foo(in, out, 16); - -If the data being passed is dynamically allocated, the appropriate system -aligned memory allocation routine should be used to allocate it (for -example, ``_aligned_malloc()`` with Windows\*, ``memalign()`` with -Linux\*; see the ``AllocAligned()`` function in ``examples/rt/rt.cpp`` for -an example.) - -It is also required that it be valid to read memory at the first element of -any array that is passed to ``ispc``. In practice, this should just -happen naturally, but it does mean that it is illegal to pass a ``NULL`` -pointer as a parameter to a ``ispc`` function called from the application. - -The second key constraint is that pointers and references in ``ispc`` -programs must not alias. The ``ispc`` compiler assumes that different -pointers can't end up pointing to the same memory location, either due to -having the same initial value, or through array indexing in the program as -it executed. +The second constraint is that pointers and references in ``ispc`` programs +must not alias. The ``ispc`` compiler assumes that different pointers +can't end up pointing to the same memory location, either due to having the +same initial value, or through array indexing in the program as it +executed. This aliasing constraint also applies to ``reference`` parameters to functions. 
Given a function like: @@ -2127,8 +2087,8 @@ another case of aliasing, and if the caller calls the function as ``func(x, x)``, it's not guaranteed that the ``if`` test will evaluate to true, due to the compiler's requirement of no aliasing. -(In the future, ``ispc`` will have the ability to work with unaligned -memory as well as have a mechanism to indicate that pointers may alias.) +(In the future, ``ispc`` will have a mechanism to indicate that pointers +may alias.) Using ISPC Effectively ====================== diff --git a/examples/simple/simple.cpp b/examples/simple/simple.cpp index 3b5bf028..9542532f 100644 --- a/examples/simple/simple.cpp +++ b/examples/simple/simple.cpp @@ -38,15 +38,7 @@ using namespace ispc; int main() { - // Pointers passed to ispc-compiled code are currently required to have - // alignment equal to the target's native vector size. Here we align - // to 32 bytes to be safe for both SSE and AVX targets. -#ifdef _MSC_VER - __declspec(align(32)) float vin[16], vout[16]; -#else - float vin[16] __attribute__((aligned(32))); - float vout[16] __attribute__((aligned(32))); -#endif + float vin[16], vout[16]; // Initialize input buffer for (int i = 0; i < 16; ++i)