From 865e430b56e0e926d516fbff83261d4eb95343b4 Mon Sep 17 00:00:00 2001
From: Matt Pharr <matt.pharr@intel.com>
Date: Thu, 23 Jun 2011 18:51:15 -0700
Subject: [PATCH] Finished updating alignment issues for vector types; don't
 assume pointers are aligned to the natural vector width.

---
 ctx.cpp                    | 38 +++++++++++++++------
 docs/ispc.txt              | 70 ++++++++------------------------------
 examples/simple/simple.cpp | 10 +-----
 3 files changed, 44 insertions(+), 74 deletions(-)
diff --git a/ctx.cpp b/ctx.cpp
index 9c72fd3d..12199dcb 100644
--- a/ctx.cpp
+++ b/ctx.cpp
@@ -1315,8 +1315,21 @@ FunctionEmitContext::LoadInst(llvm::Value *lvalue, const Type *type,
 
     if (llvm::isa<const llvm::PointerType>(lvalue->getType())) {
         // If the lvalue is a straight up regular pointer, then just issue
-        // a regular load
-        llvm::Instruction *inst = new llvm::LoadInst(lvalue, name ? name : "load", bblock);
+        // a regular load.  First figure out the alignment; in general we
+        // can just assume the natural alignment (0 here), but for varying
+        // atomic types, we need to make sure that the compiler emits
+        // unaligned vector loads, so we specify a reduced alignment here.
+        int align = 0;
+        const AtomicType *atomicType = dynamic_cast<const AtomicType *>(type);
+        if (atomicType != NULL && atomicType->IsVaryingType())
+            // We actually just want to align to the vector element
+            // alignment, but can't easily get that here, so just tell LLVM
+            // it's totally unaligned.  (This shouldn't make any difference
+            // vs the proper alignment in practice.)
+            align = 1;
+        llvm::Instruction *inst = new llvm::LoadInst(lvalue, name ? name : "load",
+                                                     false /* not volatile */,
+                                                     align, bblock);
         AddDebugPos(inst);
         return inst;
     }
@@ -1644,8 +1657,16 @@ FunctionEmitContext::StoreInst(llvm::Value *rvalue, llvm::Value *lvalue,
         return;
     }
 
-    llvm::Instruction *inst = new llvm::StoreInst(rvalue, lvalue, false /* not volatile */,
-                                                  4, bblock);
+    llvm::Instruction *inst;
+    if (llvm::isa<llvm::VectorType>(rvalue->getType()))
+        // Specify an unaligned store, since we don't know that the lvalue
+        // will in fact be aligned to a vector width here.  (Actually
+        // should be aligned to the alignment of the vector element type...)
+        inst = new llvm::StoreInst(rvalue, lvalue, false /* not volatile */,
+                                   1, bblock);
+    else
+        inst = new llvm::StoreInst(rvalue, lvalue, bblock);
+
     AddDebugPos(inst);
 }
 
@@ -1662,9 +1683,8 @@ FunctionEmitContext::StoreInst(llvm::Value *rvalue, llvm::Value *lvalue,
 
     // Figure out what kind of store we're doing here
     if (rvalueType->IsUniformType()) {
-        // The easy case; a regular store
-        llvm::Instruction *si = new llvm::StoreInst(rvalue, lvalue, false /* not volatile */,
-                                                    4, bblock);
+        // The easy case; a regular store, natural alignment is fine
+        llvm::Instruction *si = new llvm::StoreInst(rvalue, lvalue, bblock);
         AddDebugPos(si);
     }
     else if (llvm::isa<const llvm::ArrayType>(lvalue->getType()))
@@ -1674,9 +1694,7 @@ FunctionEmitContext::StoreInst(llvm::Value *rvalue, llvm::Value *lvalue,
     else if (storeMask == LLVMMaskAllOn) {
         // Otherwise it is a masked store unless we can determine that the
         // mask is all on...
-        llvm::Instruction *si = 
-            new llvm::StoreInst(rvalue, lvalue, false /*not volatile*/, 4, bblock);
-        AddDebugPos(si);
+        StoreInst(rvalue, lvalue, name);
     }
     else
         maskedStore(rvalue, lvalue, rvalueType, storeMask);
diff --git a/docs/ispc.txt b/docs/ispc.txt
index 76f13595..8292f478 100644
--- a/docs/ispc.txt
+++ b/docs/ispc.txt
@@ -1970,7 +1970,7 @@ Data Layout
 
 In general, ``ispc`` tries to ensure that ``struct`` s and other complex
 datatypes are laid out in the same way in memory as they are in C/C++.
-Matching alignment is important for easy interoperability between C/C++
+Matching structure layout is important for easy interoperability between C/C++
 code and ``ispc`` code.
 
 The main complexity in sharing data between ``ispc`` and C/C++ often comes
@@ -2023,11 +2023,6 @@ It can pass ``array`` to a ``ispc`` function defined as:
 
    export void foo(uniform float array[], uniform int count)
 
-(Though the pointer must be aligned to the compilation target's natural
-vector width; see the discussion of alignment restrictions in `Data
-Alignment and Aliasing`_ and the aligned allocation routines in
-``examples/options/options.cpp`` for example.)
-
 Similarly, ``struct`` s from the application can have embedded pointers.
 This is handled with similar ``[]`` syntax:
 
@@ -2062,55 +2057,20 @@ vector types from C/C++ application code if possible.
 Data Alignment and Aliasing
 ---------------------------
 
-There are two important constraints that must be adhered to when passing
-pointers from the application to ``ispc`` programs.
+There are two important constraints that must be adhered to when
+passing pointers from the application to ``ispc`` programs.
 
-The first constraint is alignment: any pointers from the host program that
-are passed to ``ispc`` must be aligned to natural vector alignment of
-system--for example, 16 byte alignment on a target that supports Intel®
-SSE, 32-byte on an Intel® AVX target.  If this constraint isn't met, the
-program may abort at runtime with an unaligned memory access error.
+The first is that it is required that it be valid to read memory at the
+first element of any array that is passed to ``ispc``.  In practice, this
+should just happen naturally, but it does mean that it is illegal to pass a
+``NULL`` pointer as a parameter to a ``ispc`` function called from the
+application.
 
-For example, in a ``ispc`` function with the following declaration:
-
-::
-
-    export void foo(uniform float in[], uniform float out[],
-                    int count);
-
-If the application is passing stack-allocated arrays for ``in`` and
-``out``, these C/C++ compiler must be told to align these arrays.
-
-::
-
-    // MSVC, SSE target
-    __declspec(align(16)) float in[16], out[16];
-    foo(in, out, 16);
-
-With the gcc/clang compilers, the syntax for providing alignment is
-slightly different:
-
-::
-
-    float x[16] __attribute__ ((__align__(16)));
-    foo(in, out, 16);
-
-If the data being passed is dynamically allocated, the appropriate system
-aligned memory allocation routine should be used to allocate it (for
-example, ``_aligned_malloc()`` with Windows\*, ``memalign()`` with
-Linux\*; see the ``AllocAligned()`` function in ``examples/rt/rt.cpp`` for
-an example.)
-
-It is also required that it be valid to read memory at the first element of
-any array that is passed to ``ispc``.  In practice, this should just
-happen naturally, but it does mean that it is illegal to pass a ``NULL``
-pointer as a parameter to a ``ispc`` function called from the application.
-
-The second key constraint is that pointers and references in ``ispc``
-programs must not alias.  The ``ispc`` compiler assumes that different
-pointers can't end up pointing to the same memory location, either due to
-having the same initial value, or through array indexing in the program as
-it executed.
+The second constraint is that pointers and references in ``ispc`` programs
+must not alias.  The ``ispc`` compiler assumes that different pointers
+can't end up pointing to the same memory location, either due to having the
+same initial value, or through array indexing in the program as it
+executes.
 
 This aliasing constraint also applies to ``reference`` parameters to
 functions.  Given a function like:
@@ -2127,8 +2087,8 @@ another case of aliasing, and if the caller calls the function as ``func(x,
 x)``, it's not guaranteed that the ``if`` test will evaluate to true, due
 to the compiler's requirement of no aliasing.
 
-(In the future, ``ispc`` will have the ability to work with unaligned
-memory as well as have a mechanism to indicate that pointers may alias.)
+(In the future, ``ispc`` will have a mechanism to indicate that pointers
+may alias.)
 
 Using ISPC Effectively
 ======================
diff --git a/examples/simple/simple.cpp b/examples/simple/simple.cpp
index 3b5bf028..9542532f 100644
--- a/examples/simple/simple.cpp
+++ b/examples/simple/simple.cpp
@@ -38,15 +38,7 @@
 using namespace ispc;
 
 int main() {
-    // Pointers passed to ispc-compiled code are currently required to have
-    // alignment equal to the target's native vector size.  Here we align
-    // to 32 bytes to be safe for both SSE and AVX targets.
-#ifdef _MSC_VER
-    __declspec(align(32)) float vin[16], vout[16];
-#else
-    float vin[16] __attribute__((aligned(32)));
-    float vout[16] __attribute__((aligned(32)));
-#endif
+    float vin[16], vout[16];
 
     // Initialize input buffer
     for (int i = 0; i < 16; ++i)