diff --git a/Makefile b/Makefile index c37ecda0..a1354df2 100644 --- a/Makefile +++ b/Makefile @@ -56,7 +56,7 @@ endif ARCH_TYPE = $(shell arch) LLVM_CXXFLAGS=$(shell $(LLVM_CONFIG) --cppflags) -LLVM_VERSION=LLVM_$(shell $(LLVM_CONFIG) --version | sed -e s/\\./_/ -e s/svn//) +LLVM_VERSION=LLVM_$(shell $(LLVM_CONFIG) --version | sed -e s/\\./_/ -e s/svn// -e s/\.0//) LLVM_VERSION_DEF=-D$(LLVM_VERSION) LLVM_COMPONENTS = engine ipo bitreader bitwriter instrumentation linker nvptx @@ -119,9 +119,9 @@ CXXFLAGS=$(OPT) $(LLVM_CXXFLAGS) -I. -Iobjs/ -I$(CLANG_INCLUDE) \ $(LLVM_VERSION_DEF) \ -Wall \ -DBUILD_DATE="\"$(BUILD_DATE)\"" -DBUILD_VERSION="\"$(BUILD_VERSION)\"" \ - -Wno-sign-compare -Wno-unused-function -ifneq ($(LLVM_VERSION),LLVM_3_1) - CXXFLAGS+=-Werror + -Wno-sign-compare -Wno-unused-function -Werror +ifeq ($(LLVM_VERSION),LLVM_3_5) + CXXFLAGS+=-std=c++11 -Wno-c99-extensions -Wno-deprecated-register endif ifneq ($(ARM_ENABLED), 0) CXXFLAGS+=-DISPC_ARM_ENABLED diff --git a/ast.cpp b/ast.cpp index 60b20a80..19eff152 100644 --- a/ast.cpp +++ b/ast.cpp @@ -1,5 +1,5 @@ /* - Copyright (c) 2011-2012, Intel Corporation + Copyright (c) 2011-2013, Intel Corporation All rights reserved. Redistribution and use in source and binary forms, with or without diff --git a/ast.h b/ast.h index d98c1d37..e0f864ba 100644 --- a/ast.h +++ b/ast.h @@ -1,5 +1,5 @@ /* - Copyright (c) 2011-2012, Intel Corporation + Copyright (c) 2011-2013, Intel Corporation All rights reserved. Redistribution and use in source and binary forms, with or without diff --git a/builtins.cpp b/builtins.cpp index 33788515..3fc89e85 100644 --- a/builtins.cpp +++ b/builtins.cpp @@ -1,5 +1,5 @@ /* - Copyright (c) 2010-2013, Intel Corporation + Copyright (c) 2010-2014, Intel Corporation All rights reserved. 
Redistribution and use in source and binary forms, with or without @@ -66,7 +66,11 @@ #include #include #endif -#include +#if defined(LLVM_3_5) + #include +#else + #include +#endif #include #include #include @@ -817,6 +821,17 @@ AddBitcodeToModule(const unsigned char *bitcode, int length, // architecture and investigate what happened. // Generally we allow library DataLayout to be subset of module // DataLayout or library DataLayout to be empty. +#if defined(LLVM_3_5) + if (!VerifyDataLayoutCompatibility(module->getDataLayoutStr(), + bcModule->getDataLayoutStr())) { + Warning(SourcePos(), "Module DataLayout is incompatible with " + "library DataLayout:\n" + "Module DL: %s\n" + "Library DL: %s\n", + module->getDataLayoutStr().c_str(), + bcModule->getDataLayoutStr().c_str()); + } +#else if (!VerifyDataLayoutCompatibility(module->getDataLayout(), bcModule->getDataLayout())) { Warning(SourcePos(), "Module DataLayout is incompatible with " @@ -826,6 +841,7 @@ AddBitcodeToModule(const unsigned char *bitcode, int length, module->getDataLayout().c_str(), bcModule->getDataLayout().c_str()); } +#endif } bcModule->setTargetTriple(mTriple.str()); diff --git a/builtins.h b/builtins.h index 14f3896e..28f58430 100644 --- a/builtins.h +++ b/builtins.h @@ -1,5 +1,5 @@ /* - Copyright (c) 2010-2011, Intel Corporation + Copyright (c) 2010-2013, Intel Corporation All rights reserved. Redistribution and use in source and binary forms, with or without diff --git a/builtins/builtins.c b/builtins/builtins.c index e7becf90..b65feb9a 100644 --- a/builtins/builtins.c +++ b/builtins/builtins.c @@ -1,5 +1,5 @@ /* - Copyright (c) 2010-2012, Intel Corporation + Copyright (c) 2010-2013, Intel Corporation All rights reserved. 
Redistribution and use in source and binary forms, with or without diff --git a/builtins/target-avx-common.ll b/builtins/target-avx-common.ll index a63dd7b2..48a56bd9 100644 --- a/builtins/target-avx-common.ll +++ b/builtins/target-avx-common.ll @@ -1,4 +1,4 @@ -;; Copyright (c) 2010-2013, Intel Corporation +;; Copyright (c) 2010-2014, Intel Corporation ;; All rights reserved. ;; ;; Redistribution and use in source and binary forms, with or without @@ -203,49 +203,51 @@ define void @__fastmath() nounwind alwaysinline { ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; float min/max -declare <4 x float> @llvm.x86.sse.max.ss(<4 x float>, <4 x float>) nounwind readnone -declare <4 x float> @llvm.x86.sse.min.ss(<4 x float>, <4 x float>) nounwind readnone - define float @__max_uniform_float(float, float) nounwind readonly alwaysinline { - sse_binary_scalar(ret, 4, float, @llvm.x86.sse.max.ss, %0, %1) + %cmp = fcmp ogt float %1, %0 + %ret = select i1 %cmp, float %1, float %0 ret float %ret } define float @__min_uniform_float(float, float) nounwind readonly alwaysinline { - sse_binary_scalar(ret, 4, float, @llvm.x86.sse.min.ss, %0, %1) + %cmp = fcmp ogt float %1, %0 + %ret = select i1 %cmp, float %0, float %1 ret float %ret } ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; double precision min/max -declare <2 x double> @llvm.x86.sse2.max.sd(<2 x double>, <2 x double>) nounwind readnone -declare <2 x double> @llvm.x86.sse2.min.sd(<2 x double>, <2 x double>) nounwind readnone - define double @__min_uniform_double(double, double) nounwind readnone alwaysinline { - sse_binary_scalar(ret, 2, double, @llvm.x86.sse2.min.sd, %0, %1) + %cmp = fcmp ogt double %1, %0 + %ret = select i1 %cmp, double %0, double %1 ret double %ret } define double @__max_uniform_double(double, double) nounwind readnone alwaysinline { - sse_binary_scalar(ret, 2, double, @llvm.x86.sse2.max.sd, %0, %1) + %cmp = fcmp ogt double %1, %0 + %ret = 
select i1 %cmp, double %1, double %0 ret double %ret } +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +declare <4 x i32> @llvm.x86.sse41.pminsd(<4 x i32>, <4 x i32>) nounwind readnone +declare <4 x i32> @llvm.x86.sse41.pmaxsd(<4 x i32>, <4 x i32>) nounwind readnone +declare <4 x i32> @llvm.x86.sse41.pminud(<4 x i32>, <4 x i32>) nounwind readnone +declare <4 x i32> @llvm.x86.sse41.pmaxud(<4 x i32>, <4 x i32>) nounwind readnone ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; int min/max -declare <4 x i32> @llvm.x86.sse41.pminsd(<4 x i32>, <4 x i32>) nounwind readnone -declare <4 x i32> @llvm.x86.sse41.pmaxsd(<4 x i32>, <4 x i32>) nounwind readnone - define i32 @__min_uniform_int32(i32, i32) nounwind readonly alwaysinline { - sse_binary_scalar(ret, 4, i32, @llvm.x86.sse41.pminsd, %0, %1) + %cmp = icmp sgt i32 %1, %0 + %ret = select i1 %cmp, i32 %0, i32 %1 ret i32 %ret } define i32 @__max_uniform_int32(i32, i32) nounwind readonly alwaysinline { - sse_binary_scalar(ret, 4, i32, @llvm.x86.sse41.pmaxsd, %0, %1) + %cmp = icmp sgt i32 %1, %0 + %ret = select i1 %cmp, i32 %1, i32 %0 ret i32 %ret } @@ -253,16 +255,15 @@ define i32 @__max_uniform_int32(i32, i32) nounwind readonly alwaysinline { ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; unsigned int min/max -declare <4 x i32> @llvm.x86.sse41.pminud(<4 x i32>, <4 x i32>) nounwind readnone -declare <4 x i32> @llvm.x86.sse41.pmaxud(<4 x i32>, <4 x i32>) nounwind readnone - define i32 @__min_uniform_uint32(i32, i32) nounwind readonly alwaysinline { - sse_binary_scalar(ret, 4, i32, @llvm.x86.sse41.pminud, %0, %1) + %cmp = icmp ugt i32 %1, %0 + %ret = select i1 %cmp, i32 %0, i32 %1 ret i32 %ret } define i32 @__max_uniform_uint32(i32, i32) nounwind readonly alwaysinline { - sse_binary_scalar(ret, 4, i32, @llvm.x86.sse41.pmaxud, %0, %1) + %cmp = icmp ugt i32 %1, %0 + %ret = select i1 %cmp, i32 %1, i32 %0 ret i32 %ret } diff --git 
a/builtins/target-avx-x2.ll b/builtins/target-avx-x2.ll index 69026515..5bdc547c 100644 --- a/builtins/target-avx-x2.ll +++ b/builtins/target-avx-x2.ll @@ -1,4 +1,4 @@ -;; Copyright (c) 2010-2012, Intel Corporation +;; Copyright (c) 2010-2014, Intel Corporation ;; All rights reserved. ;; ;; Redistribution and use in source and binary forms, with or without diff --git a/builtins/target-avx.ll b/builtins/target-avx.ll index e0f4e45d..aa120260 100644 --- a/builtins/target-avx.ll +++ b/builtins/target-avx.ll @@ -1,4 +1,4 @@ -;; Copyright (c) 2010-2012, Intel Corporation +;; Copyright (c) 2010-2014, Intel Corporation ;; All rights reserved. ;; ;; Redistribution and use in source and binary forms, with or without diff --git a/builtins/target-avx1-i64x4base.ll b/builtins/target-avx1-i64x4base.ll index 004a8702..8f23e51c 100644 --- a/builtins/target-avx1-i64x4base.ll +++ b/builtins/target-avx1-i64x4base.ll @@ -1,4 +1,4 @@ -;; Copyright (c) 2013, Intel Corporation +;; Copyright (c) 2013-2014, Intel Corporation ;; All rights reserved. ;; ;; Redistribution and use in source and binary forms, with or without diff --git a/builtins/target-avx1-x2.ll b/builtins/target-avx1-x2.ll index 562d7ff0..a278e6f9 100644 --- a/builtins/target-avx1-x2.ll +++ b/builtins/target-avx1-x2.ll @@ -1,4 +1,4 @@ -;; Copyright (c) 2010-2011, Intel Corporation +;; Copyright (c) 2010-2012, Intel Corporation ;; All rights reserved. ;; ;; Redistribution and use in source and binary forms, with or without diff --git a/builtins/target-avx1.ll b/builtins/target-avx1.ll index a9ddc112..8aaede89 100644 --- a/builtins/target-avx1.ll +++ b/builtins/target-avx1.ll @@ -1,4 +1,4 @@ -;; Copyright (c) 2010-2011, Intel Corporation +;; Copyright (c) 2010-2013, Intel Corporation ;; All rights reserved. 
;; ;; Redistribution and use in source and binary forms, with or without diff --git a/builtins/target-avx11-x2.ll b/builtins/target-avx11-x2.ll index 1aa6345c..3da9c890 100644 --- a/builtins/target-avx11-x2.ll +++ b/builtins/target-avx11-x2.ll @@ -1,4 +1,4 @@ -;; Copyright (c) 2012, Intel Corporation +;; Copyright (c) 2012-2013, Intel Corporation ;; All rights reserved. ;; ;; Redistribution and use in source and binary forms, with or without diff --git a/builtins/target-avx11.ll b/builtins/target-avx11.ll index c4c421a0..dd615779 100644 --- a/builtins/target-avx11.ll +++ b/builtins/target-avx11.ll @@ -1,4 +1,4 @@ -;; Copyright (c) 2012, Intel Corporation +;; Copyright (c) 2012-2013, Intel Corporation ;; All rights reserved. ;; ;; Redistribution and use in source and binary forms, with or without diff --git a/builtins/target-avx2-x2.ll b/builtins/target-avx2-x2.ll index 053fd078..4eb6720e 100644 --- a/builtins/target-avx2-x2.ll +++ b/builtins/target-avx2-x2.ll @@ -1,4 +1,4 @@ -;; Copyright (c) 2010-2012, Intel Corporation +;; Copyright (c) 2010-2013, Intel Corporation ;; All rights reserved. ;; ;; Redistribution and use in source and binary forms, with or without diff --git a/builtins/target-avx2.ll b/builtins/target-avx2.ll index 20ecef47..c9e21e65 100644 --- a/builtins/target-avx2.ll +++ b/builtins/target-avx2.ll @@ -1,4 +1,4 @@ -;; Copyright (c) 2010-2012, Intel Corporation +;; Copyright (c) 2010-2013, Intel Corporation ;; All rights reserved. ;; ;; Redistribution and use in source and binary forms, with or without diff --git a/builtins/target-generic-1.ll b/builtins/target-generic-1.ll index 15f3ed80..06121a6c 100644 --- a/builtins/target-generic-1.ll +++ b/builtins/target-generic-1.ll @@ -1,3 +1,35 @@ +;; Copyright (c) 2012-2013, Intel Corporation +;; All rights reserved. 
+;; +;; Redistribution and use in source and binary forms, with or without +;; modification, are permitted provided that the following conditions are +;; met: +;; +;; * Redistributions of source code must retain the above copyright +;; notice, this list of conditions and the following disclaimer. +;; +;; * Redistributions in binary form must reproduce the above copyright +;; notice, this list of conditions and the following disclaimer in the +;; documentation and/or other materials provided with the distribution. +;; +;; * Neither the name of Intel Corporation nor the names of its +;; contributors may be used to endorse or promote products derived from +;; this software without specific prior written permission. +;; +;; +;; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +;; IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +;; TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +;; PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER +;; OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +;; EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +;; PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +;; PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +;; LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +;; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +;; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; Define the standard library builtins for the NOVEC target define(`MASK',`i32') diff --git a/builtins/target-generic-16.ll b/builtins/target-generic-16.ll index cc5644bc..9fe0dae6 100644 --- a/builtins/target-generic-16.ll +++ b/builtins/target-generic-16.ll @@ -1,4 +1,4 @@ -;; Copyright (c) 2010-2011, Intel Corporation +;; Copyright (c) 2010-2014, Intel Corporation ;; All rights reserved. ;; ;; Redistribution and use in source and binary forms, with or without diff --git a/builtins/target-generic-32.ll b/builtins/target-generic-32.ll index 8eb31c48..cc895c28 100644 --- a/builtins/target-generic-32.ll +++ b/builtins/target-generic-32.ll @@ -1,4 +1,4 @@ -;; Copyright (c) 2010-2012, Intel Corporation +;; Copyright (c) 2010-2014, Intel Corporation ;; All rights reserved. ;; ;; Redistribution and use in source and binary forms, with or without diff --git a/builtins/target-generic-4.ll b/builtins/target-generic-4.ll index d80c5b91..8ed18f67 100644 --- a/builtins/target-generic-4.ll +++ b/builtins/target-generic-4.ll @@ -1,4 +1,4 @@ -;; Copyright (c) 2010-2011, Intel Corporation +;; Copyright (c) 2010-2014, Intel Corporation ;; All rights reserved. ;; ;; Redistribution and use in source and binary forms, with or without diff --git a/builtins/target-generic-64.ll b/builtins/target-generic-64.ll index 6a044c41..5ab429fc 100644 --- a/builtins/target-generic-64.ll +++ b/builtins/target-generic-64.ll @@ -1,4 +1,4 @@ -;; Copyright (c) 2010-2012, Intel Corporation +;; Copyright (c) 2010-2014, Intel Corporation ;; All rights reserved. 
;; ;; Redistribution and use in source and binary forms, with or without diff --git a/builtins/target-generic-8.ll b/builtins/target-generic-8.ll index 4353658c..47a7fe71 100644 --- a/builtins/target-generic-8.ll +++ b/builtins/target-generic-8.ll @@ -1,4 +1,4 @@ -;; Copyright (c) 2010-2011, Intel Corporation +;; Copyright (c) 2010-2014, Intel Corporation ;; All rights reserved. ;; ;; Redistribution and use in source and binary forms, with or without diff --git a/builtins/target-generic-common.ll b/builtins/target-generic-common.ll index c560e241..2abaf9b3 100644 --- a/builtins/target-generic-common.ll +++ b/builtins/target-generic-common.ll @@ -1,4 +1,4 @@ -;; Copyright (c) 2010-2012, Intel Corporation +;; Copyright (c) 2010-2014, Intel Corporation ;; All rights reserved. ;; ;; Redistribution and use in source and binary forms, with or without diff --git a/builtins/target-sse2-common.ll b/builtins/target-sse2-common.ll index b20fdfb4..bf59b230 100644 --- a/builtins/target-sse2-common.ll +++ b/builtins/target-sse2-common.ll @@ -1,4 +1,4 @@ -;; Copyright (c) 2010-2011, Intel Corporation +;; Copyright (c) 2010-2014, Intel Corporation ;; All rights reserved. ;; ;; Redistribution and use in source and binary forms, with or without diff --git a/builtins/target-sse2-x2.ll b/builtins/target-sse2-x2.ll index 4bee3241..2707134b 100644 --- a/builtins/target-sse2-x2.ll +++ b/builtins/target-sse2-x2.ll @@ -1,4 +1,4 @@ -;; Copyright (c) 2010-2012, Intel Corporation +;; Copyright (c) 2010-2014, Intel Corporation ;; All rights reserved. ;; ;; Redistribution and use in source and binary forms, with or without diff --git a/builtins/target-sse2.ll b/builtins/target-sse2.ll index 7f82f933..0f13b46f 100644 --- a/builtins/target-sse2.ll +++ b/builtins/target-sse2.ll @@ -1,4 +1,4 @@ -;; Copyright (c) 2010-2012, Intel Corporation +;; Copyright (c) 2010-2014, Intel Corporation ;; All rights reserved. 
;; ;; Redistribution and use in source and binary forms, with or without diff --git a/builtins/target-sse4-x2.ll b/builtins/target-sse4-x2.ll index 70e3d01e..6dc81308 100644 --- a/builtins/target-sse4-x2.ll +++ b/builtins/target-sse4-x2.ll @@ -1,4 +1,4 @@ -;; Copyright (c) 2010-2012, Intel Corporation +;; Copyright (c) 2010-2014, Intel Corporation ;; All rights reserved. ;; ;; Redistribution and use in source and binary forms, with or without diff --git a/builtins/target-sse4.ll b/builtins/target-sse4.ll index 18f0d80e..59e80a24 100644 --- a/builtins/target-sse4.ll +++ b/builtins/target-sse4.ll @@ -1,4 +1,4 @@ -;; Copyright (c) 2010-2012, Intel Corporation +;; Copyright (c) 2010-2014, Intel Corporation ;; All rights reserved. ;; ;; Redistribution and use in source and binary forms, with or without diff --git a/builtins/util.m4 b/builtins/util.m4 index 6c0abfdf..7fd69dec 100644 --- a/builtins/util.m4 +++ b/builtins/util.m4 @@ -1,4 +1,4 @@ -;; Copyright (c) 2010-2013, Intel Corporation +;; Copyright (c) 2010-2014, Intel Corporation ;; All rights reserved. 
;; ;; Redistribution and use in source and binary forms, with or without @@ -1497,7 +1497,12 @@ define <$1 x $2> @__atomic_compare_exchange_$3_global($2* %ptr, <$1 x $2> %cmp, per_lane($1, <$1 x MASK> %mask, ` %cmp_LANE_ID = extractelement <$1 x $2> %cmp, i32 LANE %val_LANE_ID = extractelement <$1 x $2> %val, i32 LANE - %r_LANE_ID = cmpxchg $2 * %ptr, $2 %cmp_LANE_ID, $2 %val_LANE_ID seq_cst + ifelse(LLVM_VERSION,LLVM_3_5,` + %r_LANE_ID = cmpxchg $2 * %ptr, $2 %cmp_LANE_ID, $2 %val_LANE_ID seq_cst seq_cst + ',` + %r_LANE_ID = cmpxchg $2 * %ptr, $2 %cmp_LANE_ID, $2 %val_LANE_ID seq_cst + ') + %rp_LANE_ID = getelementptr $2 * %rptr32, i32 LANE store $2 %r_LANE_ID, $2 * %rp_LANE_ID') @@ -1507,7 +1512,11 @@ define <$1 x $2> @__atomic_compare_exchange_$3_global($2* %ptr, <$1 x $2> %cmp, define $2 @__atomic_compare_exchange_uniform_$3_global($2* %ptr, $2 %cmp, $2 %val) nounwind alwaysinline { - %r = cmpxchg $2 * %ptr, $2 %cmp, $2 %val seq_cst + ifelse(LLVM_VERSION,LLVM_3_5,` + %r = cmpxchg $2 * %ptr, $2 %cmp, $2 %val seq_cst seq_cst + ',` + %r = cmpxchg $2 * %ptr, $2 %cmp, $2 %val seq_cst + ') ret $2 %r } ') diff --git a/cbackend.cpp b/cbackend.cpp index 6465d466..cb56cb82 100644 --- a/cbackend.cpp +++ b/cbackend.cpp @@ -66,9 +66,15 @@ #if defined(LLVM_3_5) #include "llvm/IR/Verifier.h" #include + #include "llvm/IR/CallSite.h" + #include "llvm/IR/CFG.h" + #include "llvm/IR/GetElementPtrTypeIterator.h" #else #include "llvm/Analysis/Verifier.h" #include + #include "llvm/Support/CallSite.h" + #include "llvm/Support/CFG.h" + #include "llvm/Support/GetElementPtrTypeIterator.h" #endif #include "llvm/Analysis/ValueTracking.h" #include "llvm/CodeGen/Passes.h" @@ -82,22 +88,19 @@ #include "llvm/MC/MCRegisterInfo.h" #include "llvm/MC/MCSubtargetInfo.h" #include "llvm/MC/MCSymbol.h" -#if defined(LLVM_3_1) - #include "llvm/Target/TargetData.h" -#elif defined(LLVM_3_2) +#if defined(LLVM_3_2) #include "llvm/DataLayout.h" #else // LLVM 3.3+ #include "llvm/IR/DataLayout.h" #endif 
-#include "llvm/Support/CallSite.h" -#include "llvm/Support/CFG.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/FormattedStream.h" -#include "llvm/Support/GetElementPtrTypeIterator.h" #if defined(LLVM_3_1) || defined(LLVM_3_2) #include "llvm/Support/InstVisitor.h" -#else // LLVM 3.3+ +#elif defined (LLVM_3_3) || defined (LLVM_3_4) #include "llvm/InstVisitor.h" +#else // LLVM 3.5+ + #include "llvm/IR/InstVisitor.h" #endif #include "llvm/Support/MathExtras.h" #include "llvm/Support/TargetRegistry.h" @@ -461,7 +464,11 @@ namespace { // Must not be used in inline asm, extractelement, or shufflevector. if (I.hasOneUse()) { +#if defined(LLVM_3_5) + const llvm::Instruction &User = llvm::cast(*I.user_back()); +#else const llvm::Instruction &User = llvm::cast(*I.use_back()); +#endif if (isInlineAsm(User) || llvm::isa(User) || llvm::isa(User) || llvm::isa(User) || llvm::isa(User)) @@ -469,7 +476,11 @@ namespace { } // Only inline instruction it if it's use is in the same BB as the inst. 
+#if defined(LLVM_3_5) + return I.getParent() == llvm::cast(I.user_back())->getParent(); +#else return I.getParent() == llvm::cast(I.use_back())->getParent(); +#endif } // isDirectAlloca - Define fixed sized allocas in the entry block as direct @@ -1462,7 +1473,7 @@ void CWriter::printConstant(llvm::Constant *CPV, bool Static) { char Buffer[100]; uint64_t ll = llvm::DoubleToBits(V); - sprintf(Buffer, "0x%"PRIx64, ll); + sprintf(Buffer, "0x%" PRIx64, ll); std::string Num(&Buffer[0], &Buffer[6]); unsigned long Val = strtoul(Num.c_str(), 0, 16); @@ -3123,7 +3134,11 @@ void CWriter::visitSwitchInst(llvm::SwitchInst &SI) { Out << ":\n"; printPHICopiesForSuccessor (SI.getParent(), Succ, 2); printBranchToBlock(SI.getParent(), Succ, 2); +#if defined (LLVM_3_5) + if (llvm::Function::iterator(Succ) == std::next(llvm::Function::iterator(SI.getParent()))) +#else if (llvm::Function::iterator(Succ) == llvm::next(llvm::Function::iterator(SI.getParent()))) +#endif Out << " break;\n"; } @@ -3144,7 +3159,11 @@ bool CWriter::isGotoCodeNecessary(llvm::BasicBlock *From, llvm::BasicBlock *To) /// FIXME: This should be reenabled, but loop reordering safe!! return true; +#if defined (LLVM_3_5) + if (std::next(llvm::Function::iterator(From)) != llvm::Function::iterator(To)) +#else if (llvm::next(llvm::Function::iterator(From)) != llvm::Function::iterator(To)) +#endif return true; // Not the direct successor, we need a goto. //llvm::isa(From->getTerminator()) @@ -3752,7 +3771,11 @@ void CWriter::lowerIntrinsics(llvm::Function &F) { // All other intrinsic calls we must lower. 
llvm::Instruction *Before = 0; if (CI != &BB->front()) +#if defined(LLVM_3_5) + Before = std::prev(llvm::BasicBlock::iterator(CI)); +#else Before = prior(llvm::BasicBlock::iterator(CI)); +#endif IL->LowerIntrinsicCall(CI); if (Before) { // Move iterator to instruction after call diff --git a/ctx.cpp b/ctx.cpp index 1097a422..801c7392 100644 --- a/ctx.cpp +++ b/ctx.cpp @@ -1,5 +1,5 @@ /* - Copyright (c) 2010-2013, Intel Corporation + Copyright (c) 2010-2014, Intel Corporation All rights reserved. Redistribution and use in source and binary forms, with or without @@ -278,7 +278,7 @@ FunctionEmitContext::FunctionEmitContext(Function *func, Symbol *funSym, disableGSWarningCount = 0; const Type *returnType = function->GetReturnType(); - if (!returnType || Type::Equal(returnType, AtomicType::Void)) + if (!returnType || returnType->IsVoidType()) returnValuePtr = NULL; else { llvm::Type *ftype = returnType->LLVMType(g->ctx); @@ -1246,7 +1246,7 @@ FunctionEmitContext::GetLabels() { void FunctionEmitContext::CurrentLanesReturned(Expr *expr, bool doCoherenceCheck) { const Type *returnType = function->GetReturnType(); - if (Type::Equal(returnType, AtomicType::Void)) { + if (returnType->IsVoidType()) { if (expr != NULL) Error(expr->pos, "Can't return non-void type \"%s\" from void function.", expr->GetType()->GetString().c_str()); @@ -1628,7 +1628,14 @@ FunctionEmitContext::StartScope() { llvm::DILexicalBlock lexicalBlock = m->diBuilder->createLexicalBlock(parentScope, diFile, currentPos.first_line, +#if defined(LLVM_3_5) + // Revision 202736 in LLVM adds support of DWARF discriminator + // to the last argument and revision 202737 in clang adds 0 + // for the last argument by default. 
+ currentPos.first_column, 0); +#else currentPos.first_column); +#endif AssertPos(currentPos, lexicalBlock.Verify()); debugScopes.push_back(lexicalBlock); } @@ -3592,7 +3599,7 @@ FunctionEmitContext::ReturnInst() { rinst = llvm::ReturnInst::Create(*g->ctx, retVal, bblock); } else { - AssertPos(currentPos, Type::Equal(function->GetReturnType(), AtomicType::Void)); + AssertPos(currentPos, function->GetReturnType()->IsVoidType()); rinst = llvm::ReturnInst::Create(*g->ctx, bblock); } diff --git a/ctx.h b/ctx.h index 57160c17..54729e4f 100644 --- a/ctx.h +++ b/ctx.h @@ -1,5 +1,5 @@ /* - Copyright (c) 2010-2013, Intel Corporation + Copyright (c) 2010-2014, Intel Corporation All rights reserved. Redistribution and use in source and binary forms, with or without @@ -47,9 +47,9 @@ #include #include #endif -#if defined(LLVM_3_1) - #include - #include +#if defined(LLVM_3_5) + #include + #include #else #include #include diff --git a/decl.cpp b/decl.cpp index 27a6d580..279cfbfc 100644 --- a/decl.cpp +++ b/decl.cpp @@ -80,19 +80,19 @@ lApplyTypeQualifiers(int typeQualifiers, const Type *type, SourcePos pos) { } if ((typeQualifiers & TYPEQUAL_UNIFORM) != 0) { - if (Type::Equal(type, AtomicType::Void)) + if (type->IsVoidType()) Error(pos, "\"uniform\" qualifier is illegal with \"void\" type."); else type = type->GetAsUniformType(); } else if ((typeQualifiers & TYPEQUAL_VARYING) != 0) { - if (Type::Equal(type, AtomicType::Void)) + if (type->IsVoidType()) Error(pos, "\"varying\" qualifier is illegal with \"void\" type."); else type = type->GetAsVaryingType(); } else { - if (Type::Equal(type, AtomicType::Void) == false) + if (type->IsVoidType() == false) type = type->GetAsUnboundVariabilityType(); } @@ -399,7 +399,7 @@ Declarator::InitFromType(const Type *baseType, DeclSpecs *ds) { type = refType; } else if (kind == DK_ARRAY) { - if (Type::Equal(baseType, AtomicType::Void)) { + if (baseType->IsVoidType()) { Error(pos, "Arrays of \"void\" type are illegal."); return; } @@ -468,7 
+468,7 @@ Declarator::InitFromType(const Type *baseType, DeclSpecs *ds) { "function parameter declaration for parameter \"%s\".", lGetStorageClassName(d->declSpecs->storageClass), decl->name.c_str()); - if (Type::Equal(decl->type, AtomicType::Void)) { + if (decl->type->IsVoidType()) { Error(decl->pos, "Parameter with type \"void\" illegal in function " "parameter list."); decl->type = NULL; @@ -639,7 +639,7 @@ Declaration::GetVariableDeclarations() const { continue; } - if (Type::Equal(decl->type, AtomicType::Void)) + if (decl->type->IsVoidType()) Error(decl->pos, "\"void\" type variable illegal in declaration."); else if (CastType(decl->type) == NULL) { decl->type = decl->type->ResolveUnboundVariability(Variability::Varying); @@ -703,7 +703,7 @@ GetStructTypesNamesPositions(const std::vector &sd, // FIXME: making this fake little DeclSpecs here is really // disgusting DeclSpecs ds(type); - if (Type::Equal(type, AtomicType::Void) == false) { + if (type->IsVoidType() == false) { if (type->IsUniformType()) ds.typeQualifiers |= TYPEQUAL_UNIFORM; else if (type->IsVaryingType()) @@ -717,7 +717,7 @@ GetStructTypesNamesPositions(const std::vector &sd, Declarator *d = (*sd[i]->declarators)[j]; d->InitFromDeclSpecs(&ds); - if (Type::Equal(d->type, AtomicType::Void)) + if (d->type->IsVoidType()) Error(d->pos, "\"void\" type illegal for struct member."); elementTypes->push_back(d->type); diff --git a/decl.h b/decl.h index 1a240fd7..5bd366ec 100644 --- a/decl.h +++ b/decl.h @@ -1,5 +1,5 @@ /* - Copyright (c) 2010-2012, Intel Corporation + Copyright (c) 2010-2013, Intel Corporation All rights reserved. Redistribution and use in source and binary forms, with or without diff --git a/docs/ispc.rst b/docs/ispc.rst index 8c1feaec..e3138760 100644 --- a/docs/ispc.rst +++ b/docs/ispc.rst @@ -361,7 +361,7 @@ the ``vout`` array before the next iteration of the ``foreach`` loop runs. On Linux\* and Mac OS\*, the makefile in that directory compiles this program. 
For Windows\*, open the ``examples/examples.sln`` file in Microsoft Visual -C++ 2010\* to build this (and the other) examples. In either case, +C++ 2012\* to build this (and the other) examples. In either case, build it now! We'll walk through the details of the compilation steps in the following section, `Using The ISPC Compiler`_.) In addition to compiling the ``ispc`` program, in this case the ``ispc`` compiler also @@ -662,14 +662,14 @@ To compile for Xeon Phi™, first generate intermediate C++ code: The ``ispc`` distribution now includes a header file, ``examples/intrinsics/knc.h``, which maps from the generic C++ output to the corresponding intrinsic operations supported by Intel Xeon Phi™. -Thus, to generate an object file, use the Intel C Compiler (``icc``) compile +Thus, to generate an object file, use the Intel C++ Compiler (``icpc``) to compile the C++ code generated by ``ispc``, setting the ``#include`` search path so that it can find the ``examples/intrinsics/knc.h`` header file in the ``ispc`` distribution. :: - icc -mmic -Iexamples/intrinsics/ foo.cpp -o foo.o + icpc -mmic -Iexamples/intrinsics/ foo.cpp -o foo.o With the current beta implementation, complex ``ispc`` programs are able to run on Xeon Phi™, though there are a number of known limitations: @@ -690,14 +690,14 @@ run on Xeon Phi™, though there are a number of known limitations: where the memory address is actually aligned. This may unnecessarily impact performance. -* When requesting that ICC generate code with strict floating point - precision compliance (using ICC option ``-fp-model strict``) or - accurate reporting of floating point exceptions (using ICC option +* When requesting that ICPC generate code with strict floating point + precision compliance (using ICPC option ``-fp-model strict``) or + accurate reporting of floating point exceptions (using ICPC option ``-fp-model except``) the compiler will generate code that uses the x87 unit rather than Xeon Phi™'s vector unit. 
For similar reasons, the options ``–ansi`` and ``–fmath-errno`` may result in calls to math functions that are implemented in x87 rather than using vector instructions. - This will have a significant performance impact. See the ICC manual for + This will have a significant performance impact. See the ICPC manual for details on these compiler options. All of these issues are currently actively being addressed and will be @@ -3434,7 +3434,7 @@ for this argument. * ``fast``: more efficient but lower accuracy versions of the default ``ispc`` implementations. * ``svml``: use Intel "Short Vector Math Library". Use - ``icc`` to link your final executable so that the appropriate libraries + ``icpc`` to link your final executable so that the appropriate libraries are linked. * ``system``: use the system's math library. On many systems, these functions are more accurate than both of ``ispc``'s implementations. @@ -3622,6 +3622,39 @@ normalized exponent as a power of two in the ``pw2`` parameter. uniform int * uniform pw2) +Saturating Arithmetic +--------------------- +A saturation (no overflow possible) addition, subtraction, multiplication and +division of all integer types are provided by the ``ispc`` standard library. 
+ +:: + + int8 saturating_add(uniform int8 a, uniform int8 b) + int8 saturating_add(varying int8 a, varying int8 b) + unsigned int8 saturating_add(uniform unsigned int8 a, uniform unsigned int8 b) + unsigned int8 saturating_add(varying unsigned int8 a, varying unsigned int8 b) + + int8 saturating_sub(uniform int8 a, uniform int8 b) + int8 saturating_sub(varying int8 a, varying int8 b) + unsigned int8 saturating_sub(uniform unsigned int8 a, uniform unsigned int8 b) + unsigned int8 saturating_sub(varying unsigned int8 a, varying unsigned int8 b) + + int8 saturating_mul(uniform int8 a, uniform int8 b) + int8 saturating_mul(varying int8 a, varying int8 b) + unsigned int8 saturating_mul(uniform unsigned int8 a, uniform unsigned int8 b) + unsigned int8 saturating_mul(varying unsigned int8 a, varying unsigned int8 b) + + int8 saturating_div(uniform int8 a, uniform int8 b) + int8 saturating_div(varying int8 a, varying int8 b) + unsigned int8 saturating_div(uniform unsigned int8 a, uniform unsigned int8 b) + unsigned int8 saturating_div(varying unsigned int8 a, varying unsigned int8 b) + + +In addition to the ``int8`` variants of saturating arithmetic functions listed +above, there are versions that support ``int16``, ``int32`` and ``int64`` +values as well. + + Pseudo-Random Numbers --------------------- @@ -4045,7 +4078,9 @@ overlap. void memmove(void * varying dst, void * varying src, int32 count) Note that there are variants of these functions that take both ``uniform`` -and ``varying`` pointers. +and ``varying`` pointers. Also note that ``sizeof(float)`` and +``sizeof(uniform float)`` return different values, so programmers should +take care when calculating ``count``. To initialize values in memory, the ``memset`` routine can be used. (It also behaves like the function of the same name in the C Standard Library.) @@ -4955,7 +4990,7 @@ countries. * Other names and brands may be claimed as the property of others. -Copyright(C) 2011-2013, Intel Corporation. 
All rights reserved. +Copyright(C) 2011-2014, Intel Corporation. All rights reserved. Optimization Notice diff --git a/examples/aobench_instrumented/Makefile b/examples/aobench_instrumented/Makefile index d0b27cbf..d47a5c31 100644 --- a/examples/aobench_instrumented/Makefile +++ b/examples/aobench_instrumented/Makefile @@ -20,7 +20,7 @@ ao: objs/ao.o objs/instrument.o objs/ao_ispc.o ../tasksys.cpp objs/%.o: %.cpp dirs $(CXX) $< $(CXXFLAGS) -c -o $@ -objs/ao.o: objs/ao_ispc.h +objs/ao.o: objs/ao_instrumented_ispc.h objs/%_ispc.h objs/%_ispc.o: %.ispc dirs $(ISPC) $(ISPCFLAGS) $< -o objs/$*_ispc.o -h objs/$*_instrumented_ispc.h diff --git a/examples/aobench_instrumented/ao.ispc b/examples/aobench_instrumented/ao_instrumented.ispc similarity index 100% rename from examples/aobench_instrumented/ao.ispc rename to examples/aobench_instrumented/ao_instrumented.ispc diff --git a/examples/aobench_instrumented/aobench_instrumented.vcxproj b/examples/aobench_instrumented/aobench_instrumented.vcxproj index 5247762c..6eaf55d9 100644 --- a/examples/aobench_instrumented/aobench_instrumented.vcxproj +++ b/examples/aobench_instrumented/aobench_instrumented.vcxproj @@ -18,158 +18,18 @@ x64 + + {B3B4AE3D-6D5A-4CF9-AF5B-43CF2131B958} + Win32Proj + aobench_instrumented + ao_instrumented + sse2 + --instrument + + - - - Document - $(ISPC_compiler) -O2 %(Filename).ispc -o $(TargetDir)%(Filename)_instrumented.obj -h $(TargetDir)%(Filename)_instrumented_ispc.h --arch=x86 --instrument --target=sse2 - - $(ISPC_compiler) -O2 %(Filename).ispc -o $(TargetDir)%(Filename)_instrumented.obj -h $(TargetDir)%(Filename)_instrumented_ispc.h --instrument --target=sse2 - - $(TargetDir)%(Filename)_instrumented.obj;$(TargetDir)%(Filename)_instrumented_ispc.h - $(TargetDir)%(Filename)_instrumented.obj;$(TargetDir)%(Filename)_instrumented_ispc.h - $(ISPC_compiler) -O2 %(Filename).ispc -o $(TargetDir)%(Filename)_instrumented.obj -h $(TargetDir)%(Filename)_instrumented_ispc.h --arch=x86 --instrument 
--target=sse2 - - $(ISPC_compiler) -O2 %(Filename).ispc -o $(TargetDir)%(Filename)_instrumented.obj -h $(TargetDir)%(Filename)_instrumented_ispc.h --instrument --target=sse2 - - $(TargetDir)%(Filename)_instrumented.obj;$(TargetDir)%(Filename)_instrumented_ispc.h - $(TargetDir)%(Filename)_instrumented.obj;$(TargetDir)%(Filename)_instrumented_ispc.h - - - - {B3B4AE3D-6D5A-4CF9-AF5B-43CF2131B958} - Win32Proj - aobench_instrumented - ispc - - - - Application - true - Unicode - - - Application - true - Unicode - - - Application - false - true - Unicode - - - Application - false - true - Unicode - - - - - - - - - - - - - - - - - - - true - $(ProjectDir)..\..;$(ExecutablePath) - true - - - true - $(ProjectDir)..\..;$(ExecutablePath) - true - - - false - $(ProjectDir)..\..;$(ExecutablePath) - true - - - false - $(ProjectDir)..\..;$(ExecutablePath) - true - - - - - - Level3 - Disabled - WIN32;_DEBUG;_CONSOLE;_CRT_SECURE_NO_WARNINGS;%(PreprocessorDefinitions) - $(TargetDir) - - - Console - true - - - - - - - Level3 - Disabled - WIN32;_DEBUG;_CONSOLE;_CRT_SECURE_NO_WARNINGS;%(PreprocessorDefinitions) - $(TargetDir) - - - Console - true - - - - - Level3 - - - MaxSpeed - true - true - WIN32;NDEBUG;_CONSOLE;_CRT_SECURE_NO_WARNINGS;%(PreprocessorDefinitions) - $(TargetDir) - - - Console - true - true - true - - - - - Level3 - - - MaxSpeed - true - true - WIN32;NDEBUG;_CONSOLE;_CRT_SECURE_NO_WARNINGS;%(PreprocessorDefinitions) - $(TargetDir) - - - Console - true - true - true - - - - - diff --git a/examples/common.props b/examples/common.props index 3769330b..5cfad4fc 100644 --- a/examples/common.props +++ b/examples/common.props @@ -23,23 +23,27 @@ Application true Unicode + v110 Application true Unicode + v110 Application false true Unicode + v110 Application false true Unicode + v110 @@ -156,12 +160,12 @@ Document - $(ISPC_compiler) -O2 %(Filename).ispc -o %(Filename).obj -h %(Filename)_ispc.h --arch=x86 --target=$(Target_str) - $(ISPC_compiler) -O2 %(Filename).ispc -o 
%(Filename).obj -h %(Filename)_ispc.h --target=$(Target_str) + $(ISPC_compiler) -O2 %(Filename).ispc -o %(Filename).obj -h %(Filename)_ispc.h --arch=x86 --target=$(Target_str) $(flags) + $(ISPC_compiler) -O2 %(Filename).ispc -o %(Filename).obj -h %(Filename)_ispc.h --target=$(Target_str) $(flags) $(Target_out) $(Target_out) - $(ISPC_compiler) -O2 %(Filename).ispc -o %(Filename).obj -h %(Filename)_ispc.h --arch=x86 --target=$(Target_str) - $(ISPC_compiler) -O2 %(Filename).ispc -o %(Filename).obj -h %(Filename)_ispc.h --target=$(Target_str) + $(ISPC_compiler) -O2 %(Filename).ispc -o %(Filename).obj -h %(Filename)_ispc.h --arch=x86 --target=$(Target_str) $(flags) + $(ISPC_compiler) -O2 %(Filename).ispc -o %(Filename).obj -h %(Filename)_ispc.h --target=$(Target_str) $(flags) $(Target_out) $(Target_out) diff --git a/examples/deferred/deferred_shading.vcxproj b/examples/deferred/deferred_shading.vcxproj index 974e870b..3e6c4c12 100755 --- a/examples/deferred/deferred_shading.vcxproj +++ b/examples/deferred/deferred_shading.vcxproj @@ -1,4 +1,4 @@ - + @@ -17,7 +17,7 @@ Release x64 - + {87f53c53-957e-4e91-878a-bc27828fb9eb} Win32Proj diff --git a/examples/examples.sln b/examples/examples.sln index 2285f6a6..5de51b74 100755 --- a/examples/examples.sln +++ b/examples/examples.sln @@ -1,6 +1,6 @@  -Microsoft Visual Studio Solution File, Format Version 11.00 -# Visual Studio 2010 +Microsoft Visual Studio Solution File, Format Version 12.00 +# Visual Studio 2012 Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "simple", "simple\simple.vcxproj", "{947C5311-8B78-4D05-BEE4-BCF342D4B367}" EndProject Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "rt", "rt\rt.vcxproj", "{E787BC3F-2D2E-425E-A64D-4721E2FF3DC9}" diff --git a/examples/intrinsics/knc.h b/examples/intrinsics/knc.h index 4bdb184a..e674f409 100644 --- a/examples/intrinsics/knc.h +++ b/examples/intrinsics/knc.h @@ -599,8 +599,10 @@ template <> static FORCEINLINE void __store<64>(__vec16_i32 *p, __vec16_i32 v) { // 
int64 /////////////////////////////////////////////////////////////////////////// -static FORCEINLINE int64_t __extract_element(__vec16_i64 v, uint32_t index) { - return (uint64_t(((int32_t *)&v.v_hi)[index])<<32) | (uint64_t(((int32_t *)&v.v_lo)[index])); +static FORCEINLINE int64_t __extract_element(const __vec16_i64 &v, uint32_t index) +{ + uint *src = (uint *)&v; + return src[index+16] | (int64_t(src[index]) << 32); } static FORCEINLINE void __insert_element(__vec16_i64 *v, uint32_t index, int64_t val) { @@ -751,12 +753,6 @@ static FORCEINLINE __vec16_i64 __select(__vec16_i1 mask, return ret; } -static FORCEINLINE int64_t __extract_element(const __vec16_i64 &v, uint32_t index) -{ - uint *src = (uint *)&v; - return src[index+16] | (int64_t(src[index]) << 32); -} - template RetVecType __smear_i64(const int64_t &l); template <> FORCEINLINE __vec16_i64 __smear_i64<__vec16_i64>(const int64_t &l) { const int *i = (const int*)&l; diff --git a/examples/mandelbrot_tasks/mandelbrot_tasks.vcxproj b/examples/mandelbrot_tasks/mandelbrot_tasks.vcxproj index 113fc4e8..a10cd0ae 100644 --- a/examples/mandelbrot_tasks/mandelbrot_tasks.vcxproj +++ b/examples/mandelbrot_tasks/mandelbrot_tasks.vcxproj @@ -1,4 +1,4 @@ - + @@ -17,7 +17,7 @@ Release x64 - + {E80DA7D4-AB22-4648-A068-327307156BE6} Win32Proj diff --git a/examples/noise/noise.vcxproj b/examples/noise/noise.vcxproj index ff3953ae..f0e6e207 100644 --- a/examples/noise/noise.vcxproj +++ b/examples/noise/noise.vcxproj @@ -1,4 +1,4 @@ - + @@ -17,7 +17,7 @@ Release x64 - + {0E0886D8-8B5E-4EAF-9A21-91E63DAF81FD} Win32Proj diff --git a/examples/options/options.vcxproj b/examples/options/options.vcxproj index d48ac8bc..526f8450 100644 --- a/examples/options/options.vcxproj +++ b/examples/options/options.vcxproj @@ -1,4 +1,4 @@ - + @@ -17,7 +17,7 @@ Release x64 - + {8C7B5D29-1E76-44E6-BBB8-09830E5DEEAE} Win32Proj diff --git a/examples/perfbench/perfbench.vcxproj b/examples/perfbench/perfbench.vcxproj index d94b753c..7bafb480 100644 
--- a/examples/perfbench/perfbench.vcxproj +++ b/examples/perfbench/perfbench.vcxproj @@ -1,4 +1,4 @@ - + @@ -22,155 +22,12 @@ {d923bb7e-a7c8-4850-8fcf-0eb9ce35b4e8} Win32Proj perfbench - ispc + perfbench + sse2-i32x4,sse4-i32x4,avx1-i32x8 - - - Application - true - Unicode - - - Application - true - Unicode - - - Application - false - true - Unicode - - - Application - false - true - Unicode - - - - - - - - - - - - - - - - - - - true - $(ProjectDir)..\..;$(ExecutablePath) - - - true - $(ProjectDir)..\..;$(ExecutablePath) - - - false - $(ProjectDir)..\..;$(ExecutablePath) - - - false - $(ProjectDir)..\..;$(ExecutablePath) - - - - - - Level3 - Disabled - WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) - $(TargetDir) - true - Fast - - - Console - true - - - - - - - Level3 - Disabled - WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) - $(TargetDir) - true - Fast - - - Console - true - - - - - Level3 - - - MaxSpeed - true - true - WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) - $(TargetDir) - Fast - - - Console - true - true - true - - - - - Level3 - - - MaxSpeed - true - true - WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) - $(TargetDir) - Fast - - - Console - true - true - true - - + - - - Document - $(ISPC_compiler) -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --arch=x86 --target=sse2,sse4,avx - - $(ISPC_compiler) -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --target=sse2,sse4,avx - - $(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_avx.obj;$(TargetDir)%(Filename)_ispc.h - $(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_avx.obj;$(TargetDir)%(Filename)_ispc.h - $(ISPC_compiler) -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --arch=x86 --target=sse2,sse4,avx - - $(ISPC_compiler) -O2 %(Filename).ispc -o 
$(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --target=sse2,sse4,avx - - $(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_avx.obj;$(TargetDir)%(Filename)_ispc.h - $(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_avx.obj;$(TargetDir)%(Filename)_ispc.h - - - - - diff --git a/examples/rt/rt.vcxproj b/examples/rt/rt.vcxproj index 00b6dd3a..38b34879 100644 --- a/examples/rt/rt.vcxproj +++ b/examples/rt/rt.vcxproj @@ -1,4 +1,4 @@ - + @@ -17,7 +17,7 @@ Release x64 - + {E787BC3F-2D2E-425E-A64D-4721E2FF3DC9} Win32Proj diff --git a/examples/simple/simple.vcxproj b/examples/simple/simple.vcxproj index 34908223..a540353c 100644 --- a/examples/simple/simple.vcxproj +++ b/examples/simple/simple.vcxproj @@ -18,156 +18,15 @@ x64 - - - - - - Document - -$(ISPC_compiler) -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --arch=x86 --target=sse2 - - -$(ISPC_compiler) -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --target=sse2 - - $(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_ispc.h - $(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_ispc.h - -$(ISPC_compiler) -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --arch=x86 --target=sse2 - - -$(ISPC_compiler) -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --target=sse2 - - $(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_ispc.h - $(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_ispc.h - - {947C5311-8B78-4D05-BEE4-BCF342D4B367} Win32Proj simple - ispc + simple + sse2 - - - Application - true - Unicode - - - Application - true - Unicode - - - Application - false - true - Unicode - - - Application - false - true - Unicode - - - - - - - - - - - - - - - - - - - true - $(ProjectDir)..\..;$(ExecutablePath) - - - true - 
$(ProjectDir)..\..;$(ExecutablePath) - - - false - $(ProjectDir)..\..;$(ExecutablePath) - - - false - $(ProjectDir)..\..;$(ExecutablePath) - - - - - - Level3 - Disabled - WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) - $(TargetDir) - - - Console - true - - - - - - - Level3 - Disabled - WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) - $(TargetDir) - - - Console - true - - - - - Level3 - - - MaxSpeed - true - true - WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) - $(TargetDir) - - - Console - true - true - true - - - - - Level3 - - - MaxSpeed - true - true - WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) - $(TargetDir) - - - Console - true - true - true - - - - - + + + + diff --git a/examples/sort/sort.vcxproj b/examples/sort/sort.vcxproj index b0bdc63d..f50a16b4 100644 --- a/examples/sort/sort.vcxproj +++ b/examples/sort/sort.vcxproj @@ -17,7 +17,7 @@ Release x64 - + {6D3EF8C5-AE26-407B-9ECE-C27CB988D9C2} Win32Proj diff --git a/examples/stencil/stencil.vcxproj b/examples/stencil/stencil.vcxproj index fd8564aa..168039bc 100644 --- a/examples/stencil/stencil.vcxproj +++ b/examples/stencil/stencil.vcxproj @@ -1,4 +1,4 @@ - + diff --git a/examples/volume_rendering/volume.vcxproj b/examples/volume_rendering/volume.vcxproj index a1fea5f1..d0e3d8d0 100644 --- a/examples/volume_rendering/volume.vcxproj +++ b/examples/volume_rendering/volume.vcxproj @@ -1,4 +1,4 @@ - + @@ -17,7 +17,7 @@ Release x64 - + {dee5733a-e93e-449d-9114-9bffcaeb4df9} Win32Proj diff --git a/expr.cpp b/expr.cpp index 4a473fe7..cb1f8dcc 100644 --- a/expr.cpp +++ b/expr.cpp @@ -1,5 +1,5 @@ /* - Copyright (c) 2010-2013, Intel Corporation + Copyright (c) 2010-2014, Intel Corporation All rights reserved. 
Redistribution and use in source and binary forms, with or without @@ -74,8 +74,11 @@ #include #endif #include -#include - +#if defined(LLVM_3_5) + #include +#else + #include +#endif ///////////////////////////////////////////////////////////////////////////////////// // Expr @@ -206,14 +209,14 @@ lDoTypeConv(const Type *fromType, const Type *toType, Expr **expr, if (Type::Equal(toType, fromType)) return true; - if (Type::Equal(fromType, AtomicType::Void)) { + if (fromType->IsVoidType()) { if (!failureOk) Error(pos, "Can't convert from \"void\" to \"%s\" for %s.", toType->GetString().c_str(), errorMsgBase); return false; } - if (Type::Equal(toType, AtomicType::Void)) { + if (toType->IsVoidType()) { if (!failureOk) Error(pos, "Can't convert type \"%s\" to \"void\" for %s.", fromType->GetString().c_str(), errorMsgBase); @@ -339,7 +342,8 @@ lDoTypeConv(const Type *fromType, const Type *toType, Expr **expr, return false; } else if (PointerType::IsVoidPointer(toPointerType)) { - if (fromPointerType->GetBaseType()->IsConstType()) { + if (fromPointerType->GetBaseType()->IsConstType() && + !(toPointerType->GetBaseType()->IsConstType())) { if (!failureOk) Error(pos, "Can't convert pointer to const \"%s\" to void pointer.", fromPointerType->GetString().c_str()); @@ -3608,7 +3612,7 @@ FunctionCallExpr::GetValue(FunctionEmitContext *ctx) const { const FunctionType *ft = lGetFunctionType(func); AssertPos(pos, ft != NULL); - bool isVoidFunc = Type::Equal(ft->GetReturnType(), AtomicType::Void); + bool isVoidFunc = ft->GetReturnType()->IsVoidType(); // Automatically convert function call args to references if needed. // FIXME: this should move to the TypeCheck() method... 
(but the @@ -3895,7 +3899,7 @@ FunctionCallExpr::TypeCheck() { if (fptrType->IsVaryingType()) { const Type *retType = funcType->GetReturnType(); - if (Type::Equal(retType, AtomicType::Void) == false && + if (retType->IsVoidType() == false && retType->IsUniformType()) { Error(pos, "Illegal to call a varying function pointer that " "points to a function with a uniform return type \"%s\".", @@ -4603,7 +4607,7 @@ IndexExpr::TypeCheck() { if (!CastType(baseExprType->GetReferenceTarget())) { if (const PointerType *pt = CastType(baseExprType)) { - if (Type::Equal(AtomicType::Void, pt->GetBaseType())) { + if (pt->GetBaseType()->IsVoidType()) { Error(pos, "Illegal to dereference void pointer type \"%s\".", baseExprType->GetString().c_str()); return NULL; @@ -6194,10 +6198,10 @@ ConstExpr::Print() const { printf("%f", floatVal[i]); break; case AtomicType::TYPE_INT64: - printf("%"PRId64, int64Val[i]); + printf("%" PRId64, int64Val[i]); break; case AtomicType::TYPE_UINT64: - printf("%"PRIu64, uint64Val[i]); + printf("%" PRIu64, uint64Val[i]); break; case AtomicType::TYPE_DOUBLE: printf("%f", doubleVal[i]); @@ -6797,7 +6801,7 @@ TypeCastExpr::GetValue(FunctionEmitContext *ctx) const { return NULL; } - if (Type::Equal(toType, AtomicType::Void)) { + if (toType->IsVoidType()) { // emit the code for the expression in case it has side-effects but // then we're done. (void)expr->GetValue(ctx); @@ -7160,10 +7164,10 @@ TypeCastExpr::TypeCheck() { toType = lDeconstifyType(toType); // Anything can be cast to void... 
- if (Type::Equal(toType, AtomicType::Void)) + if (toType->IsVoidType()) return this; - if (Type::Equal(fromType, AtomicType::Void) || + if (fromType->IsVoidType() || (fromType->IsVaryingType() && toType->IsUniformType())) { Error(pos, "Can't type cast from type \"%s\" to type \"%s\"", fromType->GetString().c_str(), toType->GetString().c_str()); @@ -7586,7 +7590,7 @@ PtrDerefExpr::TypeCheck() { } if (const PointerType *pt = CastType(type)) { - if (Type::Equal(AtomicType::Void, pt->GetBaseType())) { + if (pt->GetBaseType()->IsVoidType()) { Error(pos, "Illegal to dereference void pointer type \"%s\".", type->GetString().c_str()); return NULL; diff --git a/expr.h b/expr.h index 38617e8e..b539ff1b 100644 --- a/expr.h +++ b/expr.h @@ -1,5 +1,5 @@ /* - Copyright (c) 2010-2012, Intel Corporation + Copyright (c) 2010-2013, Intel Corporation All rights reserved. Redistribution and use in source and binary forms, with or without diff --git a/fail_db.txt b/fail_db.txt index 02432603..dd0fbdcf 100644 --- a/fail_db.txt +++ b/fail_db.txt @@ -257,478 +257,12 @@ ./tests/reduce-equal-5.ispc compfail x86-64 generic-16 Linux LLVM 3.5 clang++3.4 -O0 * ./tests/reduce-equal-6.ispc compfail x86-64 generic-16 Linux LLVM 3.5 clang++3.4 -O0 * ./tests/reduce-equal-8.ispc compfail x86-64 generic-16 Linux LLVM 3.5 clang++3.4 -O0 * -./tests/foreach-double-1.ispc runfail x86 avx2-i32x8 Linux LLVM 3.5 clang++3.4 -O2 * -./tests/foreach-double-1.ispc runfail x86 avx2-i32x16 Linux LLVM 3.5 clang++3.4 -O2 * -./tests/foreach-double-1.ispc runfail x86 avx2-i64x4 Linux LLVM 3.5 clang++3.4 -O2 * -./tests/ptr-int-1.ispc runfail x86 avx2-i32x8 Linux LLVM 3.4 clang++3.4 -O2 * -./tests/ptr-int-1.ispc runfail x86 avx2-i32x16 Linux LLVM 3.4 clang++3.4 -O2 * -./tests/ptr-int-1.ispc runfail x86 avx2-i64x4 Linux LLVM 3.4 clang++3.4 -O2 * -./tests/ptr-int-1.ispc runfail x86 avx2-i32x8 Linux LLVM 3.5 clang++3.4 -O2 * -./tests/ptr-int-1.ispc runfail x86 avx2-i32x16 Linux LLVM 3.5 clang++3.4 -O2 * 
-./tests/ptr-int-1.ispc runfail x86 avx2-i64x4 Linux LLVM 3.5 clang++3.4 -O2 * -.\tests\exclusive-scan-add-9.ispc runfail x86 sse2-i32x4 Windows LLVM 3.3 cl -O2 * -.\tests\reduce-equal-10.ispc runfail x86 sse2-i32x4 Windows LLVM 3.3 cl -O2 * -.\tests\reduce-max-uint64.ispc runfail x86 sse2-i32x4 Windows LLVM 3.3 cl -O2 * -.\tests\uint64-max-1.ispc runfail x86 sse2-i32x4 Windows LLVM 3.3 cl -O2 * -.\tests\uint64-max.ispc runfail x86 sse2-i32x4 Windows LLVM 3.3 cl -O2 * -.\tests\uint64-min-1.ispc runfail x86 sse2-i32x4 Windows LLVM 3.3 cl -O2 * -.\tests\uint64-min.ispc runfail x86 sse2-i32x4 Windows LLVM 3.3 cl -O2 * -.\tests\exclusive-scan-add-9.ispc runfail x86 sse2-i32x4 Windows LLVM 3.3 cl -O0 * -.\tests\reduce-equal-10.ispc runfail x86 sse2-i32x4 Windows LLVM 3.3 cl -O0 * -.\tests\reduce-max-uint64.ispc runfail x86 sse2-i32x4 Windows LLVM 3.3 cl -O0 * -.\tests\uint64-max-1.ispc runfail x86 sse2-i32x4 Windows LLVM 3.3 cl -O0 * -.\tests\uint64-max.ispc runfail x86 sse2-i32x4 Windows LLVM 3.3 cl -O0 * -.\tests\uint64-min-1.ispc runfail x86 sse2-i32x4 Windows LLVM 3.3 cl -O0 * -.\tests\uint64-min.ispc runfail x86 sse2-i32x4 Windows LLVM 3.3 cl -O0 * -.\tests\exclusive-scan-add-10.ispc runfail x86 sse2-i32x8 Windows LLVM 3.3 cl -O2 * -.\tests\exclusive-scan-add-9.ispc runfail x86 sse2-i32x8 Windows LLVM 3.3 cl -O2 * -.\tests\reduce-equal-10.ispc runfail x86 sse2-i32x8 Windows LLVM 3.3 cl -O2 * -.\tests\reduce-max-uint64.ispc runfail x86 sse2-i32x8 Windows LLVM 3.3 cl -O2 * -.\tests\uint64-max-1.ispc runfail x86 sse2-i32x8 Windows LLVM 3.3 cl -O2 * -.\tests\uint64-max.ispc runfail x86 sse2-i32x8 Windows LLVM 3.3 cl -O2 * -.\tests\uint64-min-1.ispc runfail x86 sse2-i32x8 Windows LLVM 3.3 cl -O2 * -.\tests\uint64-min.ispc runfail x86 sse2-i32x8 Windows LLVM 3.3 cl -O2 * -.\tests\exclusive-scan-add-10.ispc runfail x86 sse2-i32x8 Windows LLVM 3.3 cl -O0 * -.\tests\exclusive-scan-add-9.ispc runfail x86 sse2-i32x8 Windows LLVM 3.3 cl -O0 * -.\tests\reduce-equal-10.ispc 
runfail x86 sse2-i32x8 Windows LLVM 3.3 cl -O0 * -.\tests\reduce-max-uint64.ispc runfail x86 sse2-i32x8 Windows LLVM 3.3 cl -O0 * -.\tests\uint64-max-1.ispc runfail x86 sse2-i32x8 Windows LLVM 3.3 cl -O0 * -.\tests\uint64-max.ispc runfail x86 sse2-i32x8 Windows LLVM 3.3 cl -O0 * -.\tests\uint64-min-1.ispc runfail x86 sse2-i32x8 Windows LLVM 3.3 cl -O0 * -.\tests\uint64-min.ispc runfail x86 sse2-i32x8 Windows LLVM 3.3 cl -O0 * -.\tests\exclusive-scan-add-10.ispc runfail x86 sse4-i32x4 Windows LLVM 3.3 cl -O2 * -.\tests\exclusive-scan-add-9.ispc runfail x86 sse4-i32x4 Windows LLVM 3.3 cl -O2 * -.\tests\max-uint-1.ispc runfail x86 sse4-i32x4 Windows LLVM 3.3 cl -O2 * -.\tests\max-uint.ispc runfail x86 sse4-i32x4 Windows LLVM 3.3 cl -O2 * -.\tests\min-uint-2.ispc runfail x86 sse4-i32x4 Windows LLVM 3.3 cl -O2 * -.\tests\packed-load-1.ispc runfail x86 sse4-i32x4 Windows LLVM 3.3 cl -O2 * -.\tests\packed-store.ispc runfail x86 sse4-i32x4 Windows LLVM 3.3 cl -O2 * -.\tests\packed-store2.ispc runfail x86 sse4-i32x4 Windows LLVM 3.3 cl -O2 * -.\tests\reduce-add-uint-1.ispc runfail x86 sse4-i32x4 Windows LLVM 3.3 cl -O2 * -.\tests\reduce-add-uint.ispc runfail x86 sse4-i32x4 Windows LLVM 3.3 cl -O2 * -.\tests\reduce-add-uint64-1.ispc runfail x86 sse4-i32x4 Windows LLVM 3.3 cl -O2 * -.\tests\reduce-add-uint64.ispc runfail x86 sse4-i32x4 Windows LLVM 3.3 cl -O2 * -.\tests\reduce-max-uint.ispc runfail x86 sse4-i32x4 Windows LLVM 3.3 cl -O2 * -.\tests\atomics-10.ispc runfail x86 sse4-i32x4 Windows LLVM 3.3 cl -O0 * -.\tests\atomics-varyingptr-1.ispc runfail x86 sse4-i32x4 Windows LLVM 3.3 cl -O0 * -.\tests\local-atomics-10.ispc runfail x86 sse4-i32x4 Windows LLVM 3.3 cl -O0 * -.\tests\local-atomics-varyingptr-1.ispc runfail x86 sse4-i32x4 Windows LLVM 3.3 cl -O0 * -.\tests\max-uint-1.ispc runfail x86 sse4-i32x4 Windows LLVM 3.3 cl -O0 * -.\tests\max-uint.ispc runfail x86 sse4-i32x4 Windows LLVM 3.3 cl -O0 * -.\tests\min-uint-2.ispc runfail x86 sse4-i32x4 Windows LLVM 3.3 cl -O0 * 
-.\tests\packed-load-1.ispc runfail x86 sse4-i32x4 Windows LLVM 3.3 cl -O0 * -.\tests\packed-store.ispc runfail x86 sse4-i32x4 Windows LLVM 3.3 cl -O0 * -.\tests\packed-store2.ispc runfail x86 sse4-i32x4 Windows LLVM 3.3 cl -O0 * -.\tests\reduce-add-uint-1.ispc runfail x86 sse4-i32x4 Windows LLVM 3.3 cl -O0 * -.\tests\reduce-add-uint.ispc runfail x86 sse4-i32x4 Windows LLVM 3.3 cl -O0 * -.\tests\reduce-add-uint64-1.ispc runfail x86 sse4-i32x4 Windows LLVM 3.3 cl -O0 * -.\tests\reduce-add-uint64.ispc runfail x86 sse4-i32x4 Windows LLVM 3.3 cl -O0 * -.\tests\reduce-max-uint.ispc runfail x86 sse4-i32x4 Windows LLVM 3.3 cl -O0 * -.\tests\exclusive-scan-add-10.ispc runfail x86 sse4-i32x8 Windows LLVM 3.3 cl -O2 * -.\tests\max-uint-1.ispc runfail x86 sse4-i32x8 Windows LLVM 3.3 cl -O2 * -.\tests\max-uint.ispc runfail x86 sse4-i32x8 Windows LLVM 3.3 cl -O2 * -.\tests\min-uint-2.ispc runfail x86 sse4-i32x8 Windows LLVM 3.3 cl -O2 * -.\tests\packed-load-1.ispc runfail x86 sse4-i32x8 Windows LLVM 3.3 cl -O2 * -.\tests\packed-store.ispc runfail x86 sse4-i32x8 Windows LLVM 3.3 cl -O2 * -.\tests\packed-store2.ispc runfail x86 sse4-i32x8 Windows LLVM 3.3 cl -O2 * -.\tests\reduce-add-uint-1.ispc runfail x86 sse4-i32x8 Windows LLVM 3.3 cl -O2 * -.\tests\reduce-add-uint.ispc runfail x86 sse4-i32x8 Windows LLVM 3.3 cl -O2 * -.\tests\reduce-add-uint64-1.ispc runfail x86 sse4-i32x8 Windows LLVM 3.3 cl -O2 * -.\tests\reduce-add-uint64.ispc runfail x86 sse4-i32x8 Windows LLVM 3.3 cl -O2 * -.\tests\reduce-max-uint.ispc runfail x86 sse4-i32x8 Windows LLVM 3.3 cl -O2 * -.\tests\atomics-10.ispc runfail x86 sse4-i32x8 Windows LLVM 3.3 cl -O0 * -.\tests\atomics-varyingptr-1.ispc runfail x86 sse4-i32x8 Windows LLVM 3.3 cl -O0 * -.\tests\local-atomics-10.ispc runfail x86 sse4-i32x8 Windows LLVM 3.3 cl -O0 * -.\tests\local-atomics-varyingptr-1.ispc runfail x86 sse4-i32x8 Windows LLVM 3.3 cl -O0 * -.\tests\max-uint-1.ispc runfail x86 sse4-i32x8 Windows LLVM 3.3 cl -O0 * -.\tests\max-uint.ispc 
runfail x86 sse4-i32x8 Windows LLVM 3.3 cl -O0 * -.\tests\min-uint-2.ispc runfail x86 sse4-i32x8 Windows LLVM 3.3 cl -O0 * -.\tests\packed-load-1.ispc runfail x86 sse4-i32x8 Windows LLVM 3.3 cl -O0 * -.\tests\packed-store.ispc runfail x86 sse4-i32x8 Windows LLVM 3.3 cl -O0 * -.\tests\packed-store2.ispc runfail x86 sse4-i32x8 Windows LLVM 3.3 cl -O0 * -.\tests\reduce-add-uint-1.ispc runfail x86 sse4-i32x8 Windows LLVM 3.3 cl -O0 * -.\tests\reduce-add-uint.ispc runfail x86 sse4-i32x8 Windows LLVM 3.3 cl -O0 * -.\tests\reduce-add-uint64-1.ispc runfail x86 sse4-i32x8 Windows LLVM 3.3 cl -O0 * -.\tests\reduce-add-uint64.ispc runfail x86 sse4-i32x8 Windows LLVM 3.3 cl -O0 * -.\tests\reduce-max-uint.ispc runfail x86 sse4-i32x8 Windows LLVM 3.3 cl -O0 * -.\tests\exclusive-scan-add-10.ispc runfail x86 sse4-i16x8 Windows LLVM 3.3 cl -O2 * -.\tests\max-uint-1.ispc runfail x86 sse4-i16x8 Windows LLVM 3.3 cl -O2 * -.\tests\max-uint.ispc runfail x86 sse4-i16x8 Windows LLVM 3.3 cl -O2 * -.\tests\min-uint-2.ispc runfail x86 sse4-i16x8 Windows LLVM 3.3 cl -O2 * -.\tests\packed-load-1.ispc runfail x86 sse4-i16x8 Windows LLVM 3.3 cl -O2 * -.\tests\packed-store.ispc runfail x86 sse4-i16x8 Windows LLVM 3.3 cl -O2 * -.\tests\packed-store2.ispc runfail x86 sse4-i16x8 Windows LLVM 3.3 cl -O2 * -.\tests\reduce-add-uint-1.ispc runfail x86 sse4-i16x8 Windows LLVM 3.3 cl -O2 * -.\tests\reduce-add-uint.ispc runfail x86 sse4-i16x8 Windows LLVM 3.3 cl -O2 * -.\tests\reduce-add-uint64-1.ispc runfail x86 sse4-i16x8 Windows LLVM 3.3 cl -O2 * -.\tests\reduce-add-uint64.ispc runfail x86 sse4-i16x8 Windows LLVM 3.3 cl -O2 * -.\tests\reduce-max-uint.ispc runfail x86 sse4-i16x8 Windows LLVM 3.3 cl -O2 * -.\tests\atomics-13.ispc compfail x86 sse4-i16x8 Windows LLVM 3.3 cl -O2 * -.\tests\atomics-10.ispc runfail x86 sse4-i16x8 Windows LLVM 3.3 cl -O0 * -.\tests\atomics-varyingptr-1.ispc runfail x86 sse4-i16x8 Windows LLVM 3.3 cl -O0 * -.\tests\local-atomics-10.ispc runfail x86 sse4-i16x8 Windows LLVM 3.3 
cl -O0 * -.\tests\local-atomics-varyingptr-1.ispc runfail x86 sse4-i16x8 Windows LLVM 3.3 cl -O0 * -.\tests\max-uint-1.ispc runfail x86 sse4-i16x8 Windows LLVM 3.3 cl -O0 * -.\tests\max-uint.ispc runfail x86 sse4-i16x8 Windows LLVM 3.3 cl -O0 * -.\tests\min-uint-2.ispc runfail x86 sse4-i16x8 Windows LLVM 3.3 cl -O0 * -.\tests\packed-load-1.ispc runfail x86 sse4-i16x8 Windows LLVM 3.3 cl -O0 * -.\tests\packed-store.ispc runfail x86 sse4-i16x8 Windows LLVM 3.3 cl -O0 * -.\tests\packed-store2.ispc runfail x86 sse4-i16x8 Windows LLVM 3.3 cl -O0 * -.\tests\reduce-add-uint-1.ispc runfail x86 sse4-i16x8 Windows LLVM 3.3 cl -O0 * -.\tests\reduce-add-uint.ispc runfail x86 sse4-i16x8 Windows LLVM 3.3 cl -O0 * -.\tests\reduce-add-uint64-1.ispc runfail x86 sse4-i16x8 Windows LLVM 3.3 cl -O0 * -.\tests\reduce-add-uint64.ispc runfail x86 sse4-i16x8 Windows LLVM 3.3 cl -O0 * -.\tests\reduce-max-uint.ispc runfail x86 sse4-i16x8 Windows LLVM 3.3 cl -O0 * -.\tests\exclusive-scan-add-10.ispc runfail x86 sse4-i8x16 Windows LLVM 3.3 cl -O2 * -.\tests\exclusive-scan-add-9.ispc runfail x86 sse4-i8x16 Windows LLVM 3.3 cl -O2 * -.\tests\max-uint-1.ispc runfail x86 sse4-i8x16 Windows LLVM 3.3 cl -O2 * -.\tests\max-uint.ispc runfail x86 sse4-i8x16 Windows LLVM 3.3 cl -O2 * -.\tests\min-uint-2.ispc runfail x86 sse4-i8x16 Windows LLVM 3.3 cl -O2 * -.\tests\packed-load-1.ispc runfail x86 sse4-i8x16 Windows LLVM 3.3 cl -O2 * -.\tests\packed-store.ispc runfail x86 sse4-i8x16 Windows LLVM 3.3 cl -O2 * -.\tests\packed-store2.ispc runfail x86 sse4-i8x16 Windows LLVM 3.3 cl -O2 * -.\tests\reduce-add-uint-1.ispc runfail x86 sse4-i8x16 Windows LLVM 3.3 cl -O2 * -.\tests\reduce-add-uint.ispc runfail x86 sse4-i8x16 Windows LLVM 3.3 cl -O2 * -.\tests\reduce-add-uint64-1.ispc runfail x86 sse4-i8x16 Windows LLVM 3.3 cl -O2 * -.\tests\reduce-add-uint64.ispc runfail x86 sse4-i8x16 Windows LLVM 3.3 cl -O2 * -.\tests\reduce-equal-10.ispc runfail x86 sse4-i8x16 Windows LLVM 3.3 cl -O2 * 
-.\tests\reduce-max-uint.ispc runfail x86 sse4-i8x16 Windows LLVM 3.3 cl -O2 * -.\tests\reduce-min-uint64.ispc runfail x86 sse4-i8x16 Windows LLVM 3.3 cl -O2 * -.\tests\atomics-13.ispc compfail x86 sse4-i8x16 Windows LLVM 3.3 cl -O2 * -.\tests\atomics-10.ispc runfail x86 sse4-i8x16 Windows LLVM 3.3 cl -O0 * -.\tests\atomics-varyingptr-1.ispc runfail x86 sse4-i8x16 Windows LLVM 3.3 cl -O0 * -.\tests\local-atomics-10.ispc runfail x86 sse4-i8x16 Windows LLVM 3.3 cl -O0 * -.\tests\local-atomics-varyingptr-1.ispc runfail x86 sse4-i8x16 Windows LLVM 3.3 cl -O0 * -.\tests\max-uint-1.ispc runfail x86 sse4-i8x16 Windows LLVM 3.3 cl -O0 * -.\tests\max-uint.ispc runfail x86 sse4-i8x16 Windows LLVM 3.3 cl -O0 * -.\tests\min-uint-2.ispc runfail x86 sse4-i8x16 Windows LLVM 3.3 cl -O0 * -.\tests\packed-load-1.ispc runfail x86 sse4-i8x16 Windows LLVM 3.3 cl -O0 * -.\tests\packed-store.ispc runfail x86 sse4-i8x16 Windows LLVM 3.3 cl -O0 * -.\tests\packed-store2.ispc runfail x86 sse4-i8x16 Windows LLVM 3.3 cl -O0 * -.\tests\reduce-add-uint-1.ispc runfail x86 sse4-i8x16 Windows LLVM 3.3 cl -O0 * -.\tests\reduce-add-uint.ispc runfail x86 sse4-i8x16 Windows LLVM 3.3 cl -O0 * -.\tests\reduce-add-uint64-1.ispc runfail x86 sse4-i8x16 Windows LLVM 3.3 cl -O0 * -.\tests\reduce-add-uint64.ispc runfail x86 sse4-i8x16 Windows LLVM 3.3 cl -O0 * -.\tests\reduce-max-uint.ispc runfail x86 sse4-i8x16 Windows LLVM 3.3 cl -O0 * -.\tests\exclusive-scan-add-10.ispc runfail x86 avx1-i32x4 Windows LLVM 3.3 cl -O2 * -.\tests\exclusive-scan-add-9.ispc runfail x86 avx1-i32x4 Windows LLVM 3.3 cl -O2 * -.\tests\max-uint-1.ispc runfail x86 avx1-i32x4 Windows LLVM 3.3 cl -O2 * -.\tests\max-uint.ispc runfail x86 avx1-i32x4 Windows LLVM 3.3 cl -O2 * -.\tests\min-uint-2.ispc runfail x86 avx1-i32x4 Windows LLVM 3.3 cl -O2 * -.\tests\packed-load-1.ispc runfail x86 avx1-i32x4 Windows LLVM 3.3 cl -O2 * -.\tests\packed-store.ispc runfail x86 avx1-i32x4 Windows LLVM 3.3 cl -O2 * -.\tests\packed-store2.ispc runfail x86 
avx1-i32x4 Windows LLVM 3.3 cl -O2 * -.\tests\reduce-add-uint-1.ispc runfail x86 avx1-i32x4 Windows LLVM 3.3 cl -O2 * -.\tests\reduce-add-uint.ispc runfail x86 avx1-i32x4 Windows LLVM 3.3 cl -O2 * -.\tests\reduce-add-uint64-1.ispc runfail x86 avx1-i32x4 Windows LLVM 3.3 cl -O2 * -.\tests\reduce-add-uint64.ispc runfail x86 avx1-i32x4 Windows LLVM 3.3 cl -O2 * -.\tests\reduce-max-uint.ispc runfail x86 avx1-i32x4 Windows LLVM 3.3 cl -O2 * -.\tests\uint64-max-1.ispc runfail x86 avx1-i32x4 Windows LLVM 3.3 cl -O2 * -.\tests\uint64-max.ispc runfail x86 avx1-i32x4 Windows LLVM 3.3 cl -O2 * -.\tests\uint64-min-1.ispc runfail x86 avx1-i32x4 Windows LLVM 3.3 cl -O2 * -.\tests\uint64-min.ispc runfail x86 avx1-i32x4 Windows LLVM 3.3 cl -O2 * -.\tests\atomics-10.ispc runfail x86 avx1-i32x4 Windows LLVM 3.3 cl -O0 * -.\tests\atomics-varyingptr-1.ispc runfail x86 avx1-i32x4 Windows LLVM 3.3 cl -O0 * -.\tests\local-atomics-10.ispc runfail x86 avx1-i32x4 Windows LLVM 3.3 cl -O0 * -.\tests\local-atomics-varyingptr-1.ispc runfail x86 avx1-i32x4 Windows LLVM 3.3 cl -O0 * -.\tests\max-uint-1.ispc runfail x86 avx1-i32x4 Windows LLVM 3.3 cl -O0 * -.\tests\max-uint.ispc runfail x86 avx1-i32x4 Windows LLVM 3.3 cl -O0 * -.\tests\min-uint-2.ispc runfail x86 avx1-i32x4 Windows LLVM 3.3 cl -O0 * -.\tests\packed-load-1.ispc runfail x86 avx1-i32x4 Windows LLVM 3.3 cl -O0 * -.\tests\packed-store.ispc runfail x86 avx1-i32x4 Windows LLVM 3.3 cl -O0 * -.\tests\packed-store2.ispc runfail x86 avx1-i32x4 Windows LLVM 3.3 cl -O0 * -.\tests\reduce-add-uint-1.ispc runfail x86 avx1-i32x4 Windows LLVM 3.3 cl -O0 * -.\tests\reduce-add-uint.ispc runfail x86 avx1-i32x4 Windows LLVM 3.3 cl -O0 * -.\tests\reduce-add-uint64-1.ispc runfail x86 avx1-i32x4 Windows LLVM 3.3 cl -O0 * -.\tests\reduce-add-uint64.ispc runfail x86 avx1-i32x4 Windows LLVM 3.3 cl -O0 * -.\tests\reduce-max-uint.ispc runfail x86 avx1-i32x4 Windows LLVM 3.3 cl -O0 * -.\tests\exclusive-scan-add-10.ispc runfail x86 avx1-i32x8 Windows LLVM 3.3 cl 
-O2 * -.\tests\max-uint-1.ispc runfail x86 avx1-i32x8 Windows LLVM 3.3 cl -O2 * -.\tests\max-uint.ispc runfail x86 avx1-i32x8 Windows LLVM 3.3 cl -O2 * -.\tests\min-uint-2.ispc runfail x86 avx1-i32x8 Windows LLVM 3.3 cl -O2 * -.\tests\packed-load-1.ispc runfail x86 avx1-i32x8 Windows LLVM 3.3 cl -O2 * -.\tests\packed-store.ispc runfail x86 avx1-i32x8 Windows LLVM 3.3 cl -O2 * -.\tests\packed-store2.ispc runfail x86 avx1-i32x8 Windows LLVM 3.3 cl -O2 * -.\tests\reduce-add-uint-1.ispc runfail x86 avx1-i32x8 Windows LLVM 3.3 cl -O2 * -.\tests\reduce-add-uint.ispc runfail x86 avx1-i32x8 Windows LLVM 3.3 cl -O2 * -.\tests\reduce-add-uint64-1.ispc runfail x86 avx1-i32x8 Windows LLVM 3.3 cl -O2 * -.\tests\reduce-add-uint64.ispc runfail x86 avx1-i32x8 Windows LLVM 3.3 cl -O2 * -.\tests\reduce-max-uint.ispc runfail x86 avx1-i32x8 Windows LLVM 3.3 cl -O2 * -.\tests\reduce-max-uint64.ispc runfail x86 avx1-i32x8 Windows LLVM 3.3 cl -O2 * -.\tests\uint64-max-1.ispc runfail x86 avx1-i32x8 Windows LLVM 3.3 cl -O2 * -.\tests\uint64-max.ispc runfail x86 avx1-i32x8 Windows LLVM 3.3 cl -O2 * -.\tests\uint64-min-1.ispc runfail x86 avx1-i32x8 Windows LLVM 3.3 cl -O2 * -.\tests\uint64-min.ispc runfail x86 avx1-i32x8 Windows LLVM 3.3 cl -O2 * -.\tests\atomics-10.ispc runfail x86 avx1-i32x8 Windows LLVM 3.3 cl -O0 * -.\tests\atomics-varyingptr-1.ispc runfail x86 avx1-i32x8 Windows LLVM 3.3 cl -O0 * -.\tests\local-atomics-10.ispc runfail x86 avx1-i32x8 Windows LLVM 3.3 cl -O0 * -.\tests\local-atomics-varyingptr-1.ispc runfail x86 avx1-i32x8 Windows LLVM 3.3 cl -O0 * -.\tests\max-uint-1.ispc runfail x86 avx1-i32x8 Windows LLVM 3.3 cl -O0 * -.\tests\max-uint.ispc runfail x86 avx1-i32x8 Windows LLVM 3.3 cl -O0 * -.\tests\min-uint-2.ispc runfail x86 avx1-i32x8 Windows LLVM 3.3 cl -O0 * -.\tests\packed-load-1.ispc runfail x86 avx1-i32x8 Windows LLVM 3.3 cl -O0 * -.\tests\packed-store.ispc runfail x86 avx1-i32x8 Windows LLVM 3.3 cl -O0 * -.\tests\packed-store2.ispc runfail x86 avx1-i32x8 Windows 
LLVM 3.3 cl -O0 * -.\tests\reduce-add-uint-1.ispc runfail x86 avx1-i32x8 Windows LLVM 3.3 cl -O0 * -.\tests\reduce-add-uint.ispc runfail x86 avx1-i32x8 Windows LLVM 3.3 cl -O0 * -.\tests\reduce-add-uint64-1.ispc runfail x86 avx1-i32x8 Windows LLVM 3.3 cl -O0 * -.\tests\reduce-add-uint64.ispc runfail x86 avx1-i32x8 Windows LLVM 3.3 cl -O0 * -.\tests\reduce-max-uint.ispc runfail x86 avx1-i32x8 Windows LLVM 3.3 cl -O0 * -.\tests\exclusive-scan-add-10.ispc runfail x86 avx1-i32x16 Windows LLVM 3.3 cl -O2 * -.\tests\max-uint-1.ispc runfail x86 avx1-i32x16 Windows LLVM 3.3 cl -O2 * -.\tests\max-uint.ispc runfail x86 avx1-i32x16 Windows LLVM 3.3 cl -O2 * -.\tests\min-uint-2.ispc runfail x86 avx1-i32x16 Windows LLVM 3.3 cl -O2 * -.\tests\packed-load-1.ispc runfail x86 avx1-i32x16 Windows LLVM 3.3 cl -O2 * -.\tests\packed-store.ispc runfail x86 avx1-i32x16 Windows LLVM 3.3 cl -O2 * -.\tests\packed-store2.ispc runfail x86 avx1-i32x16 Windows LLVM 3.3 cl -O2 * -.\tests\reduce-add-uint-1.ispc runfail x86 avx1-i32x16 Windows LLVM 3.3 cl -O2 * -.\tests\reduce-add-uint.ispc runfail x86 avx1-i32x16 Windows LLVM 3.3 cl -O2 * -.\tests\reduce-add-uint64-1.ispc runfail x86 avx1-i32x16 Windows LLVM 3.3 cl -O2 * -.\tests\reduce-add-uint64.ispc runfail x86 avx1-i32x16 Windows LLVM 3.3 cl -O2 * -.\tests\reduce-max-uint.ispc runfail x86 avx1-i32x16 Windows LLVM 3.3 cl -O2 * -.\tests\reduce-max-uint64.ispc runfail x86 avx1-i32x16 Windows LLVM 3.3 cl -O2 * -.\tests\reduce-min-uint64.ispc runfail x86 avx1-i32x16 Windows LLVM 3.3 cl -O2 * -.\tests\uint64-max-1.ispc runfail x86 avx1-i32x16 Windows LLVM 3.3 cl -O2 * -.\tests\uint64-max.ispc runfail x86 avx1-i32x16 Windows LLVM 3.3 cl -O2 * -.\tests\uint64-min-1.ispc runfail x86 avx1-i32x16 Windows LLVM 3.3 cl -O2 * -.\tests\uint64-min.ispc runfail x86 avx1-i32x16 Windows LLVM 3.3 cl -O2 * -.\tests\atomics-10.ispc runfail x86 avx1-i32x16 Windows LLVM 3.3 cl -O0 * -.\tests\atomics-varyingptr-1.ispc runfail x86 avx1-i32x16 Windows LLVM 3.3 cl -O0 * 
-.\tests\local-atomics-10.ispc runfail x86 avx1-i32x16 Windows LLVM 3.3 cl -O0 * -.\tests\local-atomics-varyingptr-1.ispc runfail x86 avx1-i32x16 Windows LLVM 3.3 cl -O0 * -.\tests\max-uint-1.ispc runfail x86 avx1-i32x16 Windows LLVM 3.3 cl -O0 * -.\tests\max-uint.ispc runfail x86 avx1-i32x16 Windows LLVM 3.3 cl -O0 * -.\tests\min-uint-2.ispc runfail x86 avx1-i32x16 Windows LLVM 3.3 cl -O0 * -.\tests\packed-load-1.ispc runfail x86 avx1-i32x16 Windows LLVM 3.3 cl -O0 * -.\tests\packed-store.ispc runfail x86 avx1-i32x16 Windows LLVM 3.3 cl -O0 * -.\tests\packed-store2.ispc runfail x86 avx1-i32x16 Windows LLVM 3.3 cl -O0 * -.\tests\reduce-add-uint-1.ispc runfail x86 avx1-i32x16 Windows LLVM 3.3 cl -O0 * -.\tests\reduce-add-uint.ispc runfail x86 avx1-i32x16 Windows LLVM 3.3 cl -O0 * -.\tests\reduce-add-uint64-1.ispc runfail x86 avx1-i32x16 Windows LLVM 3.3 cl -O0 * -.\tests\reduce-add-uint64.ispc runfail x86 avx1-i32x16 Windows LLVM 3.3 cl -O0 * -.\tests\reduce-max-uint.ispc runfail x86 avx1-i32x16 Windows LLVM 3.3 cl -O0 * -.\tests\exclusive-scan-add-10.ispc runfail x86 avx1-i64x4 Windows LLVM 3.3 cl -O2 * -.\tests\exclusive-scan-add-9.ispc runfail x86 avx1-i64x4 Windows LLVM 3.3 cl -O2 * -.\tests\max-uint-1.ispc runfail x86 avx1-i64x4 Windows LLVM 3.3 cl -O2 * -.\tests\max-uint.ispc runfail x86 avx1-i64x4 Windows LLVM 3.3 cl -O2 * -.\tests\min-uint-2.ispc runfail x86 avx1-i64x4 Windows LLVM 3.3 cl -O2 * -.\tests\packed-load-1.ispc runfail x86 avx1-i64x4 Windows LLVM 3.3 cl -O2 * -.\tests\packed-store.ispc runfail x86 avx1-i64x4 Windows LLVM 3.3 cl -O2 * -.\tests\packed-store2.ispc runfail x86 avx1-i64x4 Windows LLVM 3.3 cl -O2 * -.\tests\reduce-add-uint-1.ispc runfail x86 avx1-i64x4 Windows LLVM 3.3 cl -O2 * -.\tests\reduce-add-uint.ispc runfail x86 avx1-i64x4 Windows LLVM 3.3 cl -O2 * -.\tests\reduce-add-uint64-1.ispc runfail x86 avx1-i64x4 Windows LLVM 3.3 cl -O2 * -.\tests\reduce-add-uint64.ispc runfail x86 avx1-i64x4 Windows LLVM 3.3 cl -O2 * 
-.\tests\reduce-max-uint.ispc runfail x86 avx1-i64x4 Windows LLVM 3.3 cl -O2 * -.\tests\uint64-max-1.ispc runfail x86 avx1-i64x4 Windows LLVM 3.3 cl -O2 * -.\tests\uint64-max.ispc runfail x86 avx1-i64x4 Windows LLVM 3.3 cl -O2 * -.\tests\uint64-min-1.ispc runfail x86 avx1-i64x4 Windows LLVM 3.3 cl -O2 * -.\tests\uint64-min.ispc runfail x86 avx1-i64x4 Windows LLVM 3.3 cl -O2 * -.\tests\atomics-10.ispc runfail x86 avx1-i64x4 Windows LLVM 3.3 cl -O0 * -.\tests\atomics-varyingptr-1.ispc runfail x86 avx1-i64x4 Windows LLVM 3.3 cl -O0 * -.\tests\local-atomics-10.ispc runfail x86 avx1-i64x4 Windows LLVM 3.3 cl -O0 * -.\tests\local-atomics-varyingptr-1.ispc runfail x86 avx1-i64x4 Windows LLVM 3.3 cl -O0 * -.\tests\max-uint-1.ispc runfail x86 avx1-i64x4 Windows LLVM 3.3 cl -O0 * -.\tests\max-uint.ispc runfail x86 avx1-i64x4 Windows LLVM 3.3 cl -O0 * -.\tests\min-uint-2.ispc runfail x86 avx1-i64x4 Windows LLVM 3.3 cl -O0 * -.\tests\packed-load-1.ispc runfail x86 avx1-i64x4 Windows LLVM 3.3 cl -O0 * -.\tests\packed-store.ispc runfail x86 avx1-i64x4 Windows LLVM 3.3 cl -O0 * -.\tests\packed-store2.ispc runfail x86 avx1-i64x4 Windows LLVM 3.3 cl -O0 * -.\tests\reduce-add-uint-1.ispc runfail x86 avx1-i64x4 Windows LLVM 3.3 cl -O0 * -.\tests\reduce-add-uint.ispc runfail x86 avx1-i64x4 Windows LLVM 3.3 cl -O0 * -.\tests\reduce-add-uint64-1.ispc runfail x86 avx1-i64x4 Windows LLVM 3.3 cl -O0 * -.\tests\reduce-add-uint64.ispc runfail x86 avx1-i64x4 Windows LLVM 3.3 cl -O0 * -.\tests\reduce-max-uint.ispc runfail x86 avx1-i64x4 Windows LLVM 3.3 cl -O0 * -.\tests\exclusive-scan-add-10.ispc runfail x86 avx1.1-i32x8 Windows LLVM 3.3 cl -O2 * -.\tests\max-uint-1.ispc runfail x86 avx1.1-i32x8 Windows LLVM 3.3 cl -O2 * -.\tests\max-uint.ispc runfail x86 avx1.1-i32x8 Windows LLVM 3.3 cl -O2 * -.\tests\min-uint-2.ispc runfail x86 avx1.1-i32x8 Windows LLVM 3.3 cl -O2 * -.\tests\packed-load-1.ispc runfail x86 avx1.1-i32x8 Windows LLVM 3.3 cl -O2 * -.\tests\packed-store.ispc runfail x86 
avx1.1-i32x8 Windows LLVM 3.3 cl -O2 * -.\tests\packed-store2.ispc runfail x86 avx1.1-i32x8 Windows LLVM 3.3 cl -O2 * -.\tests\reduce-add-uint-1.ispc runfail x86 avx1.1-i32x8 Windows LLVM 3.3 cl -O2 * -.\tests\reduce-add-uint.ispc runfail x86 avx1.1-i32x8 Windows LLVM 3.3 cl -O2 * -.\tests\reduce-add-uint64-1.ispc runfail x86 avx1.1-i32x8 Windows LLVM 3.3 cl -O2 * -.\tests\reduce-add-uint64.ispc runfail x86 avx1.1-i32x8 Windows LLVM 3.3 cl -O2 * -.\tests\reduce-max-uint.ispc runfail x86 avx1.1-i32x8 Windows LLVM 3.3 cl -O2 * -.\tests\reduce-max-uint64.ispc runfail x86 avx1.1-i32x8 Windows LLVM 3.3 cl -O2 * -.\tests\uint64-max-1.ispc runfail x86 avx1.1-i32x8 Windows LLVM 3.3 cl -O2 * -.\tests\uint64-max.ispc runfail x86 avx1.1-i32x8 Windows LLVM 3.3 cl -O2 * -.\tests\uint64-min-1.ispc runfail x86 avx1.1-i32x8 Windows LLVM 3.3 cl -O2 * -.\tests\uint64-min.ispc runfail x86 avx1.1-i32x8 Windows LLVM 3.3 cl -O2 * -.\tests\atomics-10.ispc runfail x86 avx1.1-i32x8 Windows LLVM 3.3 cl -O0 * -.\tests\atomics-varyingptr-1.ispc runfail x86 avx1.1-i32x8 Windows LLVM 3.3 cl -O0 * -.\tests\local-atomics-10.ispc runfail x86 avx1.1-i32x8 Windows LLVM 3.3 cl -O0 * -.\tests\local-atomics-varyingptr-1.ispc runfail x86 avx1.1-i32x8 Windows LLVM 3.3 cl -O0 * -.\tests\max-uint-1.ispc runfail x86 avx1.1-i32x8 Windows LLVM 3.3 cl -O0 * -.\tests\max-uint.ispc runfail x86 avx1.1-i32x8 Windows LLVM 3.3 cl -O0 * -.\tests\min-uint-2.ispc runfail x86 avx1.1-i32x8 Windows LLVM 3.3 cl -O0 * -.\tests\packed-load-1.ispc runfail x86 avx1.1-i32x8 Windows LLVM 3.3 cl -O0 * -.\tests\packed-store.ispc runfail x86 avx1.1-i32x8 Windows LLVM 3.3 cl -O0 * -.\tests\packed-store2.ispc runfail x86 avx1.1-i32x8 Windows LLVM 3.3 cl -O0 * -.\tests\reduce-add-uint-1.ispc runfail x86 avx1.1-i32x8 Windows LLVM 3.3 cl -O0 * -.\tests\reduce-add-uint.ispc runfail x86 avx1.1-i32x8 Windows LLVM 3.3 cl -O0 * -.\tests\reduce-add-uint64-1.ispc runfail x86 avx1.1-i32x8 Windows LLVM 3.3 cl -O0 * 
-.\tests\reduce-add-uint64.ispc runfail x86 avx1.1-i32x8 Windows LLVM 3.3 cl -O0 * -.\tests\reduce-max-uint.ispc runfail x86 avx1.1-i32x8 Windows LLVM 3.3 cl -O0 * -.\tests\exclusive-scan-add-10.ispc runfail x86 avx1.1-i32x16 Windows LLVM 3.3 cl -O2 * -.\tests\max-uint-1.ispc runfail x86 avx1.1-i32x16 Windows LLVM 3.3 cl -O2 * -.\tests\max-uint.ispc runfail x86 avx1.1-i32x16 Windows LLVM 3.3 cl -O2 * -.\tests\min-uint-2.ispc runfail x86 avx1.1-i32x16 Windows LLVM 3.3 cl -O2 * -.\tests\packed-load-1.ispc runfail x86 avx1.1-i32x16 Windows LLVM 3.3 cl -O2 * -.\tests\packed-store.ispc runfail x86 avx1.1-i32x16 Windows LLVM 3.3 cl -O2 * -.\tests\packed-store2.ispc runfail x86 avx1.1-i32x16 Windows LLVM 3.3 cl -O2 * -.\tests\reduce-add-uint-1.ispc runfail x86 avx1.1-i32x16 Windows LLVM 3.3 cl -O2 * -.\tests\reduce-add-uint.ispc runfail x86 avx1.1-i32x16 Windows LLVM 3.3 cl -O2 * -.\tests\reduce-add-uint64-1.ispc runfail x86 avx1.1-i32x16 Windows LLVM 3.3 cl -O2 * -.\tests\reduce-add-uint64.ispc runfail x86 avx1.1-i32x16 Windows LLVM 3.3 cl -O2 * -.\tests\reduce-max-uint.ispc runfail x86 avx1.1-i32x16 Windows LLVM 3.3 cl -O2 * -.\tests\reduce-max-uint64.ispc runfail x86 avx1.1-i32x16 Windows LLVM 3.3 cl -O2 * -.\tests\reduce-min-uint64.ispc runfail x86 avx1.1-i32x16 Windows LLVM 3.3 cl -O2 * -.\tests\uint64-max-1.ispc runfail x86 avx1.1-i32x16 Windows LLVM 3.3 cl -O2 * -.\tests\uint64-max.ispc runfail x86 avx1.1-i32x16 Windows LLVM 3.3 cl -O2 * -.\tests\uint64-min-1.ispc runfail x86 avx1.1-i32x16 Windows LLVM 3.3 cl -O2 * -.\tests\uint64-min.ispc runfail x86 avx1.1-i32x16 Windows LLVM 3.3 cl -O2 * -.\tests\atomics-10.ispc runfail x86 avx1.1-i32x16 Windows LLVM 3.3 cl -O0 * -.\tests\atomics-varyingptr-1.ispc runfail x86 avx1.1-i32x16 Windows LLVM 3.3 cl -O0 * -.\tests\local-atomics-10.ispc runfail x86 avx1.1-i32x16 Windows LLVM 3.3 cl -O0 * -.\tests\local-atomics-varyingptr-1.ispc runfail x86 avx1.1-i32x16 Windows LLVM 3.3 cl -O0 * -.\tests\max-uint-1.ispc runfail x86 
avx1.1-i32x16 Windows LLVM 3.3 cl -O0 * -.\tests\max-uint.ispc runfail x86 avx1.1-i32x16 Windows LLVM 3.3 cl -O0 * -.\tests\min-uint-2.ispc runfail x86 avx1.1-i32x16 Windows LLVM 3.3 cl -O0 * -.\tests\packed-load-1.ispc runfail x86 avx1.1-i32x16 Windows LLVM 3.3 cl -O0 * -.\tests\packed-store.ispc runfail x86 avx1.1-i32x16 Windows LLVM 3.3 cl -O0 * -.\tests\packed-store2.ispc runfail x86 avx1.1-i32x16 Windows LLVM 3.3 cl -O0 * -.\tests\reduce-add-uint-1.ispc runfail x86 avx1.1-i32x16 Windows LLVM 3.3 cl -O0 * -.\tests\reduce-add-uint.ispc runfail x86 avx1.1-i32x16 Windows LLVM 3.3 cl -O0 * -.\tests\reduce-add-uint64-1.ispc runfail x86 avx1.1-i32x16 Windows LLVM 3.3 cl -O0 * -.\tests\reduce-add-uint64.ispc runfail x86 avx1.1-i32x16 Windows LLVM 3.3 cl -O0 * -.\tests\reduce-max-uint.ispc runfail x86 avx1.1-i32x16 Windows LLVM 3.3 cl -O0 * -.\tests\exclusive-scan-add-10.ispc runfail x86 avx1.1-i64x4 Windows LLVM 3.3 cl -O2 * -.\tests\exclusive-scan-add-9.ispc runfail x86 avx1.1-i64x4 Windows LLVM 3.3 cl -O2 * -.\tests\max-uint-1.ispc runfail x86 avx1.1-i64x4 Windows LLVM 3.3 cl -O2 * -.\tests\max-uint.ispc runfail x86 avx1.1-i64x4 Windows LLVM 3.3 cl -O2 * -.\tests\min-uint-2.ispc runfail x86 avx1.1-i64x4 Windows LLVM 3.3 cl -O2 * -.\tests\packed-load-1.ispc runfail x86 avx1.1-i64x4 Windows LLVM 3.3 cl -O2 * -.\tests\packed-store.ispc runfail x86 avx1.1-i64x4 Windows LLVM 3.3 cl -O2 * -.\tests\packed-store2.ispc runfail x86 avx1.1-i64x4 Windows LLVM 3.3 cl -O2 * -.\tests\reduce-add-uint-1.ispc runfail x86 avx1.1-i64x4 Windows LLVM 3.3 cl -O2 * -.\tests\reduce-add-uint.ispc runfail x86 avx1.1-i64x4 Windows LLVM 3.3 cl -O2 * -.\tests\reduce-add-uint64-1.ispc runfail x86 avx1.1-i64x4 Windows LLVM 3.3 cl -O2 * -.\tests\reduce-add-uint64.ispc runfail x86 avx1.1-i64x4 Windows LLVM 3.3 cl -O2 * -.\tests\reduce-max-uint.ispc runfail x86 avx1.1-i64x4 Windows LLVM 3.3 cl -O2 * -.\tests\uint64-max-1.ispc runfail x86 avx1.1-i64x4 Windows LLVM 3.3 cl -O2 * -.\tests\uint64-max.ispc 
runfail x86 avx1.1-i64x4 Windows LLVM 3.3 cl -O2 * -.\tests\uint64-min-1.ispc runfail x86 avx1.1-i64x4 Windows LLVM 3.3 cl -O2 * -.\tests\uint64-min.ispc runfail x86 avx1.1-i64x4 Windows LLVM 3.3 cl -O2 * -.\tests\atomics-10.ispc runfail x86 avx1.1-i64x4 Windows LLVM 3.3 cl -O0 * -.\tests\atomics-varyingptr-1.ispc runfail x86 avx1.1-i64x4 Windows LLVM 3.3 cl -O0 * -.\tests\local-atomics-10.ispc runfail x86 avx1.1-i64x4 Windows LLVM 3.3 cl -O0 * -.\tests\local-atomics-varyingptr-1.ispc runfail x86 avx1.1-i64x4 Windows LLVM 3.3 cl -O0 * -.\tests\max-uint-1.ispc runfail x86 avx1.1-i64x4 Windows LLVM 3.3 cl -O0 * -.\tests\max-uint.ispc runfail x86 avx1.1-i64x4 Windows LLVM 3.3 cl -O0 * -.\tests\min-uint-2.ispc runfail x86 avx1.1-i64x4 Windows LLVM 3.3 cl -O0 * -.\tests\packed-load-1.ispc runfail x86 avx1.1-i64x4 Windows LLVM 3.3 cl -O0 * -.\tests\packed-store.ispc runfail x86 avx1.1-i64x4 Windows LLVM 3.3 cl -O0 * -.\tests\packed-store2.ispc runfail x86 avx1.1-i64x4 Windows LLVM 3.3 cl -O0 * -.\tests\reduce-add-uint-1.ispc runfail x86 avx1.1-i64x4 Windows LLVM 3.3 cl -O0 * -.\tests\reduce-add-uint.ispc runfail x86 avx1.1-i64x4 Windows LLVM 3.3 cl -O0 * -.\tests\reduce-add-uint64-1.ispc runfail x86 avx1.1-i64x4 Windows LLVM 3.3 cl -O0 * -.\tests\reduce-add-uint64.ispc runfail x86 avx1.1-i64x4 Windows LLVM 3.3 cl -O0 * -.\tests\reduce-max-uint.ispc runfail x86 avx1.1-i64x4 Windows LLVM 3.3 cl -O0 * -.\tests\exclusive-scan-add-10.ispc runfail x86 avx2-i32x8 Windows LLVM 3.3 cl -O2 * -.\tests\max-uint-1.ispc runfail x86 avx2-i32x8 Windows LLVM 3.3 cl -O2 * -.\tests\max-uint.ispc runfail x86 avx2-i32x8 Windows LLVM 3.3 cl -O2 * -.\tests\min-uint-2.ispc runfail x86 avx2-i32x8 Windows LLVM 3.3 cl -O2 * -.\tests\packed-load-1.ispc runfail x86 avx2-i32x8 Windows LLVM 3.3 cl -O2 * -.\tests\packed-store.ispc runfail x86 avx2-i32x8 Windows LLVM 3.3 cl -O2 * -.\tests\packed-store2.ispc runfail x86 avx2-i32x8 Windows LLVM 3.3 cl -O2 * -.\tests\reduce-add-uint-1.ispc runfail x86 
avx2-i32x8 Windows LLVM 3.3 cl -O2 * -.\tests\reduce-add-uint.ispc runfail x86 avx2-i32x8 Windows LLVM 3.3 cl -O2 * -.\tests\reduce-add-uint64-1.ispc runfail x86 avx2-i32x8 Windows LLVM 3.3 cl -O2 * -.\tests\reduce-add-uint64.ispc runfail x86 avx2-i32x8 Windows LLVM 3.3 cl -O2 * -.\tests\reduce-max-uint.ispc runfail x86 avx2-i32x8 Windows LLVM 3.3 cl -O2 * -.\tests\reduce-max-uint64.ispc runfail x86 avx2-i32x8 Windows LLVM 3.3 cl -O2 * -.\tests\uint64-max-1.ispc runfail x86 avx2-i32x8 Windows LLVM 3.3 cl -O2 * -.\tests\atomics-10.ispc runfail x86 avx2-i32x8 Windows LLVM 3.3 cl -O0 * -.\tests\atomics-varyingptr-1.ispc runfail x86 avx2-i32x8 Windows LLVM 3.3 cl -O0 * -.\tests\local-atomics-10.ispc runfail x86 avx2-i32x8 Windows LLVM 3.3 cl -O0 * -.\tests\local-atomics-varyingptr-1.ispc runfail x86 avx2-i32x8 Windows LLVM 3.3 cl -O0 * -.\tests\max-uint-1.ispc runfail x86 avx2-i32x8 Windows LLVM 3.3 cl -O0 * -.\tests\max-uint.ispc runfail x86 avx2-i32x8 Windows LLVM 3.3 cl -O0 * -.\tests\min-uint-2.ispc runfail x86 avx2-i32x8 Windows LLVM 3.3 cl -O0 * -.\tests\packed-load-1.ispc runfail x86 avx2-i32x8 Windows LLVM 3.3 cl -O0 * -.\tests\packed-store.ispc runfail x86 avx2-i32x8 Windows LLVM 3.3 cl -O0 * -.\tests\packed-store2.ispc runfail x86 avx2-i32x8 Windows LLVM 3.3 cl -O0 * -.\tests\reduce-add-uint-1.ispc runfail x86 avx2-i32x8 Windows LLVM 3.3 cl -O0 * -.\tests\reduce-add-uint.ispc runfail x86 avx2-i32x8 Windows LLVM 3.3 cl -O0 * -.\tests\reduce-add-uint64-1.ispc runfail x86 avx2-i32x8 Windows LLVM 3.3 cl -O0 * -.\tests\reduce-add-uint64.ispc runfail x86 avx2-i32x8 Windows LLVM 3.3 cl -O0 * -.\tests\reduce-max-uint.ispc runfail x86 avx2-i32x8 Windows LLVM 3.3 cl -O0 * -.\tests\exclusive-scan-add-10.ispc runfail x86 avx2-i32x16 Windows LLVM 3.3 cl -O2 * -.\tests\max-uint-1.ispc runfail x86 avx2-i32x16 Windows LLVM 3.3 cl -O2 * -.\tests\max-uint.ispc runfail x86 avx2-i32x16 Windows LLVM 3.3 cl -O2 * -.\tests\min-uint-2.ispc runfail x86 avx2-i32x16 Windows LLVM 3.3 cl 
-O2 * -.\tests\packed-load-1.ispc runfail x86 avx2-i32x16 Windows LLVM 3.3 cl -O2 * -.\tests\packed-store.ispc runfail x86 avx2-i32x16 Windows LLVM 3.3 cl -O2 * -.\tests\packed-store2.ispc runfail x86 avx2-i32x16 Windows LLVM 3.3 cl -O2 * -.\tests\reduce-add-uint-1.ispc runfail x86 avx2-i32x16 Windows LLVM 3.3 cl -O2 * -.\tests\reduce-add-uint.ispc runfail x86 avx2-i32x16 Windows LLVM 3.3 cl -O2 * -.\tests\reduce-add-uint64-1.ispc runfail x86 avx2-i32x16 Windows LLVM 3.3 cl -O2 * -.\tests\reduce-add-uint64.ispc runfail x86 avx2-i32x16 Windows LLVM 3.3 cl -O2 * -.\tests\reduce-max-uint.ispc runfail x86 avx2-i32x16 Windows LLVM 3.3 cl -O2 * -.\tests\reduce-max-uint64.ispc runfail x86 avx2-i32x16 Windows LLVM 3.3 cl -O2 * -.\tests\reduce-min-uint64.ispc runfail x86 avx2-i32x16 Windows LLVM 3.3 cl -O2 * -.\tests\uint64-max-1.ispc runfail x86 avx2-i32x16 Windows LLVM 3.3 cl -O2 * -.\tests\atomics-10.ispc runfail x86 avx2-i32x16 Windows LLVM 3.3 cl -O0 * -.\tests\atomics-varyingptr-1.ispc runfail x86 avx2-i32x16 Windows LLVM 3.3 cl -O0 * -.\tests\local-atomics-10.ispc runfail x86 avx2-i32x16 Windows LLVM 3.3 cl -O0 * -.\tests\local-atomics-varyingptr-1.ispc runfail x86 avx2-i32x16 Windows LLVM 3.3 cl -O0 * -.\tests\max-uint-1.ispc runfail x86 avx2-i32x16 Windows LLVM 3.3 cl -O0 * -.\tests\max-uint.ispc runfail x86 avx2-i32x16 Windows LLVM 3.3 cl -O0 * -.\tests\min-uint-2.ispc runfail x86 avx2-i32x16 Windows LLVM 3.3 cl -O0 * -.\tests\packed-load-1.ispc runfail x86 avx2-i32x16 Windows LLVM 3.3 cl -O0 * -.\tests\packed-store.ispc runfail x86 avx2-i32x16 Windows LLVM 3.3 cl -O0 * -.\tests\packed-store2.ispc runfail x86 avx2-i32x16 Windows LLVM 3.3 cl -O0 * -.\tests\reduce-add-uint-1.ispc runfail x86 avx2-i32x16 Windows LLVM 3.3 cl -O0 * -.\tests\reduce-add-uint.ispc runfail x86 avx2-i32x16 Windows LLVM 3.3 cl -O0 * -.\tests\reduce-add-uint64-1.ispc runfail x86 avx2-i32x16 Windows LLVM 3.3 cl -O0 * -.\tests\reduce-add-uint64.ispc runfail x86 avx2-i32x16 Windows LLVM 3.3 cl 
-O0 * -.\tests\reduce-max-uint.ispc runfail x86 avx2-i32x16 Windows LLVM 3.3 cl -O0 * -.\tests\exclusive-scan-add-10.ispc runfail x86 avx2-i64x4 Windows LLVM 3.3 cl -O2 * -.\tests\exclusive-scan-add-9.ispc runfail x86 avx2-i64x4 Windows LLVM 3.3 cl -O2 * -.\tests\max-uint-1.ispc runfail x86 avx2-i64x4 Windows LLVM 3.3 cl -O2 * -.\tests\max-uint.ispc runfail x86 avx2-i64x4 Windows LLVM 3.3 cl -O2 * -.\tests\min-uint-2.ispc runfail x86 avx2-i64x4 Windows LLVM 3.3 cl -O2 * -.\tests\packed-load-1.ispc runfail x86 avx2-i64x4 Windows LLVM 3.3 cl -O2 * -.\tests\packed-store.ispc runfail x86 avx2-i64x4 Windows LLVM 3.3 cl -O2 * -.\tests\packed-store2.ispc runfail x86 avx2-i64x4 Windows LLVM 3.3 cl -O2 * -.\tests\reduce-add-uint-1.ispc runfail x86 avx2-i64x4 Windows LLVM 3.3 cl -O2 * -.\tests\reduce-add-uint.ispc runfail x86 avx2-i64x4 Windows LLVM 3.3 cl -O2 * -.\tests\reduce-add-uint64-1.ispc runfail x86 avx2-i64x4 Windows LLVM 3.3 cl -O2 * -.\tests\reduce-add-uint64.ispc runfail x86 avx2-i64x4 Windows LLVM 3.3 cl -O2 * -.\tests\reduce-max-uint.ispc runfail x86 avx2-i64x4 Windows LLVM 3.3 cl -O2 * -.\tests\uint64-max-1.ispc runfail x86 avx2-i64x4 Windows LLVM 3.3 cl -O2 * -.\tests\atomics-10.ispc runfail x86 avx2-i64x4 Windows LLVM 3.3 cl -O0 * -.\tests\atomics-varyingptr-1.ispc runfail x86 avx2-i64x4 Windows LLVM 3.3 cl -O0 * -.\tests\local-atomics-10.ispc runfail x86 avx2-i64x4 Windows LLVM 3.3 cl -O0 * -.\tests\local-atomics-varyingptr-1.ispc runfail x86 avx2-i64x4 Windows LLVM 3.3 cl -O0 * -.\tests\max-uint-1.ispc runfail x86 avx2-i64x4 Windows LLVM 3.3 cl -O0 * -.\tests\max-uint.ispc runfail x86 avx2-i64x4 Windows LLVM 3.3 cl -O0 * -.\tests\min-uint-2.ispc runfail x86 avx2-i64x4 Windows LLVM 3.3 cl -O0 * -.\tests\packed-load-1.ispc runfail x86 avx2-i64x4 Windows LLVM 3.3 cl -O0 * -.\tests\packed-store.ispc runfail x86 avx2-i64x4 Windows LLVM 3.3 cl -O0 * -.\tests\packed-store2.ispc runfail x86 avx2-i64x4 Windows LLVM 3.3 cl -O0 * -.\tests\reduce-add-uint-1.ispc 
runfail x86 avx2-i64x4 Windows LLVM 3.3 cl -O0 * -.\tests\reduce-add-uint.ispc runfail x86 avx2-i64x4 Windows LLVM 3.3 cl -O0 * -.\tests\reduce-add-uint64-1.ispc runfail x86 avx2-i64x4 Windows LLVM 3.3 cl -O0 * -.\tests\reduce-add-uint64.ispc runfail x86 avx2-i64x4 Windows LLVM 3.3 cl -O0 * -.\tests\reduce-max-uint.ispc runfail x86 avx2-i64x4 Windows LLVM 3.3 cl -O0 * -.\tests\atomics-13.ispc compfail x86-64 sse4-i16x8 Windows LLVM 3.3 cl -O2 * -.\tests\atomics-13.ispc compfail x86-64 sse4-i8x16 Windows LLVM 3.3 cl -O2 * -.\tests\ptr-int-1.ispc runfail x86 avx2-i32x8 Windows LLVM 3.4 cl -O2 * -.\tests\ptr-int-1.ispc runfail x86 avx2-i32x16 Windows LLVM 3.4 cl -O2 * -.\tests\ptr-int-1.ispc runfail x86 avx2-i64x4 Windows LLVM 3.4 cl -O2 * -.\tests\ptr-int-1.ispc runfail x86 avx2-i32x8 Windows LLVM 3.5 cl -O2 * -.\tests\ptr-int-1.ispc runfail x86 avx2-i32x16 Windows LLVM 3.5 cl -O2 * -.\tests\ptr-int-1.ispc runfail x86 avx2-i64x4 Windows LLVM 3.5 cl -O2 * +.\tests\foreach-double-1.ispc runfail x86 avx2-i32x8 Windows LLVM 3.5 cl -O2 * +.\tests\foreach-double-1.ispc runfail x86 avx2-i32x16 Windows LLVM 3.5 cl -O2 * +.\tests\foreach-double-1.ispc runfail x86 avx2-i64x4 Windows LLVM 3.5 cl -O2 * +./tests/ptr-22.ispc runfail x86-64 generic-4 Linux LLVM 3.3 clang++3.4 -O0 * +./tests/ptr-22.ispc runfail x86-64 generic-16 Linux LLVM 3.3 clang++3.4 -O0 * +./tests/ptr-22.ispc runfail x86-64 generic-4 Linux LLVM 3.4 clang++3.4 -O0 * +./tests/ptr-22.ispc runfail x86-64 generic-16 Linux LLVM 3.4 clang++3.4 -O0 * +./tests/ptr-22.ispc runfail x86-64 generic-4 Linux LLVM 3.5 clang++3.4 -O0 * +./tests/ptr-22.ispc runfail x86-64 generic-16 Linux LLVM 3.5 clang++3.4 -O0 * diff --git a/func.cpp b/func.cpp index 578dd68a..b821ec87 100644 --- a/func.cpp +++ b/func.cpp @@ -1,5 +1,5 @@ /* - Copyright (c) 2011-2013, Intel Corporation + Copyright (c) 2011-2014, Intel Corporation All rights reserved. 
Redistribution and use in source and binary forms, with or without @@ -74,11 +74,12 @@ #if defined(LLVM_3_5) #include #include + #include #else #include #include + #include #endif -#include #include Function::Function(Symbol *s, Stmt *c) { @@ -438,7 +439,7 @@ Function::emitCode(FunctionEmitContext *ctx, llvm::Function *function, // issue a warning. Also need to warn if it's the entry block for // the function (in which case it will not have predeccesors but is // still reachable.) - if (Type::Equal(type->GetReturnType(), AtomicType::Void) == false && + if (type->GetReturnType()->IsVoidType() == false && (pred_begin(ec.bblock) != pred_end(ec.bblock) || (ec.bblock == entryBBlock))) Warning(sym->pos, "Missing return statement in function returning \"%s\".", type->rType->GetString().c_str()); diff --git a/func.h b/func.h index 88a96dbc..3019eeb1 100644 --- a/func.h +++ b/func.h @@ -1,5 +1,5 @@ /* - Copyright (c) 2011-2012, Intel Corporation + Copyright (c) 2011-2013, Intel Corporation All rights reserved. Redistribution and use in source and binary forms, with or without diff --git a/ispc.cpp b/ispc.cpp index 88befdf9..e44d2e5c 100644 --- a/ispc.cpp +++ b/ispc.cpp @@ -1,5 +1,5 @@ /* - Copyright (c) 2010-2013, Intel Corporation + Copyright (c) 2010-2014, Intel Corporation All rights reserved. Redistribution and use in source and binary forms, with or without @@ -57,9 +57,9 @@ #include #include #endif -#if defined(LLVM_3_1) - #include - #include +#if defined(LLVM_3_5) + #include + #include #else #include #include diff --git a/ispc.h b/ispc.h index 0f241da4..b6e60a79 100644 --- a/ispc.h +++ b/ispc.h @@ -1,5 +1,5 @@ /* - Copyright (c) 2010-2013, Intel Corporation + Copyright (c) 2010-2014, Intel Corporation All rights reserved. 
Redistribution and use in source and binary forms, with or without diff --git a/ispc.sln b/ispc.sln index 8febee18..a9dbb793 100755 --- a/ispc.sln +++ b/ispc.sln @@ -1,6 +1,6 @@  -Microsoft Visual Studio Solution File, Format Version 11.00 -# Visual Studio 2010 +Microsoft Visual Studio Solution File, Format Version 12.00 +# Visual Studio 2012 Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "ispc", "ispc.vcxproj", "{9861F490-F516-480C-B63C-D62A77AFA9D5}" EndProject Global diff --git a/module.cpp b/module.cpp index 25db0fed..d8f165bc 100644 --- a/module.cpp +++ b/module.cpp @@ -1,5 +1,5 @@ /* - Copyright (c) 2010-2013, Intel Corporation + Copyright (c) 2010-2014, Intel Corporation All rights reserved. Redistribution and use in source and binary forms, with or without @@ -86,9 +86,7 @@ #include #include #include -#if defined(LLVM_3_1) - #include -#elif defined(LLVM_3_2) +#if defined(LLVM_3_2) #include #include #else // LLVM 3.3+ @@ -98,11 +96,12 @@ #if defined(LLVM_3_5) #include #include + #include #else #include #include + #include #endif -#include #include #include #include @@ -427,7 +426,7 @@ Module::AddGlobalVariable(const std::string &name, const Type *type, Expr *initE return; } - if (Type::Equal(type, AtomicType::Void)) { + if (type->IsVoidType()) { Error(pos, "\"void\" type global variable is illegal."); return; } @@ -867,7 +866,7 @@ Module::AddFunctionDeclaration(const std::string &name, "exported function \"%s\"", name.c_str()); if (functionType->isTask && - Type::Equal(functionType->GetReturnType(), AtomicType::Void) == false) + functionType->GetReturnType()->IsVoidType() == false) Error(pos, "Task-qualified functions must have void return type."); if (g->target->getISA() == Target::NVPTX && @@ -1008,6 +1007,15 @@ Module::writeOutput(OutputType outputType, const char *outFileName, lStripUnusedDebugInfo(module); } +#if defined (LLVM_3_4) || defined (LLVM_3_5) + // In LLVM_3_4 after r195494 and r195504 revisions we should pass + // "Debug Info Version" 
constant to the module. LLVM will ignore + // our Debug Info metadata without it. + if (g->generateDebuggingSymbols == true) { + module->addModuleFlag(llvm::Module::Error, "Debug Info Version", llvm::DEBUG_METADATA_VERSION); + } +#endif + // First, issue a warning if the output file suffix and the type of // file being created seem to mismatch. This can help catch missing // command-line arguments specifying the output file type. @@ -1150,9 +1158,13 @@ Module::writeObjectFileOrAssembly(llvm::TargetMachine *targetMachine, bool binary = (fileType == llvm::TargetMachine::CGFT_ObjectFile); #if defined(LLVM_3_1) || defined(LLVM_3_2) || defined(LLVM_3_3) unsigned int flags = binary ? llvm::raw_fd_ostream::F_Binary : 0; -#else +#elif defined(LLVM_3_4) llvm::sys::fs::OpenFlags flags = binary ? llvm::sys::fs::F_Binary : llvm::sys::fs::F_None; +#else + llvm::sys::fs::OpenFlags flags = binary ? llvm::sys::fs::F_None : + llvm::sys::fs::F_Text; + #endif std::string error; @@ -1163,8 +1175,8 @@ Module::writeObjectFileOrAssembly(llvm::TargetMachine *targetMachine, } llvm::PassManager pm; -#if defined(LLVM_3_1) - pm.add(new llvm::TargetData(*g->target->getDataLayout())); +#if defined(LLVM_3_5) + pm.add(new llvm::DataLayoutPass(*g->target->getDataLayout())); #else pm.add(new llvm::DataLayout(*g->target->getDataLayout())); #endif @@ -1222,11 +1234,11 @@ lContainsPtrToVarying(const StructType *st) { */ static void lEmitStructDecl(const StructType *st, std::vector *emittedStructs, - FILE *file, bool printGenericHeader=false, bool emitUnifs=true) { + FILE *file, bool emitUnifs=true) { // if we're emitting this for a generic dispatch header file and it's // struct that only contains uniforms, don't bother if we're emitting uniforms - if (printGenericHeader && !emitUnifs && !lContainsPtrToVarying(st)) { + if (!emitUnifs && !lContainsPtrToVarying(st)) { return; } @@ -1242,33 +1254,20 @@ lEmitStructDecl(const StructType *st, std::vector *emittedSt const StructType *elementStructType = 
lGetElementStructType(st->GetElementType(i)); if (elementStructType != NULL) - lEmitStructDecl(elementStructType, emittedStructs, file, printGenericHeader, emitUnifs); + lEmitStructDecl(elementStructType, emittedStructs, file, emitUnifs); } // And now it's safe to declare this one emittedStructs->push_back(st); - - if (printGenericHeader && lContainsPtrToVarying(st)) { - fprintf(file, "#ifndef __ISPC_STRUCT_%s%d__\n", - st->GetStructName().c_str(), - g->target->getVectorWidth()); - fprintf(file, "#define __ISPC_STRUCT_%s%d__\n", - st->GetStructName().c_str(), - g->target->getVectorWidth()); - } - else { - fprintf(file, "#ifndef __ISPC_STRUCT_%s__\n",st->GetStructName().c_str()); - fprintf(file, "#define __ISPC_STRUCT_%s__\n",st->GetStructName().c_str()); - } - fprintf(file, "struct %s", st->GetStructName().c_str()); + fprintf(file, "#ifndef __ISPC_STRUCT_%s__\n",st->GetCStructName().c_str()); + fprintf(file, "#define __ISPC_STRUCT_%s__\n",st->GetCStructName().c_str()); + + fprintf(file, "struct %s", st->GetCStructName().c_str()); if (st->GetSOAWidth() > 0) // This has to match the naming scheme in // StructType::GetCDeclaration(). fprintf(file, "_SOA%d", st->GetSOAWidth()); - if (printGenericHeader && lContainsPtrToVarying(st)) { - fprintf(file, "%d", g->target->getVectorWidth()); - } fprintf(file, " {\n"); for (int i = 0; i < st->GetElementCount(); ++i) { @@ -1285,10 +1284,10 @@ lEmitStructDecl(const StructType *st, std::vector *emittedSt header file, emit their declarations. 
*/ static void -lEmitStructDecls(std::vector &structTypes, FILE *file, bool printGenericHeader=false, bool emitUnifs=true) { +lEmitStructDecls(std::vector &structTypes, FILE *file, bool emitUnifs=true) { std::vector emittedStructs; for (unsigned int i = 0; i < structTypes.size(); ++i) - lEmitStructDecl(structTypes[i], &emittedStructs, file, printGenericHeader, emitUnifs); + lEmitStructDecl(structTypes[i], &emittedStructs, file, emitUnifs); } @@ -2004,7 +2003,7 @@ Module::writeDispatchHeader(DispatchHeaderInfo *DHI) { lEmitVectorTypedefs(exportedVectorTypes, f); lEmitEnumDecls(exportedEnumTypes, f); } - lEmitStructDecls(exportedStructTypes, f, true, DHI->EmitUnifs); + lEmitStructDecls(exportedStructTypes, f, DHI->EmitUnifs); // Update flags DHI->EmitUnifs = false; @@ -2185,12 +2184,12 @@ Module::execPreprocessor(const char *infilename, llvm::raw_string_ostream *ostre opts.addMacroDef("taskCount=__taskCount()"); } -#if defined(LLVM_3_1) - inst.getLangOpts().BCPLComment = 1; -#else inst.getLangOpts().LineComment = 1; -#endif +#if defined(LLVM_3_5) + inst.createPreprocessor(clang::TU_Complete); +#else inst.createPreprocessor(); +#endif diagPrinter->BeginSourceFile(inst.getLangOpts(), &inst.getPreprocessor()); clang::DoPrintPreprocessedInput(inst.getPreprocessor(), diff --git a/module.h b/module.h index 3609260c..c1350063 100644 --- a/module.h +++ b/module.h @@ -1,5 +1,5 @@ /* - Copyright (c) 2010-2012, Intel Corporation + Copyright (c) 2010-2014, Intel Corporation All rights reserved. 
Redistribution and use in source and binary forms, with or without @@ -41,9 +41,12 @@ #include "ispc.h" #include "ast.h" -#if !defined(LLVM_3_1) && !defined(LLVM_3_2) && !defined(LLVM_3_3) +#if defined(LLVM_3_4) #include #endif +#if defined(LLVM_3_5) + #include +#endif namespace llvm { diff --git a/opt.cpp b/opt.cpp index 19ce8386..12474e6b 100644 --- a/opt.cpp +++ b/opt.cpp @@ -1,5 +1,5 @@ /* - Copyright (c) 2010-2013, Intel Corporation + Copyright (c) 2010-2014, Intel Corporation All rights reserved. Redistribution and use in source and binary forms, with or without @@ -71,9 +71,13 @@ #if defined(LLVM_3_5) #include #include + #include + #include #else #include #include + #include + #include #endif #include #include @@ -83,9 +87,7 @@ #include #include #include -#if defined(LLVM_3_1) - #include -#elif defined(LLVM_3_2) +#if defined(LLVM_3_2) #include #else // LLVM 3.3+ #include @@ -94,12 +96,6 @@ #include #include #include -#include -#if defined(LLVM_3_1) - #include -#else - #include -#endif #include #ifdef ISPC_IS_LINUX #include @@ -474,19 +470,19 @@ Optimize(llvm::Module *module, int optLevel) { new llvm::TargetLibraryInfo(llvm::Triple(module->getTargetTriple())); optPM.add(targetLibraryInfo); - -#if defined(LLVM_3_1) - optPM.add(new llvm::TargetData(*g->target->getDataLayout())); +#if defined(LLVM_3_5) + optPM.add(new llvm::DataLayoutPass(*g->target->getDataLayout())); #else optPM.add(new llvm::DataLayout(*g->target->getDataLayout())); +#endif llvm::TargetMachine *targetMachine = g->target->GetTargetMachine(); - #ifdef LLVM_3_2 + +#ifdef LLVM_3_2 optPM.add(new llvm::TargetTransformInfo(targetMachine->getScalarTargetTransformInfo(), targetMachine->getVectorTargetTransformInfo())); - #else // LLVM 3.3+ +#else // LLVM 3.3+ targetMachine->addAnalysisPasses(optPM.getPM()); - #endif #endif optPM.add(llvm::createIndVarSimplifyPass()); diff --git a/opt.h b/opt.h index 63c5d5b4..1e3584b9 100644 --- a/opt.h +++ b/opt.h @@ -1,5 +1,5 @@ /* - Copyright (c) 2010-2011, Intel 
Corporation + Copyright (c) 2010-2013, Intel Corporation All rights reserved. Redistribution and use in source and binary forms, with or without diff --git a/parse.yy b/parse.yy index 9a0377c5..39693b70 100644 --- a/parse.yy +++ b/parse.yy @@ -617,7 +617,7 @@ rate_qualified_type_specifier { if ($2 == NULL) $$ = NULL; - else if (Type::Equal($2, AtomicType::Void)) { + else if ($2->IsVoidType()) { Error(@1, "\"uniform\" qualifier is illegal with \"void\" type."); $$ = NULL; } @@ -628,7 +628,7 @@ rate_qualified_type_specifier { if ($2 == NULL) $$ = NULL; - else if (Type::Equal($2, AtomicType::Void)) { + else if ($2->IsVoidType()) { Error(@1, "\"varying\" qualifier is illegal with \"void\" type."); $$ = NULL; } @@ -1081,7 +1081,7 @@ specifier_qualifier_list { if ($2 != NULL) { if ($1 == TYPEQUAL_UNIFORM) { - if (Type::Equal($2, AtomicType::Void)) { + if ($2->IsVoidType()) { Error(@1, "\"uniform\" qualifier is illegal with \"void\" type."); $$ = NULL; } @@ -1089,7 +1089,7 @@ specifier_qualifier_list $$ = $2->GetAsUniformType(); } else if ($1 == TYPEQUAL_VARYING) { - if (Type::Equal($2, AtomicType::Void)) { + if ($2->IsVoidType()) { Error(@1, "\"varying\" qualifier is illegal with \"void\" type."); $$ = NULL; } diff --git a/stdlib.ispc b/stdlib.ispc index 04d6f791..2ec3859e 100644 --- a/stdlib.ispc +++ b/stdlib.ispc @@ -1,6 +1,6 @@ // -*- mode: c++ -*- /* - Copyright (c) 2010-2012, Intel Corporation + Copyright (c) 2010-2014, Intel Corporation All rights reserved. Redistribution and use in source and binary forms, with or without @@ -29,13 +29,13 @@ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ /** @file stdlib.ispc @brief Portion of the ispc standard library implementation that's in - ispc code + ispc code */ #if (ISPC_MASK_BITS == 1) @@ -186,52 +186,52 @@ static inline int16 broadcast(int16 v, uniform int i) { return __broadcast_i16(v, i); } -__declspec(safe) +__declspec(safe) static inline int32 broadcast(int32 v, uniform int i) { return __broadcast_i32(v, i); } -__declspec(safe) +__declspec(safe) static inline double broadcast(double v, uniform int i) { return __broadcast_double(v, i); } -__declspec(safe) +__declspec(safe) static inline int64 broadcast(int64 v, uniform int i) { return __broadcast_i64(v, i); } -__declspec(safe) +__declspec(safe) static inline float rotate(float v, uniform int i) { return __rotate_float(v, i); } -__declspec(safe) +__declspec(safe) static inline int8 rotate(int8 v, uniform int i) { return __rotate_i8(v, i); } -__declspec(safe) +__declspec(safe) static inline int16 rotate(int16 v, uniform int i) { return __rotate_i16(v, i); } -__declspec(safe) +__declspec(safe) static inline int32 rotate(int32 v, uniform int i) { return __rotate_i32(v, i); } -__declspec(safe) +__declspec(safe) static inline double rotate(double v, uniform int i) { return __rotate_double(v, i); } -__declspec(safe) +__declspec(safe) static inline int64 rotate(int64 v, uniform int i) { return __rotate_i64(v, i); } -__declspec(safe) +__declspec(safe) static inline float shift(float v, uniform int i) { varying float result; unmasked { @@ -240,7 +240,7 @@ static inline float shift(float v, uniform int i) { return result; } -__declspec(safe) +__declspec(safe) static inline int8 shift(int8 v, uniform int i) { varying int8 result; unmasked { @@ -249,7 +249,7 @@ static inline int8 shift(int8 v, uniform int i) { return result; } -__declspec(safe) +__declspec(safe) static inline int16 shift(int16 v, uniform int i) { varying int16 result; unmasked { @@ -258,7 +258,7 @@ static inline int16 shift(int16 v, uniform int i) { return result; } -__declspec(safe) 
+__declspec(safe) static inline int32 shift(int32 v, uniform int i) { varying int32 result; unmasked { @@ -267,7 +267,7 @@ static inline int32 shift(int32 v, uniform int i) { return result; } -__declspec(safe) +__declspec(safe) static inline double shift(double v, uniform int i) { varying double result; unmasked { @@ -276,7 +276,7 @@ static inline double shift(double v, uniform int i) { return result; } -__declspec(safe) +__declspec(safe) static inline int64 shift(int64 v, uniform int i) { varying int64 result; unmasked { @@ -285,184 +285,184 @@ static inline int64 shift(int64 v, uniform int i) { return result; } -__declspec(safe) +__declspec(safe) static inline float shuffle(float v, int i) { return __shuffle_float(v, i); } -__declspec(safe) +__declspec(safe) static inline int8 shuffle(int8 v, int i) { return __shuffle_i8(v, i); } -__declspec(safe) +__declspec(safe) static inline int16 shuffle(int16 v, int i) { return __shuffle_i16(v, i); } -__declspec(safe) +__declspec(safe) static inline int32 shuffle(int32 v, int i) { return __shuffle_i32(v, i); } -__declspec(safe) +__declspec(safe) static inline double shuffle(double v, int i) { return __shuffle_double(v, i); } -__declspec(safe) +__declspec(safe) static inline int64 shuffle(int64 v, int i) { return __shuffle_i64(v, i); } -__declspec(safe) +__declspec(safe) static inline float shuffle(float v0, float v1, int i) { return __shuffle2_float(v0, v1, i); } -__declspec(safe) +__declspec(safe) static inline int8 shuffle(int8 v0, int8 v1, int i) { return __shuffle2_i8(v0, v1, i); } -__declspec(safe) +__declspec(safe) static inline int16 shuffle(int16 v0, int16 v1, int i) { return __shuffle2_i16(v0, v1, i); } -__declspec(safe) +__declspec(safe) static inline int32 shuffle(int32 v0, int32 v1, int i) { return __shuffle2_i32(v0, v1, i); } -__declspec(safe) +__declspec(safe) static inline double shuffle(double v0, double v1, int i) { return __shuffle2_double(v0, v1, i); } -__declspec(safe) +__declspec(safe) static inline 
int64 shuffle(int64 v0, int64 v1, int i) { return __shuffle2_i64(v0, v1, i); } // x[i] -__declspec(safe,cost1) +__declspec(safe,cost1) static inline uniform float extract(float x, uniform int i) { return floatbits(__extract_int32((int)intbits(x), i)); } -__declspec(safe,cost1) +__declspec(safe,cost1) static inline uniform int8 extract(int8 x, uniform int i) { return __extract_int8(x, i); } -__declspec(safe,cost1) +__declspec(safe,cost1) static inline uniform unsigned int8 extract(unsigned int8 x, uniform int i) { return __extract_int8(x, (unsigned int)i); } -__declspec(safe,cost1) +__declspec(safe,cost1) static inline uniform int16 extract(int16 x, uniform int i) { return __extract_int16(x, i); } -__declspec(safe,cost1) +__declspec(safe,cost1) static inline uniform unsigned int16 extract(unsigned int16 x, uniform int i) { return __extract_int16(x, (unsigned int)i); } -__declspec(safe,cost1) +__declspec(safe,cost1) static inline uniform int32 extract(int32 x, uniform int i) { return __extract_int32(x, i); } -__declspec(safe,cost1) +__declspec(safe,cost1) static inline uniform unsigned int32 extract(unsigned int32 x, uniform int i) { return __extract_int32(x, (unsigned int)i); } -__declspec(safe,cost1) +__declspec(safe,cost1) static inline uniform double extract(double x, uniform int i) { return doublebits(__extract_int64((int64)intbits(x), i)); } -__declspec(safe,cost1) +__declspec(safe,cost1) static inline uniform int64 extract(int64 x, uniform int i) { return __extract_int64(x, i); } -__declspec(safe,cost1) +__declspec(safe,cost1) static inline uniform unsigned int64 extract(unsigned int64 x, uniform int i) { return __extract_int64(x, (unsigned int)i); } // x[i] = v -__declspec(safe,cost1) +__declspec(safe,cost1) static inline float insert(float x, uniform int i, uniform float v) { return floatbits(__insert_int32((int)intbits(x), i, (int)intbits(v))); } -__declspec(safe,cost1) +__declspec(safe,cost1) static inline int8 insert(int8 x, uniform int i, uniform int8 v) 
{ return __insert_int8(x, i, v); } -__declspec(safe,cost1) -static inline unsigned int8 insert(unsigned int8 x, uniform int i, +__declspec(safe,cost1) +static inline unsigned int8 insert(unsigned int8 x, uniform int i, uniform unsigned int8 v) { return __insert_int8(x, (unsigned int)i, v); } -__declspec(safe,cost1) +__declspec(safe,cost1) static inline int16 insert(int16 x, uniform int i, uniform int16 v) { return __insert_int16(x, i, v); } -__declspec(safe,cost1) -static inline unsigned int16 insert(unsigned int16 x, uniform int i, +__declspec(safe,cost1) +static inline unsigned int16 insert(unsigned int16 x, uniform int i, uniform unsigned int16 v) { return __insert_int16(x, (unsigned int)i, v); } -__declspec(safe,cost1) +__declspec(safe,cost1) static inline int32 insert(int32 x, uniform int i, uniform int32 v) { return __insert_int32(x, i, v); } -__declspec(safe,cost1) -static inline unsigned int32 insert(unsigned int32 x, uniform int i, +__declspec(safe,cost1) +static inline unsigned int32 insert(unsigned int32 x, uniform int i, uniform unsigned int32 v) { return __insert_int32(x, (unsigned int)i, v); } -__declspec(safe,cost1) +__declspec(safe,cost1) static inline double insert(double x, uniform int i, uniform double v) { return doublebits(__insert_int64((int64)intbits(x), i, (int64)intbits(v))); } -__declspec(safe,cost1) +__declspec(safe,cost1) static inline int64 insert(int64 x, uniform int i, uniform int64 v) { return __insert_int64(x, i, v); } -__declspec(safe,cost1) -static inline unsigned int64 insert(unsigned int64 x, uniform int i, +__declspec(safe,cost1) +static inline unsigned int64 insert(unsigned int64 x, uniform int i, uniform unsigned int64 v) { return __insert_int64(x, (unsigned int)i, v); } -__declspec(safe,cost1) +__declspec(safe,cost1) static inline uniform int32 sign_extend(uniform bool v) { return __sext_uniform_bool(v); } -__declspec(safe,cost1) +__declspec(safe,cost1) static inline int32 sign_extend(bool v) { return __sext_varying_bool(v); 
} -__declspec(safe) +__declspec(safe) static inline uniform bool any(bool v) { // We only care about whether "any" is true for the active program instances, // so we have to make v with the current program mask. @@ -473,7 +473,7 @@ static inline uniform bool any(bool v) { #endif } -__declspec(safe) +__declspec(safe) static inline uniform bool all(bool v) { // As with any(), we need to explicitly mask v with the current program mask // so we're only looking at the current lanes @@ -495,17 +495,17 @@ static inline uniform bool none(bool v) { #endif } -__declspec(safe) +__declspec(safe) static inline uniform int32 popcnt(uniform int32 v) { return __popcnt_int32(v); } -__declspec(safe) +__declspec(safe) static inline uniform int popcnt(uniform int64 v) { return (int32)__popcnt_int64(v); } -__declspec(safe) +__declspec(safe) static inline int popcnt(int v) { int r; for (uniform int i = 0; i < programCount; ++i) @@ -513,7 +513,7 @@ static inline int popcnt(int v) { return __mask ? r : 0; } -__declspec(safe) +__declspec(safe) static inline int popcnt(int64 v) { int r; for (uniform int i = 0; i < programCount; ++i) @@ -521,7 +521,7 @@ static inline int popcnt(int64 v) { return __mask ? 
r : 0; } -__declspec(safe) +__declspec(safe) static inline uniform int popcnt(bool v) { // As with any() and all(), only count across the active lanes #if (ISPC_MASK_BITS == 1) @@ -534,7 +534,7 @@ static inline uniform int popcnt(bool v) { #endif } -__declspec(safe) +__declspec(safe) static inline uniform unsigned int64 lanemask() { return __movmsk(__mask); } @@ -542,17 +542,17 @@ static inline uniform unsigned int64 lanemask() { /////////////////////////////////////////////////////////////////////////// // memcpy/memmove/memset -static inline void memcpy(void * uniform dst, void * uniform src, +static inline void memcpy(void * uniform dst, void * uniform src, uniform int32 count) { __memcpy32((int8 * uniform)dst, (int8 * uniform)src, count); } -static inline void memcpy64(void * uniform dst, void * uniform src, +static inline void memcpy64(void * uniform dst, void * uniform src, uniform int64 count) { __memcpy64((int8 * uniform)dst, (int8 * uniform)src, count); } -static inline void memcpy(void * varying dst, void * varying src, +static inline void memcpy(void * varying dst, void * varying src, int32 count) { void * uniform da[programCount]; void * uniform sa[programCount]; @@ -566,7 +566,7 @@ static inline void memcpy(void * varying dst, void * varying src, } } -static inline void memcpy64(void * varying dst, void * varying src, +static inline void memcpy64(void * varying dst, void * varying src, int64 count) { void * uniform da[programCount]; void * uniform sa[programCount]; @@ -580,17 +580,17 @@ static inline void memcpy64(void * varying dst, void * varying src, } } -static inline void memmove(void * uniform dst, void * uniform src, +static inline void memmove(void * uniform dst, void * uniform src, uniform int32 count) { __memmove32((int8 * uniform)dst, (int8 * uniform)src, count); } -static inline void memmove64(void * uniform dst, void * uniform src, +static inline void memmove64(void * uniform dst, void * uniform src, uniform int64 count) { 
__memmove64((int8 * uniform)dst, (int8 * uniform)src, count); } -static inline void memmove(void * varying dst, void * varying src, +static inline void memmove(void * varying dst, void * varying src, int32 count) { void * uniform da[programCount]; void * uniform sa[programCount]; @@ -604,7 +604,7 @@ static inline void memmove(void * varying dst, void * varying src, } } -static inline void memmove64(void * varying dst, void * varying src, +static inline void memmove64(void * varying dst, void * varying src, int64 count) { void * uniform da[programCount]; void * uniform sa[programCount]; @@ -618,12 +618,12 @@ static inline void memmove64(void * varying dst, void * varying src, } } -static inline void memset(void * uniform ptr, uniform int8 val, +static inline void memset(void * uniform ptr, uniform int8 val, uniform int32 count) { __memset32((int8 * uniform)ptr, val, count); } -static inline void memset64(void * uniform ptr, uniform int8 val, +static inline void memset64(void * uniform ptr, uniform int8 val, uniform int64 count) { __memset64((int8 * uniform)ptr, val, count); } @@ -649,55 +649,55 @@ static inline void memset64(void * varying ptr, int8 val, int64 count) { /////////////////////////////////////////////////////////////////////////// // count leading/trailing zeros -__declspec(safe,cost1) +__declspec(safe,cost1) static inline uniform unsigned int32 count_leading_zeros(uniform unsigned int32 v) { return __count_leading_zeros_i32(v); } -__declspec(safe,cost1) +__declspec(safe,cost1) static inline uniform unsigned int64 count_leading_zeros(uniform unsigned int64 v) { return __count_leading_zeros_i64(v); } -__declspec(safe,cost1) +__declspec(safe,cost1) static inline uniform unsigned int32 count_trailing_zeros(uniform unsigned int32 v) { return __count_trailing_zeros_i32(v); } -__declspec(safe,cost1) +__declspec(safe,cost1) static inline uniform unsigned int64 count_trailing_zeros(uniform unsigned int64 v) { return __count_trailing_zeros_i64(v); } 
-__declspec(safe,cost1) +__declspec(safe,cost1) static inline uniform int32 count_leading_zeros(uniform int32 v) { return __count_leading_zeros_i32(v); } -__declspec(safe,cost1) +__declspec(safe,cost1) static inline uniform int64 count_leading_zeros(uniform int64 v) { return __count_leading_zeros_i64(v); } -__declspec(safe,cost1) +__declspec(safe,cost1) static inline uniform int32 count_trailing_zeros(uniform int32 v) { return __count_trailing_zeros_i32(v); } -__declspec(safe,cost1) +__declspec(safe,cost1) static inline uniform int64 count_trailing_zeros(uniform int64 v) { return __count_trailing_zeros_i64(v); } -__declspec(safe) +__declspec(safe) static inline unsigned int32 count_leading_zeros(unsigned int32 v) { unsigned int32 r; @@ -706,7 +706,7 @@ count_leading_zeros(unsigned int32 v) { return r; } -__declspec(safe) +__declspec(safe) static inline unsigned int64 count_leading_zeros(unsigned int64 v) { unsigned int64 r; @@ -715,7 +715,7 @@ count_leading_zeros(unsigned int64 v) { return r; } -__declspec(safe) +__declspec(safe) static inline unsigned int32 count_trailing_zeros(unsigned int32 v) { unsigned int32 r; @@ -724,7 +724,7 @@ count_trailing_zeros(unsigned int32 v) { return r; } -__declspec(safe) +__declspec(safe) static inline unsigned int64 count_trailing_zeros(unsigned int64 v) { unsigned int64 r; @@ -733,7 +733,7 @@ count_trailing_zeros(unsigned int64 v) { return r; } -__declspec(safe) +__declspec(safe) static inline int32 count_leading_zeros(int32 v) { int32 r; @@ -742,7 +742,7 @@ count_leading_zeros(int32 v) { return r; } -__declspec(safe) +__declspec(safe) static inline int64 count_leading_zeros(int64 v) { int64 r; @@ -751,7 +751,7 @@ count_leading_zeros(int64 v) { return r; } -__declspec(safe) +__declspec(safe) static inline int32 count_trailing_zeros(int32 v) { int32 r; @@ -760,7 +760,7 @@ count_trailing_zeros(int32 v) { return r; } -__declspec(safe) +__declspec(safe) static inline int64 count_trailing_zeros(int64 v) { int64 r; @@ -773,7 +773,7 @@ 
count_trailing_zeros(int64 v) { // AOS/SOA conversion static inline void -aos_to_soa3(uniform float a[], varying float * uniform v0, +aos_to_soa3(uniform float a[], varying float * uniform v0, varying float * uniform v1, varying float * uniform v2) { __aos_to_soa3_float(a, v0, v1, v2); } @@ -798,7 +798,7 @@ soa_to_aos4(float v0, float v1, float v2, float v3, uniform float a[]) { static inline void aos_to_soa3(uniform int32 a[], varying int32 * uniform v0, varying int32 * uniform v1, varying int32 * uniform v2) { - aos_to_soa3((uniform float * uniform)a, (varying float * uniform)v0, + aos_to_soa3((uniform float * uniform)a, (varying float * uniform)v0, (varying float * uniform)v1, (varying float * uniform)v2); } @@ -809,39 +809,39 @@ soa_to_aos3(int32 v0, int32 v1, int32 v2, uniform int32 a[]) { } static inline void -aos_to_soa4(uniform int32 a[], varying int32 * uniform v0, - varying int32 * uniform v1, varying int32 * uniform v2, +aos_to_soa4(uniform int32 a[], varying int32 * uniform v0, + varying int32 * uniform v1, varying int32 * uniform v2, varying int32 * uniform v3) { - aos_to_soa4((uniform float * uniform)a, (varying float * uniform )v0, - (varying float * uniform)v1, (varying float * uniform)v2, + aos_to_soa4((uniform float * uniform)a, (varying float * uniform )v0, + (varying float * uniform)v1, (varying float * uniform)v2, (varying float * uniform)v3); } static inline void soa_to_aos4(int32 v0, int32 v1, int32 v2, int32 v3, uniform int32 a[]) { - soa_to_aos4(floatbits(v0), floatbits(v1), floatbits(v2), floatbits(v3), + soa_to_aos4(floatbits(v0), floatbits(v1), floatbits(v2), floatbits(v3), (uniform float * uniform)a); } /////////////////////////////////////////////////////////////////////////// // Prefetching -__declspec(safe,cost1) +__declspec(safe,cost1) static inline void prefetch_l1(const void * uniform ptr) { __prefetch_read_uniform_1((uniform int8 * uniform)ptr); } -__declspec(safe,cost1) +__declspec(safe,cost1) static inline void 
prefetch_l2(const void * uniform ptr) { __prefetch_read_uniform_2((uniform int8 * uniform)ptr); } -__declspec(safe,cost1) +__declspec(safe,cost1) static inline void prefetch_l3(const void * uniform ptr) { __prefetch_read_uniform_3((uniform int8 * uniform)ptr); } -__declspec(safe,cost1) +__declspec(safe,cost1) static inline void prefetch_nt(const void * uniform ptr) { __prefetch_read_uniform_nt((uniform int8 * uniform)ptr); } @@ -1028,13 +1028,13 @@ static inline uniform unsigned int32 reduce_add(unsigned int16 x) { return __reduce_add_int16(__mask ? x : (int16)0); } -__declspec(safe) +__declspec(safe) static inline uniform float reduce_add(float x) { // zero the lanes where the mask is off return __reduce_add_float(__mask ? x : 0.); } -__declspec(safe) +__declspec(safe) static inline uniform float reduce_min(float v) { // For the lanes where the mask is off, replace the given value with // infinity, so that it doesn't affect the result. @@ -1049,7 +1049,7 @@ static inline uniform float reduce_min(float v) { return result; } -__declspec(safe) +__declspec(safe) static inline uniform float reduce_max(float v) { // For the lanes where the mask is off, replace the given value with // negative infinity, so that it doesn't affect the result. @@ -1064,13 +1064,13 @@ static inline uniform float reduce_max(float v) { return result; } -__declspec(safe) +__declspec(safe) static inline uniform int64 reduce_add(int32 x) { // Zero out the values for lanes that aren't running return __reduce_add_int32(__mask ? x : 0); } -__declspec(safe) +__declspec(safe) static inline uniform int reduce_min(int v) { // Set values for non-running lanes to the maximum integer value so // they don't affect the result. @@ -1078,7 +1078,7 @@ static inline uniform int reduce_min(int v) { return __reduce_min_int32(__mask ? 
v : int_max); } -__declspec(safe) +__declspec(safe) static inline uniform int reduce_max(int v) { // Set values for non-running lanes to the minimum integer value so // they don't affect the result. @@ -1086,14 +1086,14 @@ static inline uniform int reduce_max(int v) { return __reduce_max_int32(__mask ? v : int_min); } -__declspec(safe) +__declspec(safe) static inline uniform unsigned int64 reduce_add(unsigned int32 x) { // Set values for non-running lanes to zero so they don't affect the // result. return __reduce_add_int32(__mask ? x : 0); } -__declspec(safe) +__declspec(safe) static inline uniform unsigned int reduce_min(unsigned int v) { // Set values for non-running lanes to the maximum unsigned integer // value so they don't affect the result. @@ -1101,20 +1101,20 @@ static inline uniform unsigned int reduce_min(unsigned int v) { return __reduce_min_uint32(__mask ? v : uint_max); } -__declspec(safe) +__declspec(safe) static inline uniform unsigned int reduce_max(unsigned int v) { // Set values for non-running lanes to zero so they don't affect the // result. return __reduce_max_uint32(__mask ? v : 0); } -__declspec(safe) +__declspec(safe) static inline uniform double reduce_add(double x) { // zero the lanes where the mask is off return __reduce_add_double(__mask ? 
x : 0.); } -__declspec(safe) +__declspec(safe) static inline uniform double reduce_min(double v) { int64 iflt_max = 0x7ff0000000000000; // infinity // unmasked block is needed to make sure that argument for unmasked @@ -1127,7 +1127,7 @@ static inline uniform double reduce_min(double v) { return result; } -__declspec(safe) +__declspec(safe) static inline uniform double reduce_max(double v) { const int64 iflt_neg_max = 0xfff0000000000000; // -infinity // unmasked block is needed to make sure that argument for unmasked @@ -1140,13 +1140,13 @@ static inline uniform double reduce_max(double v) { return result; } -__declspec(safe) +__declspec(safe) static inline uniform int64 reduce_add(int64 x) { // Zero out the values for lanes that aren't running return __reduce_add_int64(__mask ? x : 0); } -__declspec(safe) +__declspec(safe) static inline uniform int64 reduce_min(int64 v) { // Set values for non-running lanes to the maximum integer value so // they don't affect the result. @@ -1154,7 +1154,7 @@ static inline uniform int64 reduce_min(int64 v) { return __reduce_min_int64(__mask ? v : int_max); } -__declspec(safe) +__declspec(safe) static inline uniform int64 reduce_max(int64 v) { // Set values for non-running lanes to the minimum integer value so // they don't affect the result. @@ -1162,14 +1162,14 @@ static inline uniform int64 reduce_max(int64 v) { return __reduce_max_int64(__mask ? v : int_min); } -__declspec(safe) +__declspec(safe) static inline uniform unsigned int64 reduce_add(unsigned int64 x) { // Set values for non-running lanes to zero so they don't affect the // result. return __reduce_add_int64(__mask ? x : 0); } -__declspec(safe) +__declspec(safe) static inline uniform unsigned int64 reduce_min(unsigned int64 v) { // Set values for non-running lanes to the maximum unsigned integer // value so they don't affect the result. @@ -1177,7 +1177,7 @@ static inline uniform unsigned int64 reduce_min(unsigned int64 v) { return __reduce_min_uint64(__mask ? 
v : uint_max); } -__declspec(safe) +__declspec(safe) static inline uniform unsigned int64 reduce_max(unsigned int64 v) { // Set values for non-running lanes to zero so they don't affect the // result. @@ -1261,7 +1261,7 @@ static unsigned int64 exclusive_scan_or(unsigned int64 v) { /////////////////////////////////////////////////////////////////////////// // packed load, store -static inline uniform int +static inline uniform int packed_load_active(uniform unsigned int a[], varying unsigned int * uniform vals) { return __packed_load_active(a, vals, (UIntMaskType)__mask); @@ -1280,12 +1280,12 @@ packed_store_active2(uniform unsigned int a[], } -static inline uniform int +static inline uniform int packed_load_active(uniform int a[], varying int * uniform vals) { return __packed_load_active(a, vals, (IntMaskType)__mask); } -static inline uniform int +static inline uniform int packed_store_active(uniform int a[], int vals) { return __packed_store_active(a, vals, (IntMaskType)__mask); } @@ -1311,7 +1311,7 @@ static inline uniform int num_cores() { return __num_cores(); } -__declspec(safe) +__declspec(safe) static inline uniform int64 clock() { return __clock(); } @@ -1339,7 +1339,7 @@ static inline bool isnan(double v) { return v != v; } -__declspec(safe,cost1) +__declspec(safe,cost1) static inline float abs(float a) { // Floating-point hack: zeroing the high bit clears the sign unsigned int i = intbits(a); @@ -1347,14 +1347,14 @@ static inline float abs(float a) { return floatbits(i); } -__declspec(safe,cost1) +__declspec(safe,cost1) static inline uniform float abs(uniform float a) { uniform unsigned int i = intbits(a); i &= 0x7fffffff; return floatbits(i); } -__declspec(safe,cost1) +__declspec(safe,cost1) static inline double abs(double a) { // zeroing the high bit clears the sign unsigned int64 i = intbits(a); @@ -1362,103 +1362,103 @@ static inline double abs(double a) { return doublebits(i); } -__declspec(safe,cost1) +__declspec(safe,cost1) static inline uniform 
double abs(uniform double a) { uniform unsigned int64 i = intbits(a); i &= 0x7fffffffffffffff; return doublebits(i); } -__declspec(safe,cost1) +__declspec(safe,cost1) static inline unsigned int signbits(float x) { unsigned int i = intbits(x); return (i & 0x80000000); } -__declspec(safe,cost1) +__declspec(safe,cost1) static inline uniform unsigned int signbits(uniform float x) { uniform unsigned int i = intbits(x); return (i & 0x80000000); } -__declspec(safe,cost1) +__declspec(safe,cost1) static inline unsigned int64 signbits(double x) { unsigned int64 i = intbits(x); return (i & 0x8000000000000000); } -__declspec(safe,cost1) +__declspec(safe,cost1) static inline uniform unsigned int64 signbits(uniform double x) { uniform unsigned int64 i = intbits(x); return (i & 0x8000000000000000); } -__declspec(safe,cost2) +__declspec(safe,cost2) static inline float round(float x) { return __round_varying_float(x); } -__declspec(safe,cost2) +__declspec(safe,cost2) static inline uniform float round(uniform float x) { return __round_uniform_float(x); } -__declspec(safe,cost2) +__declspec(safe,cost2) static inline double round(double x) { return __round_varying_double(x); } -__declspec(safe,cost2) +__declspec(safe,cost2) static inline uniform double round(uniform double x) { return __round_uniform_double(x); } -__declspec(safe,cost2) +__declspec(safe,cost2) static inline float floor(float x) { return __floor_varying_float(x); } -__declspec(safe,cost2) +__declspec(safe,cost2) static inline uniform float floor(uniform float x) { return __floor_uniform_float(x); } -__declspec(safe,cost2) +__declspec(safe,cost2) static inline double floor(double x) { return __floor_varying_double(x); } -__declspec(safe,cost2) +__declspec(safe,cost2) static inline uniform double floor(uniform double x) { return __floor_uniform_double(x); } -__declspec(safe,cost2) +__declspec(safe,cost2) static inline float ceil(float x) { return __ceil_varying_float(x); } -__declspec(safe,cost2) +__declspec(safe,cost2) 
static inline uniform float ceil(uniform float x) { return __ceil_uniform_float(x); } -__declspec(safe,cost2) +__declspec(safe,cost2) static inline double ceil(double x) { return __ceil_varying_double(x); } -__declspec(safe,cost2) +__declspec(safe,cost2) static inline uniform double ceil(uniform double x) { return __ceil_uniform_double(x); } -__declspec(safe) +__declspec(safe) static inline float rcp(float v) { return __rcp_varying_float(v); } -__declspec(safe) +__declspec(safe) static inline uniform float rcp(uniform float v) { return __rcp_uniform_float(v); } @@ -1480,16 +1480,16 @@ static inline QUAL double __rcp_safe_##QUAL##_double(QUAL double x) \ QUAL double exp = doublebits( 0x7fd0000000000000 + ~ex ); \ QUAL double y = rcp((QUAL float)(x*exp)); \ return __rcp_iterate_##QUAL##_double(x, y*exp); \ -} +} RCPD(varying) -__declspec(safe) -static inline double rcp(double v) { +__declspec(safe) +static inline double rcp(double v) { if (__have_native_rcpd) return __rcp_varying_double(v); else return __rcp_safe_varying_double(v); -} +} RCPD(uniform) __declspec(safe) @@ -1505,22 +1505,22 @@ static inline uniform double rcp(uniform double v) { // float -__declspec(safe,cost1) +__declspec(safe,cost1) static inline float min(float a, float b) { return __min_varying_float(a, b); } -__declspec(safe,cost1) +__declspec(safe,cost1) static inline uniform float min(uniform float a, uniform float b) { return __min_uniform_float(a, b); } -__declspec(safe,cost1) +__declspec(safe,cost1) static inline float max(float a, float b) { return __max_varying_float(a, b); } -__declspec(safe,cost1) +__declspec(safe,cost1) static inline uniform float max(uniform float a, uniform float b) { return __max_uniform_float(a, b); } @@ -1528,22 +1528,22 @@ static inline uniform float max(uniform float a, uniform float b) { // double -__declspec(safe) +__declspec(safe) static inline double min(double a, double b) { return __min_varying_double(a, b); } -__declspec(safe) +__declspec(safe) static 
inline uniform double min(uniform double a, uniform double b) { return __min_uniform_double(a, b); } -__declspec(safe) +__declspec(safe) static inline double max(double a, double b) { return __max_varying_double(a, b); } -__declspec(safe) +__declspec(safe) static inline uniform double max(uniform double a, uniform double b) { return __max_uniform_double(a, b); } @@ -1557,7 +1557,7 @@ static inline uniform unsigned int8 min(uniform unsigned int8 a, } __declspec(safe,cost1) -static inline uniform unsigned int8 max(uniform unsigned int8 a, +static inline uniform unsigned int8 max(uniform unsigned int8 a, uniform unsigned int8 b) { return (a > b) ? a : b; } @@ -1595,13 +1595,13 @@ static inline int8 max(int8 a, int8 b) { // int16 __declspec(safe,cost1) -static inline uniform unsigned int16 min(uniform unsigned int16 a, +static inline uniform unsigned int16 min(uniform unsigned int16 a, uniform unsigned int16 b) { return (a < b) ? a : b; } __declspec(safe,cost1) -static inline uniform unsigned int16 max(uniform unsigned int16 a, +static inline uniform unsigned int16 max(uniform unsigned int16 a, uniform unsigned int16 b) { return (a > b) ? 
a : b; } @@ -1750,14 +1750,14 @@ static inline uniform double clamp(uniform double v, uniform double low, uniform // int8 __declspec(safe,cost2) -static inline unsigned int8 clamp(unsigned int8 v, unsigned int8 low, +static inline unsigned int8 clamp(unsigned int8 v, unsigned int8 low, unsigned int8 high) { return min(max(v, low), high); } __declspec(safe,cost2) -static inline uniform unsigned int8 clamp(uniform unsigned int8 v, - uniform unsigned int8 low, +static inline uniform unsigned int8 clamp(uniform unsigned int8 v, + uniform unsigned int8 low, uniform unsigned int8 high) { return min(max(v, low), high); } @@ -1768,7 +1768,7 @@ static inline int8 clamp(int8 v, int8 low, int8 high) { } __declspec(safe,cost2) -static inline uniform int8 clamp(uniform int8 v, uniform int8 low, +static inline uniform int8 clamp(uniform int8 v, uniform int8 low, uniform int8 high) { return min(max(v, low), high); } @@ -1776,14 +1776,14 @@ static inline uniform int8 clamp(uniform int8 v, uniform int8 low, // int16 __declspec(safe,cost2) -static inline unsigned int16 clamp(unsigned int16 v, unsigned int16 low, +static inline unsigned int16 clamp(unsigned int16 v, unsigned int16 low, unsigned int16 high) { return min(max(v, low), high); } __declspec(safe,cost2) -static inline uniform unsigned int16 clamp(uniform unsigned int16 v, - uniform unsigned int16 low, +static inline uniform unsigned int16 clamp(uniform unsigned int16 v, + uniform unsigned int16 low, uniform unsigned int16 high) { return min(max(v, low), high); } @@ -1794,7 +1794,7 @@ static inline int16 clamp(int16 v, int16 low, int16 high) { } __declspec(safe,cost2) -static inline uniform int16 clamp(uniform int16 v, uniform int16 low, +static inline uniform int16 clamp(uniform int16 v, uniform int16 low, uniform int16 high) { return min(max(v, low), high); } @@ -1807,7 +1807,7 @@ static inline unsigned int clamp(unsigned int v, unsigned int low, unsigned int } __declspec(safe,cost2) -static inline uniform unsigned int 
clamp(uniform unsigned int v, uniform unsigned int low, +static inline uniform unsigned int clamp(uniform unsigned int v, uniform unsigned int low, uniform unsigned int high) { return min(max(v, low), high); } @@ -1825,14 +1825,14 @@ static inline uniform int clamp(uniform int v, uniform int low, uniform int high // int64 __declspec(safe,cost2) -static inline unsigned int64 clamp(unsigned int64 v, unsigned int64 low, +static inline unsigned int64 clamp(unsigned int64 v, unsigned int64 low, unsigned int64 high) { return min(max(v, low), high); } __declspec(safe,cost2) -static inline uniform unsigned int64 clamp(uniform unsigned int64 v, - uniform unsigned int64 low, +static inline uniform unsigned int64 clamp(uniform unsigned int64 v, + uniform unsigned int64 low, uniform unsigned int64 high) { return min(max(v, low), high); } @@ -1843,7 +1843,7 @@ static inline int64 clamp(int64 v, int64 low, int64 high) { } __declspec(safe,cost2) -static inline uniform int64 clamp(uniform int64 v, uniform int64 low, +static inline uniform int64 clamp(uniform int64 v, uniform int64 low, uniform int64 high) { return min(max(v, low), high); } @@ -2085,8 +2085,8 @@ static inline void *atomic_swap_global(void ** ptr, void * value) { (intptr_t)value); } -static inline void * -atomic_compare_exchange_global(void ** uniform ptr, +static inline void * +atomic_compare_exchange_global(void ** uniform ptr, void * oldval, void * newval) { return (void *)atomic_compare_exchange_global((intptr_t * uniform)ptr, (intptr_t)oldval, @@ -2094,8 +2094,8 @@ atomic_compare_exchange_global(void ** uniform ptr, } static inline void * uniform -atomic_compare_exchange_global(void ** uniform ptr, void * uniform oldval, - void * uniform newval) { +atomic_compare_exchange_global(void ** uniform ptr, void * uniform oldval, + void * uniform newval) { return (void * uniform)atomic_compare_exchange_global((intptr_t * uniform)ptr, (uniform intptr_t)oldval, (uniform intptr_t)newval); @@ -2153,17 +2153,17 @@ static 
inline uniform int32 __or(uniform int32 a, uniform int32 b) { return a | static inline uniform int32 __xor(uniform int32 a, uniform int32 b) { return a ^ b; } static inline uniform int32 __swap(uniform int32 a, uniform int32 b) { return b; } -static inline uniform unsigned int32 __add(uniform unsigned int32 a, +static inline uniform unsigned int32 __add(uniform unsigned int32 a, uniform unsigned int32 b) { return a+b; } -static inline uniform unsigned int32 __sub(uniform unsigned int32 a, +static inline uniform unsigned int32 __sub(uniform unsigned int32 a, uniform unsigned int32 b) { return a-b; } -static inline uniform unsigned int32 __and(uniform unsigned int32 a, +static inline uniform unsigned int32 __and(uniform unsigned int32 a, uniform unsigned int32 b) { return a & b; } -static inline uniform unsigned int32 __or(uniform unsigned int32 a, +static inline uniform unsigned int32 __or(uniform unsigned int32 a, uniform unsigned int32 b) { return a | b; } -static inline uniform unsigned int32 __xor(uniform unsigned int32 a, +static inline uniform unsigned int32 __xor(uniform unsigned int32 a, uniform unsigned int32 b) { return a ^ b; } -static inline uniform unsigned int32 __swap(uniform unsigned int32 a, +static inline uniform unsigned int32 __swap(uniform unsigned int32 a, uniform unsigned int32 b) { return b; } @@ -2178,17 +2178,17 @@ static inline uniform int64 __or(uniform int64 a, uniform int64 b) { return a | static inline uniform int64 __xor(uniform int64 a, uniform int64 b) { return a ^ b; } static inline uniform int64 __swap(uniform int64 a, uniform int64 b) { return b; } -static inline uniform unsigned int64 __add(uniform unsigned int64 a, +static inline uniform unsigned int64 __add(uniform unsigned int64 a, uniform unsigned int64 b) { return a+b; } -static inline uniform unsigned int64 __sub(uniform unsigned int64 a, +static inline uniform unsigned int64 __sub(uniform unsigned int64 a, uniform unsigned int64 b) { return a-b; } -static inline uniform 
unsigned int64 __and(uniform unsigned int64 a, +static inline uniform unsigned int64 __and(uniform unsigned int64 a, uniform unsigned int64 b) { return a & b; } -static inline uniform unsigned int64 __or(uniform unsigned int64 a, +static inline uniform unsigned int64 __or(uniform unsigned int64 a, uniform unsigned int64 b) { return a | b; } -static inline uniform unsigned int64 __xor(uniform unsigned int64 a, +static inline uniform unsigned int64 __xor(uniform unsigned int64 a, uniform unsigned int64 b) { return a ^ b; } -static inline uniform unsigned int64 __swap(uniform unsigned int64 a, +static inline uniform unsigned int64 __swap(uniform unsigned int64 a, uniform unsigned int64 b) { return b; } static inline uniform double __add(uniform double a, uniform double b) { return a+b; } @@ -2307,8 +2307,8 @@ static inline void *atomic_swap_local(void ** ptr, void * value) { (intptr_t)value); } -static inline void * -atomic_compare_exchange_local(void ** uniform ptr, +static inline void * +atomic_compare_exchange_local(void ** uniform ptr, void * oldval, void * newval) { return (void *)atomic_compare_exchange_local((intptr_t * uniform)ptr, (intptr_t)oldval, @@ -2316,8 +2316,8 @@ atomic_compare_exchange_local(void ** uniform ptr, } static inline void * uniform -atomic_compare_exchange_local(void ** uniform ptr, void * uniform oldval, - void * uniform newval) { +atomic_compare_exchange_local(void ** uniform ptr, void * uniform oldval, + void * uniform newval) { return (void * uniform)atomic_compare_exchange_local((intptr_t * uniform)ptr, (uniform intptr_t)oldval, (uniform intptr_t)newval); @@ -2403,7 +2403,7 @@ static inline uniform float frexp(uniform float x, uniform int * uniform pw2) { __declspec(safe) static inline float sin(float x_full) { - if (__have_native_trigonometry) + if (__have_native_trigonometry) { return __sin_varying_float(x_full); } @@ -2418,7 +2418,7 @@ static inline float sin(float x_full) { } return ret; } - else if (__math_lib == __math_lib_ispc 
|| + else if (__math_lib == __math_lib_ispc || __math_lib == __math_lib_ispc_fast) { static const float pi_over_two_vec = 1.57079637050628662109375; static const float two_over_pi_vec = 0.636619746685028076171875; @@ -2469,7 +2469,7 @@ static inline float sin(float x_full) { __declspec(safe) static inline uniform float sin(uniform float x_full) { - if (__have_native_trigonometry) + if (__have_native_trigonometry) { return __sin_uniform_float(x_full); } @@ -2477,7 +2477,7 @@ static inline uniform float sin(uniform float x_full) { __math_lib == __math_lib_svml) { return __stdlib_sinf(x_full); } - else if (__math_lib == __math_lib_ispc || + else if (__math_lib == __math_lib_ispc || __math_lib == __math_lib_ispc_fast) { static const uniform float pi_over_two_vec = 1.57079637050628662109375; static const uniform float two_over_pi_vec = 0.636619746685028076171875; @@ -2544,13 +2544,13 @@ static inline float asin(float x0) { bool isnan = (x > 1); float v; - if (__have_native_trigonometry) + if (__have_native_trigonometry) { return __asin_varying_float(x0); } else if (__math_lib == __math_lib_svml) { return __svml_asinf(x0); - } + } else if (__math_lib == __math_lib_system) { float ret; foreach_active (i) { @@ -2565,15 +2565,15 @@ static inline float asin(float x0) { // fpminimax(((asin(x)-pi/2)/-sqrt(1-x)), [|0,1,2,3,4,5,6,7,8,9,10|], // [|single...|], [1e-20;.9999999999999999]); // avg error: 8.5716801e-09, max error: 2.1373853e-07 - v = 1.57079637050628662109375f + - x * (-0.21460501849651336669921875f + - x * (8.9116774499416351318359375e-2f + - x * (-5.146093666553497314453125e-2f + - x * (3.7269376218318939208984375e-2f + - x * (-3.5882405936717987060546875e-2f + + v = 1.57079637050628662109375f + + x * (-0.21460501849651336669921875f + + x * (8.9116774499416351318359375e-2f + + x * (-5.146093666553497314453125e-2f + + x * (3.7269376218318939208984375e-2f + + x * (-3.5882405936717987060546875e-2f + x * (4.14929799735546112060546875e-2f + x * 
(-4.25077490508556365966796875e-2f + - x * (3.05023305118083953857421875e-2f + + x * (3.05023305118083953857421875e-2f + x * (-1.2897425331175327301025390625e-2f + x * 2.38926825113594532012939453125e-3f))))))))); } @@ -2583,11 +2583,11 @@ static inline float asin(float x0) { // fpminimax(((asin(x)-pi/2)/-sqrt(1-x)), [|0,1,2,3,4,5|],[|single...|], // [1e-20;.9999999999999999]); // avg error: 1.1105439e-06, max error 1.3187528e-06 - v = 1.57079517841339111328125f + - x * (-0.21450997889041900634765625f + - x * (8.78556668758392333984375e-2f + - x * (-4.489909112453460693359375e-2f + - x * (1.928029954433441162109375e-2f + + v = 1.57079517841339111328125f + + x * (-0.21450997889041900634765625f + + x * (8.78556668758392333984375e-2f + + x * (-4.489909112453460693359375e-2f + + x * (1.928029954433441162109375e-2f + x * (-4.3095736764371395111083984375e-3f))))); } @@ -2609,7 +2609,7 @@ static inline uniform float asin(uniform float x0) { uniform float x = abs(x0); uniform bool isnan = (x > 1); uniform float v; - if (__have_native_trigonometry) + if (__have_native_trigonometry) { return __asin_uniform_float(x0); } @@ -2623,15 +2623,15 @@ static inline uniform float asin(uniform float x0) { // fpminimax(((asin(x)-pi/2)/-sqrt(1-x)), [|0,1,2,3,4,5,6,7,8,9,10|], // [|single...|], [1e-20;.9999999999999999]); // avg error: 8.5716801e-09, max error: 2.1373853e-07 - v = 1.57079637050628662109375f + - x * (-0.21460501849651336669921875f + - x * (8.9116774499416351318359375e-2f + - x * (-5.146093666553497314453125e-2f + - x * (3.7269376218318939208984375e-2f + - x * (-3.5882405936717987060546875e-2f + + v = 1.57079637050628662109375f + + x * (-0.21460501849651336669921875f + + x * (8.9116774499416351318359375e-2f + + x * (-5.146093666553497314453125e-2f + + x * (3.7269376218318939208984375e-2f + + x * (-3.5882405936717987060546875e-2f + x * (4.14929799735546112060546875e-2f + x * (-4.25077490508556365966796875e-2f + - x * (3.05023305118083953857421875e-2f + + x * 
(3.05023305118083953857421875e-2f + x * (-1.2897425331175327301025390625e-2f + x * 2.38926825113594532012939453125e-3f))))))))); } @@ -2641,11 +2641,11 @@ static inline uniform float asin(uniform float x0) { // fpminimax(((asin(x)-pi/2)/-sqrt(1-x)), [|0,1,2,3,4,5|],[|single...|], // [1e-20;.9999999999999999]); // avg error: 1.1105439e-06, max error 1.3187528e-06 - v = 1.57079517841339111328125f + - x * (-0.21450997889041900634765625f + - x * (8.78556668758392333984375e-2f + - x * (-4.489909112453460693359375e-2f + - x * (1.928029954433441162109375e-2f + + v = 1.57079517841339111328125f + + x * (-0.21450997889041900634765625f + + x * (8.78556668758392333984375e-2f + + x * (-4.489909112453460693359375e-2f + + x * (1.928029954433441162109375e-2f + x * (-4.3095736764371395111083984375e-3f))))); } @@ -2663,7 +2663,7 @@ static inline uniform float asin(uniform float x0) { __declspec(safe) static inline float cos(float x_full) { - if (__have_native_trigonometry) + if (__have_native_trigonometry) { return __cos_varying_float(x_full); } @@ -2678,7 +2678,7 @@ static inline float cos(float x_full) { } return ret; } - else if (__math_lib == __math_lib_ispc || + else if (__math_lib == __math_lib_ispc || __math_lib == __math_lib_ispc_fast) { static const float pi_over_two_vec = 1.57079637050628662109375; static const float two_over_pi_vec = 0.636619746685028076171875; @@ -2728,7 +2728,7 @@ static inline float cos(float x_full) { __declspec(safe) static inline uniform float cos(uniform float x_full) { - if (__have_native_trigonometry) + if (__have_native_trigonometry) { return __cos_uniform_float(x_full); } @@ -2736,7 +2736,7 @@ static inline uniform float cos(uniform float x_full) { __math_lib == __math_lib_svml) { return __stdlib_cosf(x_full); } - else if (__math_lib == __math_lib_ispc || + else if (__math_lib == __math_lib_ispc || __math_lib == __math_lib_ispc_fast) { static const uniform float pi_over_two_vec = 1.57079637050628662109375; static const uniform float 
two_over_pi_vec = 0.636619746685028076171875; @@ -2797,7 +2797,7 @@ static inline uniform float cos(uniform float x_full) { __declspec(safe) static inline float acos(float v) { - if (__have_native_trigonometry) + if (__have_native_trigonometry) return __acos_varying_float(v); else return 1.57079637050628662109375 - asin(v); @@ -2805,7 +2805,7 @@ static inline float acos(float v) { __declspec(safe) static inline double acos(const double v) { - if (__have_native_trigonometry) + if (__have_native_trigonometry) return __acos_varying_double(v); else return 1.57079637050628662109375d0 - asin(v); @@ -2814,7 +2814,7 @@ static inline double acos(const double v) { __declspec(safe) static inline uniform float acos(uniform float v) { - if (__have_native_trigonometry) + if (__have_native_trigonometry) return __acos_uniform_float(v); else return 1.57079637050628662109375 - asin(v); @@ -2822,7 +2822,7 @@ static inline uniform float acos(uniform float v) { __declspec(safe) static inline uniform double acos(const uniform double v) { - if (__have_native_trigonometry) + if (__have_native_trigonometry) return __acos_uniform_double(v); else return 1.57079637050628662109375d0 - asin(v); @@ -2830,9 +2830,9 @@ static inline uniform double acos(const uniform double v) { __declspec(safe) -static inline void sincos(float x_full, varying float * uniform sin_result, +static inline void sincos(float x_full, varying float * uniform sin_result, varying float * uniform cos_result) { - if (__have_native_trigonometry) + if (__have_native_trigonometry) { __sincos_varying_float(x_full,sin_result,cos_result); } @@ -2847,7 +2847,7 @@ static inline void sincos(float x_full, varying float * uniform sin_result, *cos_result = insert(*cos_result, i, c); } } - else if (__math_lib == __math_lib_ispc || + else if (__math_lib == __math_lib_ispc || __math_lib == __math_lib_ispc_fast) { const float pi_over_two_vec = 1.57079637050628662109375; const float two_over_pi_vec = 0.636619746685028076171875; @@ -2906,7 
+2906,7 @@ static inline void sincos(float x_full, varying float * uniform sin_result, __declspec(safe) static inline void sincos(uniform float x_full, uniform float * uniform sin_result, uniform float * uniform cos_result) { - if (__have_native_trigonometry) + if (__have_native_trigonometry) { __sincos_uniform_float(x_full, sin_result, cos_result); } @@ -2914,7 +2914,7 @@ static inline void sincos(uniform float x_full, uniform float * uniform sin_resu __math_lib == __math_lib_svml) { __stdlib_sincosf(x_full, sin_result, cos_result); } - else if (__math_lib == __math_lib_ispc || + else if (__math_lib == __math_lib_ispc || __math_lib == __math_lib_ispc_fast) { const uniform float pi_over_two_vec = 1.57079637050628662109375; const uniform float two_over_pi_vec = 0.636619746685028076171875; @@ -2972,7 +2972,7 @@ static inline void sincos(uniform float x_full, uniform float * uniform sin_resu __declspec(safe) static inline float tan(float x_full) { - if (__have_native_trigonometry) + if (__have_native_trigonometry) { return __tan_varying_float(x_full); } @@ -2987,7 +2987,7 @@ static inline float tan(float x_full) { } return ret; } - else if (__math_lib == __math_lib_ispc || + else if (__math_lib == __math_lib_ispc || __math_lib == __math_lib_ispc_fast) { const float pi_over_four_vec = 0.785398185253143310546875; const float four_over_pi_vec = 1.27323949337005615234375; @@ -3055,7 +3055,7 @@ static inline float tan(float x_full) { __declspec(safe) static inline uniform float tan(uniform float x_full) { - if (__have_native_trigonometry) + if (__have_native_trigonometry) { return __tan_uniform_float(x_full); } @@ -3063,7 +3063,7 @@ static inline uniform float tan(uniform float x_full) { __math_lib == __math_lib_svml) { return __stdlib_tanf(x_full); } - else if (__math_lib == __math_lib_ispc || + else if (__math_lib == __math_lib_ispc || __math_lib == __math_lib_ispc_fast) { const uniform float pi_over_four_vec = 0.785398185253143310546875; const uniform float 
four_over_pi_vec = 1.27323949337005615234375; @@ -3131,7 +3131,7 @@ static inline uniform float tan(uniform float x_full) { __declspec(safe) static inline float atan(float x_full) { - if (__have_native_trigonometry) + if (__have_native_trigonometry) { return __atan_varying_float(x_full); } @@ -3146,7 +3146,7 @@ static inline float atan(float x_full) { } return ret; } - else if (__math_lib == __math_lib_ispc || + else if (__math_lib == __math_lib_ispc || __math_lib == __math_lib_ispc_fast) { const float pi_over_two_vec = 1.57079637050628662109375; // atan(-x) = -atan(x) (so flip from negative to positive first) @@ -3186,7 +3186,7 @@ static inline float atan(float x_full) { __declspec(safe) static inline uniform float atan(uniform float x_full) { - if (__have_native_trigonometry) + if (__have_native_trigonometry) { return __atan_uniform_float(x_full); } @@ -3194,7 +3194,7 @@ static inline uniform float atan(uniform float x_full) { __math_lib == __math_lib_svml) { return __stdlib_atanf(x_full); } - else if (__math_lib == __math_lib_ispc || + else if (__math_lib == __math_lib_ispc || __math_lib == __math_lib_ispc_fast) { const uniform float pi_over_two_vec = 1.57079637050628662109375; // atan(-x) = -atan(x) (so flip from negative to positive first) @@ -3234,7 +3234,7 @@ static inline uniform float atan(uniform float x_full) { __declspec(safe) static inline float atan2(float y, float x) { - if (__have_native_trigonometry) + if (__have_native_trigonometry) { return __atan2_varying_float(y,x); } @@ -3249,7 +3249,7 @@ static inline float atan2(float y, float x) { } return ret; } - else if (__math_lib == __math_lib_ispc || + else if (__math_lib == __math_lib_ispc || __math_lib == __math_lib_ispc_fast) { const float pi_vec = 3.1415926536; const float pi_over_two_vec = 1.5707963267; @@ -3277,7 +3277,7 @@ static inline float atan2(float y, float x) { __declspec(safe) static inline uniform float atan2(uniform float y, uniform float x) { - if (__have_native_trigonometry) + if 
(__have_native_trigonometry) { return __atan2_uniform_float(y,x); } @@ -3285,7 +3285,7 @@ static inline uniform float atan2(uniform float y, uniform float x) { __math_lib == __math_lib_svml) { return __stdlib_atan2f(y, x); } - else if (__math_lib == __math_lib_ispc || + else if (__math_lib == __math_lib_ispc || __math_lib == __math_lib_ispc_fast) { const uniform float pi_vec = 3.1415927410125732421875; const uniform float pi_over_two_vec = 1.57079637050628662109375; @@ -3317,7 +3317,7 @@ static inline float exp(float x_full) { return ret; } else if (__math_lib == __math_lib_ispc_fast) { - float z = floor(1.44269504088896341f * x_full + 0.5f); + float z = floor(1.44269504088896341f * x_full + 0.5f); int n; x_full -= z * 0.693359375f; x_full -= z * -2.12194440e-4f; @@ -3392,7 +3392,7 @@ static inline uniform float exp(uniform float x_full) { return __stdlib_expf(x_full); } else if (__math_lib == __math_lib_ispc_fast) { - uniform float z = floor(1.44269504088896341f * x_full + 0.5f); + uniform float z = floor(1.44269504088896341f * x_full + 0.5f); uniform int n; x_full -= z * 0.693359375f; x_full -= z * -2.12194440e-4f; @@ -3461,7 +3461,7 @@ static inline uniform float exp(uniform float x_full) { // * log(2) + log(y) where y is the reduced range (usually in [1/2, // 1)). 
__declspec(safe) -static inline void __range_reduce_log(float input, varying float * uniform reduced, +static inline void __range_reduce_log(float input, varying float * uniform reduced, varying int * uniform exponent) { int int_version = intbits(input); // single precision = SEEE EEEE EMMM MMMM MMMM MMMM MMMM MMMM @@ -3492,7 +3492,7 @@ static inline void __range_reduce_log(float input, varying float * uniform reduc __declspec(safe) -static inline void __range_reduce_log(uniform float input, uniform float * uniform reduced, +static inline void __range_reduce_log(uniform float input, uniform float * uniform reduced, uniform int * uniform exponent) { uniform int int_version = intbits(input); static const uniform int nonexponent_mask = 0x807FFFFF; @@ -3526,7 +3526,7 @@ static inline float log(float x_full) { else if (__math_lib == __math_lib_ispc_fast) { int e; x_full = frexp(x_full, &e); - + int x_smaller_SQRTHF = (0.707106781186547524f > x_full) ? 0xffffffff : 0; e += x_smaller_SQRTHF; int ix_add = intbits(x_full); @@ -3550,7 +3550,7 @@ static inline float log(float x_full) { y -= 0.5f * z; z = x_full + y; return z + 0.693359375 * fe; - } + } else if (__math_lib == __math_lib_ispc) { float reduced; int exponent; @@ -3610,7 +3610,7 @@ static inline uniform float log(uniform float x_full) { else if (__math_lib == __math_lib_ispc_fast) { uniform int e; x_full = frexp(x_full, &e); - + uniform int x_smaller_SQRTHF = (0.707106781186547524f > x_full) ? 
0xffffffff : 0; e += x_smaller_SQRTHF; uniform int ix_add = intbits(x_full); @@ -3698,7 +3698,7 @@ static inline float pow(float a, float b) { } return ret; } - else if (__math_lib == __math_lib_ispc || + else if (__math_lib == __math_lib_ispc || __math_lib == __math_lib_ispc_fast) { return exp(b * log(a)); } @@ -3713,7 +3713,7 @@ static inline uniform float pow(uniform float a, uniform float b) { __math_lib == __math_lib_svml) { return __stdlib_powf(a, b); } - else if (__math_lib == __math_lib_ispc || + else if (__math_lib == __math_lib_ispc || __math_lib == __math_lib_ispc_fast) { return exp(b * log(a)); } @@ -3754,13 +3754,13 @@ static inline QUAL double __rsqrt_safe_##QUAL##_double (QUAL double x) \ } RSQRTD(varying) -__declspec(safe) -static inline double rsqrt(double v) { +__declspec(safe) +static inline double rsqrt(double v) { if (__have_native_rsqrtd) return __rsqrt_varying_double(v); else return __rsqrt_safe_varying_double(v); -} +} RSQRTD(uniform) __declspec(safe) @@ -3816,11 +3816,11 @@ static inline uniform double frexp(uniform double x, uniform int * uniform pw2) __declspec(safe) static inline double sin(double x) { - if (__have_native_trigonometry) + if (__have_native_trigonometry) { return __sin_varying_double(x); } - else if (__math_lib == __math_lib_svml) + else if (__math_lib == __math_lib_svml) { return __svml_sind(x); } @@ -3835,11 +3835,11 @@ static inline double sin(double x) { } __declspec(safe) static inline double asin(double x) { - if (__have_native_trigonometry) + if (__have_native_trigonometry) { return __asin_varying_double(x); } - else if (__math_lib == __math_lib_svml) + else if (__math_lib == __math_lib_svml) { return __svml_asind(x); } @@ -3855,7 +3855,7 @@ static inline double asin(double x) { __declspec(safe) static inline uniform double sin(uniform double x) { - if (__have_native_trigonometry) + if (__have_native_trigonometry) { return __sin_uniform_double(x); } @@ -3865,11 +3865,11 @@ static inline uniform double sin(uniform 
double x) { __declspec(safe) static inline double asin(const double x) { - if (__have_native_trigonometry) + if (__have_native_trigonometry) { return __asin_varying_double(x); } - else if (__math_lib == __math_lib_svml) + else if (__math_lib == __math_lib_svml) { return __svml_asind(x); } @@ -3885,11 +3885,11 @@ static inline double asin(const double x) { __declspec(safe) static inline double cos(const double x) { - if (__have_native_trigonometry) + if (__have_native_trigonometry) { return __cos_varying_double(x); } - if (__math_lib == __math_lib_svml) + if (__math_lib == __math_lib_svml) { return __svml_cosd(x); } @@ -3905,7 +3905,7 @@ static inline double cos(const double x) { __declspec(safe) static inline uniform double cos(uniform double x) { - if (__have_native_trigonometry) + if (__have_native_trigonometry) { return __cos_uniform_double(x); } @@ -3916,11 +3916,11 @@ static inline uniform double cos(uniform double x) { __declspec(safe) static inline void sincos(double x, varying double * uniform sin_result, varying double * uniform cos_result) { - if (__have_native_trigonometry) + if (__have_native_trigonometry) { __sincos_varying_double(x,sin_result,cos_result); } - if (__math_lib == __math_lib_svml) + if (__math_lib == __math_lib_svml) { __svml_sincosd(x, sin_result, cos_result); } @@ -3937,7 +3937,7 @@ static inline void sincos(double x, varying double * uniform sin_result, __declspec(safe) static inline void sincos(uniform double x, uniform double * uniform sin_result, uniform double * uniform cos_result) { - if (__have_native_trigonometry) + if (__have_native_trigonometry) { __sincos_uniform_double(x,sin_result, cos_result); } @@ -3947,11 +3947,11 @@ static inline void sincos(uniform double x, uniform double * uniform sin_result, __declspec(safe) static inline double tan(double x) { - if (__have_native_trigonometry) + if (__have_native_trigonometry) { return __tan_varying_double(x); } - else if (__math_lib == __math_lib_svml) + else if (__math_lib == 
__math_lib_svml) { return __svml_tand(x); } @@ -3967,7 +3967,7 @@ static inline double tan(double x) { __declspec(safe) static inline uniform double tan(uniform double x) { - if (__have_native_trigonometry) + if (__have_native_trigonometry) { return __tan_uniform_double(x); } @@ -3977,7 +3977,7 @@ static inline uniform double tan(uniform double x) { __declspec(safe) static inline double atan(double x) { - if (__have_native_trigonometry) + if (__have_native_trigonometry) { return __atan_varying_double(x); } @@ -3993,7 +3993,7 @@ static inline double atan(double x) { __declspec(safe) static inline uniform double atan(uniform double x) { - if (__have_native_trigonometry) + if (__have_native_trigonometry) { return __atan_uniform_double(x); } @@ -4003,11 +4003,11 @@ static inline uniform double atan(uniform double x) { __declspec(safe) static inline double atan2(double y, double x) { - if (__have_native_trigonometry) + if (__have_native_trigonometry) { return __atan2_varying_double(y,x); } - else if (__math_lib == __math_lib_svml) + else if (__math_lib == __math_lib_svml) { return __svml_atan2d(y,x); } @@ -4023,7 +4023,7 @@ static inline double atan2(double y, double x) { __declspec(safe) static inline uniform double atan2(uniform double y, uniform double x) { - if (__have_native_trigonometry) + if (__have_native_trigonometry) { return __atan2_uniform_double(y,x); } @@ -4036,7 +4036,7 @@ static inline double exp(double x) { if (__have_native_transcendentals) { return __exp_varying_double(x); } - else if (__math_lib == __math_lib_svml) + else if (__math_lib == __math_lib_svml) { return __svml_expd(x); } @@ -4064,7 +4064,7 @@ static inline double log(double x) { if (__have_native_transcendentals) { return __log_varying_double(x); } - else if (__math_lib == __math_lib_svml) + else if (__math_lib == __math_lib_svml) { return __svml_logd(x); } @@ -4092,7 +4092,7 @@ static inline double pow(double a, double b) { if (__have_native_transcendentals) { return 
__pow_varying_double(a,b); } - else if (__math_lib == __math_lib_svml) + else if (__math_lib == __math_lib_svml) { return __svml_powd(a,b); } @@ -4195,7 +4195,7 @@ static inline uniform int16 float_to_half(uniform float f) { // unconditional assignment here, will override with right value for // the regular case below. uniform int32 f32infty = 255ul << 23; - o = (fint > f32infty) ? 0x7e00u : 0x7c00u; + o = (fint > f32infty) ? 0x7e00u : 0x7c00u; // (De)normalized number or zero // update fint unconditionally to save the blending; we don't need it @@ -4326,14 +4326,14 @@ static inline uniform int16 float_to_half_fast(uniform float f) { uniform unsigned int32 hs = (xs >> 16); // Sign bit // Exponent unbias the single, then bias the halfp - uniform int32 hes = ((int)(xe >> 23)) - 127 + 15; + uniform int32 hes = ((int)(xe >> 23)) - 127 + 15; uniform unsigned int32 he = (hes << 10); // Exponent uniform int32 hm = (xm >> 13); // Mantissa uniform int32 ret = (hs | he | hm); if (xm & 0x00001000u) // Check for rounding // Round, might overflow to inf, this is OK - ret += 1u; + ret += 1u; return (int16)ret; } @@ -4352,14 +4352,14 @@ static inline int16 float_to_half_fast(float f) { unsigned int32 hs = (xs >> 16); // Sign bit // Exponent unbias the single, then bias the halfp - int32 hes = ((int)(xe >> 23)) - 127 + 15; + int32 hes = ((int)(xe >> 23)) - 127 + 15; unsigned int32 he = (hes << 10); // Exponent int32 hm = (xm >> 13); // Mantissa int32 ret = (hs | he | hm); if (xm & 0x00001000u) // Check for rounding // Round, might overflow to inf, this is OK - ret += 1u; + ret += 1u; return (int16)ret; } @@ -4427,7 +4427,7 @@ float_to_srgb8(float inval) }; static const uniform unsigned int almost_one = 0x3f7fffff; - + // Clamp to [2^(-13), 1-eps]; these two values map to 0 and 1, respectively. 
inval = max(inval, 0.0f); inval = min(inval, floatbits(almost_one)); @@ -4477,7 +4477,7 @@ float_to_srgb8(uniform float inval) }; static const uniform unsigned int almost_one = 0x3f7fffff; - + // Clamp to [2^(-13), 1-eps]; these two values map to 0 and 1, respectively. inval = max(inval, 0.0f); inval = min(inval, floatbits(almost_one)); @@ -4505,7 +4505,7 @@ static inline unsigned int random(varying RNGState * uniform state) b = ((state->z1 << 6) ^ state->z1) >> 13; state->z1 = ((state->z1 & 4294967294U) << 18) ^ b; - b = ((state->z2 << 2) ^ state->z2) >> 27; + b = ((state->z2 << 2) ^ state->z2) >> 27; state->z2 = ((state->z2 & 4294967288U) << 2) ^ b; b = ((state->z3 << 13) ^ state->z3) >> 21; state->z3 = ((state->z3 & 4294967280U) << 7) ^ b; @@ -4520,7 +4520,7 @@ static inline uniform unsigned int random(uniform RNGState * uniform state) b = ((state->z1 << 6) ^ state->z1) >> 13; state->z1 = ((state->z1 & 4294967294U) << 18) ^ b; - b = ((state->z2 << 2) ^ state->z2) >> 27; + b = ((state->z2 << 2) ^ state->z2) >> 27; state->z2 = ((state->z2 & 4294967288U) << 2) ^ b; b = ((state->z3 << 13) ^ state->z3) >> 21; state->z3 = ((state->z3 & 4294967280U) << 7) ^ b; @@ -4543,7 +4543,7 @@ static inline uniform float frandom(uniform RNGState * uniform state) return floatbits(0x3F800000 | irand)-1.0f; } -static inline void seed_rng(varying RNGState * uniform state, +static inline void seed_rng(varying RNGState * uniform state, unsigned int seed) { state->z1 = seed; state->z2 = seed ^ 0xbeeff00d; @@ -4552,7 +4552,7 @@ static inline void seed_rng(varying RNGState * uniform state, ((seed & 0xff0000ul) >> 8) | (seed & 0xff000000ul) >> 24); } -static inline void seed_rng(uniform RNGState * uniform state, +static inline void seed_rng(uniform RNGState * uniform state, uniform unsigned int seed) { state->z1 = seed; state->z2 = seed ^ 0xbeeff00d; @@ -4631,52 +4631,52 @@ static inline varying int64 saturating_add(varying int64 a, varying int64 b) { return result; } -static inline uniform 
unsigned int8 saturating_add(uniform unsigned int8 a, +static inline uniform unsigned int8 saturating_add(uniform unsigned int8 a, uniform unsigned int8 b) { uniform unsigned int8 result = a + b; result |= (-(uniform int8)(result < a)); return result; } -static inline varying unsigned int8 saturating_add(varying unsigned int8 a, +static inline varying unsigned int8 saturating_add(varying unsigned int8 a, varying unsigned int8 b) { return __paddus_vi8(a, b); } -static inline uniform unsigned int16 saturating_add(uniform unsigned int16 a, +static inline uniform unsigned int16 saturating_add(uniform unsigned int16 a, uniform unsigned int16 b) { uniform unsigned int16 result = a + b; result |= (-(uniform int16)(result < a)); return result; } -static inline varying unsigned int16 saturating_add(varying unsigned int16 a, +static inline varying unsigned int16 saturating_add(varying unsigned int16 a, varying unsigned int16 b) { return __paddus_vi16(a, b); } -static inline uniform unsigned int32 saturating_add(uniform unsigned int32 a, +static inline uniform unsigned int32 saturating_add(uniform unsigned int32 a, uniform unsigned int32 b) { uniform unsigned int32 result = a + b; result |= (-(uniform int32)(result < a)); return result; } -static inline varying unsigned int32 saturating_add(varying unsigned int32 a, +static inline varying unsigned int32 saturating_add(varying unsigned int32 a, varying unsigned int32 b) { varying unsigned int32 result = a + b; result |= (-(varying int32)(result < a)); return result; } -static inline uniform unsigned int64 saturating_add(uniform unsigned int64 a, +static inline uniform unsigned int64 saturating_add(uniform unsigned int64 a, uniform unsigned int64 b) { uniform unsigned int64 result = a + b; result |= (-(uniform int64)(result < a)); return result; } -static inline varying unsigned int64 saturating_add(varying unsigned int64 a, +static inline varying unsigned int64 saturating_add(varying unsigned int64 a, varying unsigned int64 b) 
{ varying unsigned int64 result = a + b; result |= (-(varying int64)(result < a)); @@ -4745,52 +4745,52 @@ static inline varying int64 saturating_sub(varying int64 a, varying int64 b) { return result; } -static inline uniform unsigned int8 saturating_sub(uniform unsigned int8 a, +static inline uniform unsigned int8 saturating_sub(uniform unsigned int8 a, uniform unsigned int8 b) { uniform unsigned int8 result = a - b; result &= (-(uniform int8)(result <= a)); return result; } -static inline varying unsigned int8 saturating_sub(varying unsigned int8 a, +static inline varying unsigned int8 saturating_sub(varying unsigned int8 a, varying unsigned int8 b) { return __psubus_vi8(a, b); } -static inline uniform unsigned int16 saturating_sub(uniform unsigned int16 a, +static inline uniform unsigned int16 saturating_sub(uniform unsigned int16 a, uniform unsigned int16 b) { uniform unsigned int16 result = a - b; result &= (-(uniform int16)(result <= a)); return result; } -static inline varying unsigned int16 saturating_sub(varying unsigned int16 a, +static inline varying unsigned int16 saturating_sub(varying unsigned int16 a, varying unsigned int16 b) { return __psubus_vi16(a, b); } -static inline uniform unsigned int32 saturating_sub(uniform unsigned int32 a, +static inline uniform unsigned int32 saturating_sub(uniform unsigned int32 a, uniform unsigned int32 b) { uniform unsigned int32 result = a - b; result &= (-(uniform int32)(result <= a)); return result; } -static inline varying unsigned int32 saturating_sub(varying unsigned int32 a, +static inline varying unsigned int32 saturating_sub(varying unsigned int32 a, varying unsigned int32 b) { varying unsigned int32 result = a - b; result &= (-(varying int32)(result <= a)); return result; } -static inline uniform unsigned int64 saturating_sub(uniform unsigned int64 a, +static inline uniform unsigned int64 saturating_sub(uniform unsigned int64 a, uniform unsigned int64 b) { uniform unsigned int64 result = a - b; result &= 
(-(uniform int64)(result <= a)); return result; } -static inline varying unsigned int64 saturating_sub(varying unsigned int64 a, +static inline varying unsigned int64 saturating_sub(varying unsigned int64 a, varying unsigned int64 b) { varying unsigned int64 result = a - b; result &= (-(varying int64)(result <= a)); @@ -4851,7 +4851,7 @@ static inline uniform unsigned int8 saturating_div(uniform unsigned int8 a, return a / b; } -static inline varying unsigned int8 saturating_div(varying unsigned int8 a, +static inline varying unsigned int8 saturating_div(varying unsigned int8 a, varying unsigned int8 b) { /* No overflow possible */ return a / b; @@ -4863,13 +4863,13 @@ static inline uniform unsigned int16 saturating_div(uniform unsigned int16 a, return a / b; } -static inline varying unsigned int16 saturating_div(varying unsigned int16 a, +static inline varying unsigned int16 saturating_div(varying unsigned int16 a, varying unsigned int16 b) { /* No overflow possible */ return a / b; } -static inline uniform unsigned int32 saturating_div(uniform unsigned int32 a, +static inline uniform unsigned int32 saturating_div(uniform unsigned int32 a, uniform unsigned int32 b) { /* No overflow possible */ return a / b; @@ -4881,81 +4881,81 @@ static inline varying unsigned int32 saturating_div(varying unsigned int32 a, return a / b; } -static inline uniform unsigned int64 saturating_div(uniform unsigned int64 a, +static inline uniform unsigned int64 saturating_div(uniform unsigned int64 a, uniform unsigned int64 b) { /* No overflow possible */ return a / b; } -static inline varying unsigned int64 saturating_div(varying unsigned int64 a, +static inline varying unsigned int64 saturating_div(varying unsigned int64 a, varying unsigned int64 b) { /* No overflow possible */ return a / b; } static inline uniform int8 saturating_mul(uniform int8 a, uniform int8 b) { - uniform int16 result = (uniform int16) a * (uniform int16) b; + uniform int16 result = (uniform int16) a * (uniform 
int16) b; uniform unsigned int8 result2 = ((uniform unsigned int8) (a ^ b) >> 7) + INT8_MAX; uniform int8 hi = result >> 8; uniform int8 lo = result; - if (hi != (lo >> 7)) + if (hi != (lo >> 7)) result = result2; - return result; + return result; } static inline varying int8 saturating_mul(varying int8 a, varying int8 b) { - varying int16 result = (varying int16) a * (varying int16) b; + varying int16 result = (varying int16) a * (varying int16) b; varying unsigned int8 result2 = ((varying unsigned int8) (a ^ b) >> 7) + INT8_MAX; varying int8 hi = result >> 8; varying int8 lo = result; - if (hi != (lo >> 7)) + if (hi != (lo >> 7)) result = result2; - return result; + return result; } static inline uniform int16 saturating_mul(uniform int16 a, uniform int16 b) { - uniform int32 result = (uniform int32) a * (uniform int32) b; + uniform int32 result = (uniform int32) a * (uniform int32) b; uniform unsigned int16 result2 = ((uniform unsigned int16) (a ^ b) >> 15) + INT16_MAX; uniform int16 hi = result >> 16; uniform int16 lo = result; - if (hi != (lo >> 15)) + if (hi != (lo >> 15)) result = result2; - return result; + return result; } static inline varying int16 saturating_mul(varying int16 a, varying int16 b) { - varying int32 result = (varying int32) a * (varying int32) b; + varying int32 result = (varying int32) a * (varying int32) b; varying unsigned int16 result2 = ((varying unsigned int16) (a ^ b) >> 15) + INT16_MAX; varying int16 hi = result >> 16; varying int16 lo = result; - if (hi != (lo >> 15)) + if (hi != (lo >> 15)) result = result2; - return result; + return result; } static inline uniform int32 saturating_mul(uniform int32 a, uniform int32 b) { - uniform int64 result = (uniform int64) a * (uniform int64) b; + uniform int64 result = (uniform int64) a * (uniform int64) b; uniform unsigned int32 result2 = ((uniform unsigned int32) (a ^ b) >> 31) + INT32_MAX; uniform int32 hi = result >> 32; uniform int32 lo = result; - if (hi != (lo >> 31)) + if (hi != (lo 
>> 31)) result = result2; - return result; + return result; } static inline varying int32 saturating_mul(varying int32 a, varying int32 b) { - varying int64 result = (varying int64) a * (varying int64) b; + varying int64 result = (varying int64) a * (varying int64) b; varying unsigned int32 result2 = ((varying unsigned int32) (a ^ b) >> 31) + INT32_MAX; varying int32 hi = result >> 32; varying int32 lo = result; - if (hi != (lo >> 31)) + if (hi != (lo >> 31)) result = result2; - return result; + return result; } static inline uniform unsigned int8 saturating_mul(uniform unsigned int8 a, uniform unsigned int8 b) { - uniform unsigned int16 result = (uniform unsigned int16) a * + uniform unsigned int16 result = (uniform unsigned int16) a * (uniform unsigned int16) b; uniform unsigned int8 hi = result >> 8; uniform unsigned int8 lo = result; @@ -4964,7 +4964,7 @@ static inline uniform unsigned int8 saturating_mul(uniform unsigned int8 a, static inline varying unsigned int8 saturating_mul(varying unsigned int8 a, varying unsigned int8 b) { - varying unsigned int16 result = (varying unsigned int16) a * + varying unsigned int16 result = (varying unsigned int16) a * (varying unsigned int16) b; varying unsigned int8 hi = result >> 8; varying unsigned int8 lo = result; @@ -4973,7 +4973,7 @@ static inline varying unsigned int8 saturating_mul(varying unsigned int8 a, static inline uniform unsigned int16 saturating_mul(uniform unsigned int16 a, uniform unsigned int16 b) { - uniform unsigned int32 result = (uniform unsigned int32) a * + uniform unsigned int32 result = (uniform unsigned int32) a * (uniform unsigned int32) b; uniform unsigned int16 hi = result >> 16; uniform unsigned int16 lo = result; @@ -4982,7 +4982,7 @@ static inline uniform unsigned int16 saturating_mul(uniform unsigned int16 a, static inline varying unsigned int16 saturating_mul(varying unsigned int16 a, varying unsigned int16 b) { - varying unsigned int32 result = (varying unsigned int32) a * + varying 
unsigned int32 result = (varying unsigned int32) a * (varying unsigned int32) b; varying unsigned int16 hi = result >> 16; varying unsigned int16 lo = result; @@ -4991,7 +4991,7 @@ static inline varying unsigned int16 saturating_mul(varying unsigned int16 a, static inline uniform unsigned int32 saturating_mul(uniform unsigned int32 a, uniform unsigned int32 b) { - uniform unsigned int64 result = (uniform unsigned int64) a * + uniform unsigned int64 result = (uniform unsigned int64) a * (uniform unsigned int64) b; uniform unsigned int32 hi = result >> 32; uniform unsigned int32 lo = result; @@ -5000,12 +5000,162 @@ static inline uniform unsigned int32 saturating_mul(uniform unsigned int32 a, static inline varying unsigned int32 saturating_mul(varying unsigned int32 a, varying unsigned int32 b) { - varying unsigned int64 result = (varying unsigned int64) a * + varying unsigned int64 result = (varying unsigned int64) a * (varying unsigned int64) b; varying unsigned int32 hi = result >> 32; varying unsigned int32 lo = result; return lo | - (varying int32) !! hi; } + +static inline uniform int64 saturating_mul(uniform int64 a, uniform int64 b) { + uniform unsigned int64 ret = 0; + + uniform int8 sign = (((a > 0) && (b > 0)) || ((a < 0) && (b < 0))) ? 1 : -1; + uniform unsigned int64 a_abs = 0; + uniform unsigned int64 b_abs = 0; + + if (a == INT64_MIN) + // Operation "-" is undefined for "INT64_MIN", as it causes overflow. + // But converting INT64_MIN to unsigned type yields the correct result, + // i.e. it will be positive value -INT64_MIN. + // See 6.3.1.3 section in C99 standard for more details (ISPC follows + // C standard, unless it's specifically different in the language). + a_abs = (uniform unsigned int64) INT64_MIN; + else + a_abs = (a > 0) ? a : -a; + + if (b == INT64_MIN) + b_abs = (uniform unsigned int64) INT64_MIN; + else + b_abs = (b > 0) ?
b : -b; + + uniform unsigned int32 a0 = a_abs & 0xFFFFFFFF; + uniform unsigned int32 b0 = b_abs & 0xFFFFFFFF; + uniform unsigned int32 a1 = a_abs >> 32; + uniform unsigned int32 b1 = b_abs >> 32; + + if ((a1 != 0) && (b1 != 0)) { + if (sign > 0) { + return INT64_MAX; + } + else { + return INT64_MIN; + } + } else if (a1 != 0) { + ret = saturating_add ((uniform unsigned int64) saturating_mul (b0, a1) << 32 , + (uniform unsigned int64) (a0) * b0); + } else if (b1 != 0) { + ret = saturating_add ((uniform unsigned int64) saturating_mul (a0, b1) << 32 , + (uniform unsigned int64) (a0) * b0); + } else { + ret = a_abs * b_abs; + } + + + if ((sign < 0) && (ret >= (uniform unsigned int64) INT64_MIN)) { + return INT64_MIN; + } else if ((sign > 0) && (ret >= INT64_MAX)) { + return INT64_MAX; + } else { + return ret * sign; + } +} + +static inline varying int64 saturating_mul(varying int64 a, varying int64 b) { + varying unsigned int64 ret = 0; + + varying int8 sign = (((a > 0) && (b > 0)) || ((a < 0) && (b < 0))) ? 1 : -1; + varying unsigned int64 a_abs = 0; + varying unsigned int64 b_abs = 0; + + if (a == INT64_MIN) + // Operation "-" is undefined for "INT64_MIN", as it causes overflow. + // But converting INT64_MIN to unsigned type yields the correct result, + // i.e. it will be positive value -INT64_MIN. + // See 6.3.1.3 section in C99 standard for more details (ISPC follows + // C standard, unless it's specifically different in the language). + a_abs = (varying unsigned int64) INT64_MIN; + else + a_abs = (a > 0) ? a : -a; + + if (b == INT64_MIN) + b_abs = (varying unsigned int64) INT64_MIN; + else + b_abs = (b > 0) ?
b : -b; + + + varying unsigned int32 a0 = a_abs & 0xFFFFFFFF; + varying unsigned int32 b0 = b_abs & 0xFFFFFFFF; + varying unsigned int32 a1 = a_abs >> 32; + varying unsigned int32 b1 = b_abs >> 32; + + if ((a1 != 0) && (b1 != 0)) { + if (sign > 0) { + return INT64_MAX; + } + else { + return INT64_MIN; + } + } else if (a1 != 0) { + ret = saturating_add ((varying unsigned int64) saturating_mul (b0, a1) << 32 , + (varying unsigned int64) (a0) * b0); + } else if (b1 != 0) { + ret = saturating_add ((varying unsigned int64) saturating_mul (a0, b1) << 32 , + (varying unsigned int64) (a0) * b0); + } else { + ret = a_abs * b_abs; + } + + + if ((sign < 0) && (ret >= (varying unsigned int64) INT64_MIN)) { + return INT64_MIN; + } else if ((sign > 0) && (ret >= INT64_MAX)) { + return INT64_MAX; + } else { + return ret * sign; + } +} + + +static inline uniform unsigned int64 saturating_mul(uniform unsigned int64 a, + uniform unsigned int64 b) { + uniform unsigned int32 a0 = a & 0xFFFFFFFF; + uniform unsigned int32 b0 = b & 0xFFFFFFFF; + uniform unsigned int32 a1 = a >> 32; + uniform unsigned int32 b1 = b >> 32; + + if ((a1 != 0) && (b1 != 0)) { + return UINT64_MAX; + } else if (a1 != 0) { + return saturating_add ((uniform unsigned int64) saturating_mul (b0, a1) << 32 , + (uniform unsigned int64) (a0) * b0); + } else if (b1 != 0) { + return saturating_add ((uniform unsigned int64) saturating_mul (a0, b1) << 32 , + (uniform unsigned int64) (a0) * b0); + } else { + return a * b; + } +} + +static inline varying unsigned int64 saturating_mul(varying unsigned int64 a, + varying unsigned int64 b) { + varying unsigned int32 a0 = a & 0xFFFFFFFF; + varying unsigned int32 b0 = b & 0xFFFFFFFF; + varying unsigned int32 a1 = a >> 32; + varying unsigned int32 b1 = b >> 32; + + if ((a1 != 0) && (b1 != 0)) { + return UINT64_MAX; + } else if (a1 != 0) { + return saturating_add ((varying unsigned int64) saturating_mul (b0, a1) << 32 , + (varying unsigned int64) (a0) * b0); + } else if (b1 != 0) { 
+ return saturating_add ((varying unsigned int64) saturating_mul (a0, b1) << 32 , + (varying unsigned int64) (a0) * b0); + } else { + return a * b; + } +} /////////////////////////////////////////////////////////////////////////// // rdrand diff --git a/stmt.cpp b/stmt.cpp index 4cca5d40..c528909b 100644 --- a/stmt.cpp +++ b/stmt.cpp @@ -1,5 +1,5 @@ /* - Copyright (c) 2010-2013, Intel Corporation + Copyright (c) 2010-2014, Intel Corporation All rights reserved. Redistribution and use in source and binary forms, with or without diff --git a/stmt.h b/stmt.h index 7ed1f0ef..fb34c801 100644 --- a/stmt.h +++ b/stmt.h @@ -1,5 +1,5 @@ /* - Copyright (c) 2010-2012, Intel Corporation + Copyright (c) 2010-2013, Intel Corporation All rights reserved. Redistribution and use in source and binary forms, with or without diff --git a/sym.cpp b/sym.cpp index 05f9996a..396ec488 100644 --- a/sym.cpp +++ b/sym.cpp @@ -1,5 +1,5 @@ /* - Copyright (c) 2010-2012, Intel Corporation + Copyright (c) 2010-2013, Intel Corporation All rights reserved. Redistribution and use in source and binary forms, with or without diff --git a/sym.h b/sym.h index 761c3612..5840fcdb 100644 --- a/sym.h +++ b/sym.h @@ -1,5 +1,5 @@ /* - Copyright (c) 2010-2012, Intel Corporation + Copyright (c) 2010-2013, Intel Corporation All rights reserved. Redistribution and use in source and binary forms, with or without diff --git a/test_static.cpp b/test_static.cpp index 27a5b136..c27e2741 100644 --- a/test_static.cpp +++ b/test_static.cpp @@ -1,5 +1,5 @@ /* - Copyright (c) 2010-2011, Intel Corporation + Copyright (c) 2010-2013, Intel Corporation All rights reserved. 
Redistribution and use in source and binary forms, with or without diff --git a/tests/max-double-1.ispc b/tests/max-double-1.ispc new file mode 100644 index 00000000..e9c4a6a3 --- /dev/null +++ b/tests/max-double-1.ispc @@ -0,0 +1,19 @@ + +export uniform int width() { return programCount; } + + + +export void f_du(uniform float RET[], uniform double aFOO[], uniform double b) { + double a = aFOO[programIndex]; + RET[programIndex] = max(3 * a, (double)10.f); + RET[width()-1] = max(b, (double)100); +} + + +export void result(uniform float RET[]) { + RET[programIndex] = 3 * (1+programIndex); + RET[0] = 10; + RET[1] = 10; + RET[2] = 10; + RET[programCount-1] = 100; +} diff --git a/tests/max-double-2.ispc b/tests/max-double-2.ispc new file mode 100644 index 00000000..5f4c854e --- /dev/null +++ b/tests/max-double-2.ispc @@ -0,0 +1,18 @@ + +export uniform int width() { return programCount; } + + + +export void f_du(uniform float RET[], uniform double aFOO[], uniform double b) { + double a = aFOO[programIndex]; + RET[programIndex] = max(-10 * (a-3), (double).1f); + RET[width() - 1] = max(-10 * b, (double)2); +} + +export void result(uniform float RET[]) { + RET[programIndex] = .1; + RET[0] = 20; + RET[1] = 10; + RET[programCount - 1] = 2; +} + diff --git a/tests/max-float-1.ispc b/tests/max-float-1.ispc index b77de7e3..24b9822d 100644 --- a/tests/max-float-1.ispc +++ b/tests/max-float-1.ispc @@ -3,9 +3,17 @@ export uniform int width() { return programCount; } -export void f_f(uniform float RET[], uniform float aFOO[]) { +export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) { float a = aFOO[programIndex]; - RET[programIndex] = max(10 * a, 10.f); + RET[programIndex] = max(3 * a, 10.f); + RET[width()-1] = max(b, 100); } -export void result(uniform float RET[]) { RET[programIndex] = 10 * (1+programIndex); } + +export void result(uniform float RET[]) { + RET[programIndex] = 3 * (1+programIndex); + RET[0] = 10; + RET[1] = 10; + RET[2] = 10; + 
RET[programCount-1] = 100; +} diff --git a/tests/max-float-2.ispc b/tests/max-float-2.ispc index ca025c2f..f990b102 100644 --- a/tests/max-float-2.ispc +++ b/tests/max-float-2.ispc @@ -3,10 +3,16 @@ export uniform int width() { return programCount; } -export void f_f(uniform float RET[], uniform float aFOO[]) { +export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) { float a = aFOO[programIndex]; - RET[programIndex] = max(-10 * a, 10.f); + RET[programIndex] = max(-10 * (a-3), .1f); + RET[width() - 1] = max(-10 * b, 2); } -export void result(uniform float RET[]) { RET[programIndex] = 10.; } +export void result(uniform float RET[]) { + RET[programIndex] = .1; + RET[0] = 20; + RET[1] = 10; + RET[programCount - 1] = 2; +} diff --git a/tests/max-int-1.ispc b/tests/max-int-1.ispc index f1492b8b..7a565d4c 100644 --- a/tests/max-int-1.ispc +++ b/tests/max-int-1.ispc @@ -3,11 +3,16 @@ export uniform int width() { return programCount; } -export void f_f(uniform float RET[], uniform float aFOO[]) { +export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) { float a = aFOO[programIndex]; int i = (int)a; - RET[programIndex] = max((int)200, i); + RET[programIndex] = max((int)2, i); + RET[width()-1] = max(10, (int)b); } -export void result(uniform float RET[]) { RET[programIndex] = 200.; } +export void result(uniform float RET[]) { + RET[programIndex] = programIndex + 1; + RET[0] = 2; + RET[programCount-1] = 10; +} diff --git a/tests/max-int.ispc b/tests/max-int.ispc index 3a4bb641..783a9274 100644 --- a/tests/max-int.ispc +++ b/tests/max-int.ispc @@ -3,11 +3,15 @@ export uniform int width() { return programCount; } -export void f_f(uniform float RET[], uniform float aFOO[]) { +export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) { float a = aFOO[programIndex]; int i = (int)a; - RET[programIndex] = max((int)-20, i); + RET[programIndex] = max((int)-2, -1 * i); + RET[width() - 1] = max(-2, -1 * (int)b); } -export 
void result(uniform float RET[]) { RET[programIndex] = 1+programIndex; } +export void result(uniform float RET[]) { + RET[programIndex] = -2; + RET[0] = -1; +} diff --git a/tests/max-uint-1.ispc b/tests/max-uint-1.ispc index d1143f5d..78a66625 100644 --- a/tests/max-uint-1.ispc +++ b/tests/max-uint-1.ispc @@ -1,14 +1,16 @@ export uniform int width() { return programCount; } -export void f_f(uniform float r[], uniform float a[]) { +export void f_fu(uniform float r[], uniform float a[], uniform float b) { unsigned int i = (unsigned int)a[programIndex]; r[programIndex] = max((unsigned int)2, i); + r[width() - 1] = max((unsigned int)10, (unsigned int)b); } export void result(uniform float r[]) { r[programIndex] = 1+programIndex; r[0] = 2; + r[programCount - 1] = 10; } diff --git a/tests/min-double-1.ispc b/tests/min-double-1.ispc new file mode 100644 index 00000000..813a99fe --- /dev/null +++ b/tests/min-double-1.ispc @@ -0,0 +1,19 @@ + +export uniform int width() { return programCount; } + + + +export void f_du(uniform float RET[], uniform double aFOO[], uniform double b) { + double a = aFOO[programIndex]; + RET[programIndex] = min(3 * a, (double)10.f); + RET[width()-1] = min(b, (double)100); +} + + +export void result(uniform float RET[]) { + RET[programIndex] = 10; + RET[0] = 3; + RET[1] = 6; + RET[2] = 9; + RET[programCount-1] = 5; +} diff --git a/tests/min-double-2.ispc b/tests/min-double-2.ispc new file mode 100644 index 00000000..26609b81 --- /dev/null +++ b/tests/min-double-2.ispc @@ -0,0 +1,18 @@ + +export uniform int width() { return programCount; } + + + +export void f_du(uniform float RET[], uniform double aFOO[], uniform double b) { + double a = aFOO[programIndex]; + RET[programIndex] = min(-10 * (a-3), (double).1f); + RET[width() - 1] = min(-10 * b, (double)2); +} + +export void result(uniform float RET[]) { + RET[programIndex] = -10 * (programIndex - 2); + RET[0] = .1; + RET[1] = .1; + RET[programCount - 1] = -50; +} + diff --git a/tests/min-float-1.ispc 
b/tests/min-float-1.ispc index 914ae994..5b62c5c5 100644 --- a/tests/min-float-1.ispc +++ b/tests/min-float-1.ispc @@ -3,9 +3,17 @@ export uniform int width() { return programCount; } -export void f_f(uniform float RET[], uniform float aFOO[]) { +export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) { float a = aFOO[programIndex]; - RET[programIndex] = min(10 * a, 10.f); + RET[programIndex] = min(3 * a, 10.f); + RET[width()-1] = min(b, 100); } -export void result(uniform float RET[]) { RET[programIndex] = 10.; } + +export void result(uniform float RET[]) { + RET[programIndex] = 10; + RET[0] = 3; + RET[1] = 6; + RET[2] = 9; + RET[programCount-1] = 5; +} diff --git a/tests/min-float-2.ispc b/tests/min-float-2.ispc new file mode 100644 index 00000000..85c226ca --- /dev/null +++ b/tests/min-float-2.ispc @@ -0,0 +1,18 @@ + +export uniform int width() { return programCount; } + + + +export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) { + float a = aFOO[programIndex]; + RET[programIndex] = min(-10 * (a-3), .1f); + RET[width() - 1] = min(-10 * b, 2); +} + +export void result(uniform float RET[]) { + RET[programIndex] = -10 * (programIndex - 2); + RET[0] = .1; + RET[1] = .1; + RET[programCount - 1] = -50; +} + diff --git a/tests/min-float.ispc b/tests/min-float.ispc deleted file mode 100644 index caedd962..00000000 --- a/tests/min-float.ispc +++ /dev/null @@ -1,11 +0,0 @@ - -export uniform int width() { return programCount; } - - - -export void f_f(uniform float RET[], uniform float aFOO[]) { - float a = aFOO[programIndex]; - RET[programIndex] = min(a, 200.f); -} - -export void result(uniform float RET[]) { RET[programIndex] = 1+programIndex; } diff --git a/tests/min-int-1.ispc b/tests/min-int-1.ispc index 1c81936f..86f0821d 100644 --- a/tests/min-int-1.ispc +++ b/tests/min-int-1.ispc @@ -3,11 +3,16 @@ export uniform int width() { return programCount; } -export void f_f(uniform float RET[], uniform float aFOO[]) { +export void 
f_fu(uniform float RET[], uniform float aFOO[], uniform float b) { float a = aFOO[programIndex]; int i = (int)a; - RET[programIndex] = min((int)-20, i); + RET[programIndex] = min((int)2, i); + RET[width()-1] = min(10, (int)b); } -export void result(uniform float RET[]) { RET[programIndex] = -20; } +export void result(uniform float RET[]) { + RET[programIndex] = 2; + RET[0] = 1; + RET[programCount-1] = 5; +} diff --git a/tests/min-int.ispc b/tests/min-int.ispc index 483b9b41..7f97e28c 100644 --- a/tests/min-int.ispc +++ b/tests/min-int.ispc @@ -3,11 +3,16 @@ export uniform int width() { return programCount; } -export void f_f(uniform float RET[], uniform float aFOO[]) { +export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) { float a = aFOO[programIndex]; int i = (int)a; - RET[programIndex] = min((int)200, i); + RET[programIndex] = min((int)-2, -1 * i); + RET[width() - 1] = min(-2, -1 * (int)b); } -export void result(uniform float RET[]) { RET[programIndex] = 1+programIndex; } +export void result(uniform float RET[]) { + RET[programIndex] = - programIndex - 1; + RET[0] = -2; + RET[programCount - 1] = -5; +} diff --git a/tests/min-uint-1.ispc b/tests/min-uint-1.ispc index d1cd4461..042382f0 100644 --- a/tests/min-uint-1.ispc +++ b/tests/min-uint-1.ispc @@ -1,14 +1,16 @@ export uniform int width() { return programCount; } -export void f_f(uniform float result[], uniform float aa[]) { - unsigned int i = (unsigned int)aa[programIndex]; - result[programIndex] = min((unsigned int)2, i); +export void f_fu(uniform float r[], uniform float a[], uniform float b) { + unsigned int i = (unsigned int)a[programIndex]; + r[programIndex] = min((unsigned int)2, i); + r[width() - 1] = min((unsigned int)10, (unsigned int)b); } export void result(uniform float r[]) { r[programIndex] = 2; r[0] = 1; + r[programCount - 1] = 5; } diff --git a/tests/pmuls_i64.ispc b/tests/pmuls_i64.ispc new file mode 100644 index 00000000..a04ca698 --- /dev/null +++ 
b/tests/pmuls_i64.ispc @@ -0,0 +1,28 @@ + +export uniform int width() { return programCount; } + +export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) { + uniform int64 a_max = 0x7FFFFFFFFFFFFFFF, a_min = -0x8000000000000000; // max and min signed int64 + if (programIndex % 3 == 0) { + RET[programIndex] = saturating_mul(a_max, (uniform int64) b); + } + else if (programIndex % 3 == 1) { + RET[programIndex] = saturating_mul(a_min, (uniform int64) b); + } + else { + RET[programIndex] = saturating_mul((uniform int64) b, + (uniform int64) b); + } +} + +export void result(uniform float RET[]) { + if (programIndex % 3 == 0) { + RET[programIndex] = (uniform int64) 0x7FFFFFFFFFFFFFFF; // max signed int64 + } + else if (programIndex % 3 == 1) { + RET[programIndex] = (uniform int64) -0x8000000000000000; // min signed int64 + } + else { + RET[programIndex] = (uniform int64) 25; + } +} diff --git a/tests/pmuls_vi64.ispc b/tests/pmuls_vi64.ispc new file mode 100644 index 00000000..32df2fac --- /dev/null +++ b/tests/pmuls_vi64.ispc @@ -0,0 +1,28 @@ + +export uniform int width() { return programCount; } + +export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) { + varying int64 a_max = 0x7FFFFFFFFFFFFFFF, a_min = -0x8000000000000000; // max and min signed int64 + if (programIndex % 3 == 0) { + RET[programIndex] = saturating_mul(a_max, (varying int64) b); + } + else if (programIndex % 3 == 1) { + RET[programIndex] = saturating_mul(a_min, (varying int64) b); + } + else { + RET[programIndex] = saturating_mul((varying int64) b, + (varying int64) b); + } +} + +export void result(uniform float RET[]) { + if (programIndex % 3 == 0) { + RET[programIndex] = (varying int64) 0x7FFFFFFFFFFFFFFF; // max signed int64 + } + else if (programIndex % 3 == 1) { + RET[programIndex] = (varying int64) -0x8000000000000000; // min signed int64 + } + else { + RET[programIndex] = (varying int64) 25; + } +} diff --git a/tests/pmulus_i64.ispc 
b/tests/pmulus_i64.ispc new file mode 100644 index 00000000..179902a3 --- /dev/null +++ b/tests/pmulus_i64.ispc @@ -0,0 +1,28 @@ + +export uniform int width() { return programCount; } + +export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) { + uniform unsigned int64 a_max = 0xFFFFFFFFFFFFFFFF, a_min = 0; // max and min unsigned int64 + if (programIndex % 3 == 0) { + RET[programIndex] = saturating_mul(a_max, (uniform unsigned int64) b); + } + else if (programIndex % 3 == 1) { + RET[programIndex] = saturating_mul(a_min, (uniform unsigned int64) -b); + } + else { + RET[programIndex] = saturating_mul((uniform unsigned int64) b, + (uniform unsigned int64) b); + } +} + +export void result(uniform float RET[]) { + if (programIndex % 3 == 0) { + RET[programIndex] = (uniform unsigned int64) 0xFFFFFFFFFFFFFFFF; // max unsigned int64 + } + else if (programIndex % 3 == 1) { + RET[programIndex] = (uniform unsigned int64) 0; // min unsigned int64 + } + else { + RET[programIndex] = (uniform unsigned int64) 25; + } +} diff --git a/tests/pmulus_vi64.ispc b/tests/pmulus_vi64.ispc new file mode 100644 index 00000000..43ae9aac --- /dev/null +++ b/tests/pmulus_vi64.ispc @@ -0,0 +1,28 @@ + +export uniform int width() { return programCount; } + +export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) { + varying unsigned int64 a_max = 0xFFFFFFFFFFFFFFFF, a_min = 0; // max and min unsigned int64 + if (programIndex % 3 == 0) { + RET[programIndex] = saturating_mul(a_max, (varying unsigned int64) b); + } + else if (programIndex % 3 == 1) { + RET[programIndex] = saturating_mul(a_min, (varying unsigned int64) -b); + } + else { + RET[programIndex] = saturating_mul((varying unsigned int64) b, + (varying unsigned int64) b); + } +} + +export void result(uniform float RET[]) { + if (programIndex % 3 == 0) { + RET[programIndex] = (varying unsigned int64) 0xFFFFFFFFFFFFFFFF; // max unsigned int64 + } + else if (programIndex % 3 == 1) { + RET[programIndex] 
= (varying unsigned int64) 0; // min unsigned int64 + } + else { + RET[programIndex] = (varying unsigned int64) 25; + } +} diff --git a/type.cpp b/type.cpp index 00795737..b22f9283 100644 --- a/type.cpp +++ b/type.cpp @@ -1,5 +1,5 @@ /* - Copyright (c) 2010-2013, Intel Corporation + Copyright (c) 2010-2014, Intel Corporation All rights reserved. Redistribution and use in source and binary forms, with or without @@ -50,9 +50,9 @@ #include #include #endif -#if defined(LLVM_3_1) - #include - #include +#if defined(LLVM_3_5) + #include + #include #else #include #include @@ -81,6 +81,7 @@ lShouldPrintName(const std::string &name) { the given element type. */ static llvm::DIType lCreateDIArray(llvm::DIType eltType, int count) { +#ifdef LLVM_3_2 int lowerBound = 0, upperBound = count-1; if (count == 0) { @@ -90,6 +91,9 @@ lCreateDIArray(llvm::DIType eltType, int count) { } llvm::Value *sub = m->diBuilder->getOrCreateSubrange(lowerBound, upperBound); +#else // LLVM 3.3+ + llvm::Value *sub = m->diBuilder->getOrCreateSubrange(0, count); +#endif std::vector subs; subs.push_back(sub); llvm::DIArray subArray = m->diBuilder->getOrCreateArray(subs); @@ -224,7 +228,7 @@ Type::IsReferenceType() const { bool Type::IsVoidType() const { - return this == AtomicType::Void; + return EqualIgnoringConst(this, AtomicType::Void); } bool @@ -286,7 +290,7 @@ AtomicType::GetAsUnsignedType() const { const AtomicType * AtomicType::GetAsConstType() const { - if (basicType == TYPE_VOID || isConst == true) + if (isConst == true) return this; if (asOtherConstType == NULL) { @@ -299,7 +303,7 @@ AtomicType::GetAsConstType() const { const AtomicType * AtomicType::GetAsNonConstType() const { - if (basicType == TYPE_VOID || isConst == false) + if (isConst == false) return this; if (asOtherConstType == NULL) { @@ -376,8 +380,8 @@ AtomicType::ResolveUnboundVariability(Variability v) const { std::string AtomicType::GetString() const { std::string ret; + if (isConst) ret += "const "; if (basicType != 
TYPE_VOID) { - if (isConst) ret += "const "; ret += variability.GetString(); ret += " "; } @@ -456,15 +460,9 @@ AtomicType::GetCDeclaration(const std::string &name) const { ret += name; } - if (variability == Variability::Varying || - variability == Variability::SOA) { + if (variability == Variability::SOA) { char buf[32]; - // get program count - // g->mangleFunctionsNamesWithTarget - hack check for void * - int vWidth = (variability == Variability::Varying) ? - g->target->getVectorWidth() : - variability.soaWidth; - sprintf(buf, "[%d]", vWidth); + sprintf(buf, "[%d]", variability.soaWidth); ret += buf; } @@ -571,7 +569,11 @@ AtomicType::GetDIType(llvm::DIDescriptor scope) const { } else if (variability == Variability::Varying) { llvm::DIType unifType = GetAsUniformType()->GetDIType(scope); +#ifdef LLVM_3_2 llvm::Value *sub = m->diBuilder->getOrCreateSubrange(0, g->target->getVectorWidth()-1); +#else // LLVM 3.3+ + llvm::Value *sub = m->diBuilder->getOrCreateSubrange(0, g->target->getVectorWidth()); +#endif llvm::DIArray subArray = m->diBuilder->getOrCreateArray(sub); uint64_t size = unifType.getSizeInBits() * g->target->getVectorWidth(); uint64_t align = unifType.getAlignInBits() * g->target->getVectorWidth(); @@ -838,7 +840,11 @@ EnumType::GetDIType(llvm::DIDescriptor scope) const { case Variability::Uniform: return diType; case Variability::Varying: { +#ifdef LLVM_3_2 llvm::Value *sub = m->diBuilder->getOrCreateSubrange(0, g->target->getVectorWidth()-1); +#else // LLVM 3.3+ + llvm::Value *sub = m->diBuilder->getOrCreateSubrange(0, g->target->getVectorWidth()); +#endif llvm::DIArray subArray = m->diBuilder->getOrCreateArray(sub); uint64_t size = diType.getSizeInBits() * g->target->getVectorWidth(); uint64_t align = diType.getAlignInBits() * g->target->getVectorWidth(); @@ -1096,20 +1102,27 @@ PointerType::GetCDeclaration(const std::string &name) const { } std::string ret = baseType->GetCDeclaration(""); + + bool baseIsBasicVarying = (IsBasicType(baseType)) && 
(baseType->IsVaryingType()); + + if (baseIsBasicVarying) ret += std::string("("); ret += std::string(" *"); if (isConst) ret += " const"; ret += std::string(" "); ret += name; + if (baseIsBasicVarying) ret += std::string(")"); - if (variability == Variability::SOA || - variability == Variability::Varying) { - int vWidth = (variability == Variability::Varying) ? - g->target->getVectorWidth() : - variability.soaWidth; + if (variability == Variability::SOA) { char buf[32]; - sprintf(buf, "[%d]", vWidth); + sprintf(buf, "[%d]", variability.soaWidth); ret += buf; } + if (baseIsBasicVarying) { + int vWidth = g->target->getVectorWidth(); + char buf[32]; + sprintf(buf, "[%d]", vWidth); + ret += buf; + } return ret; } @@ -1154,7 +1167,7 @@ PointerType::LLVMType(llvm::LLVMContext *ctx) const { if (ftype != NULL) ptype = llvm::PointerType::get(ftype->LLVMFunctionType(ctx), 0); else { - if (baseType == AtomicType::Void) + if (baseType->IsVoidType()) ptype = LLVMTypes::VoidPointerType; else ptype = llvm::PointerType::get(baseType->LLVMType(ctx), 0); @@ -1222,7 +1235,7 @@ ArrayType::ArrayType(const Type *c, int a) : SequentialType(ARRAY_TYPE), child(c), numElements(a) { // 0 -> unsized array. 
Assert(numElements >= 0); - Assert(Type::Equal(c, AtomicType::Void) == false); + Assert(c->IsVoidType() == false); } @@ -1720,7 +1733,11 @@ VectorType::LLVMType(llvm::LLVMContext *ctx) const { llvm::DIType VectorType::GetDIType(llvm::DIDescriptor scope) const { llvm::DIType eltType = base->GetDIType(scope); +#ifdef LLVM_3_2 llvm::Value *sub = m->diBuilder->getOrCreateSubrange(0, numElements-1); +#else // LLVM 3.3+ + llvm::Value *sub = m->diBuilder->getOrCreateSubrange(0, numElements); +#endif llvm::DIArray subArray = m->diBuilder->getOrCreateArray(sub); uint64_t sizeBits = eltType.getSizeInBits() * numElements; @@ -1890,6 +1907,10 @@ StructType::StructType(const std::string &n, const llvm::SmallVectorerrorCount > 0); return NULL; } - Assert(Type::Equal(paramTypes[i], AtomicType::Void) == false); + Assert(paramTypes[i]->IsVoidType() == false); llvm::Type *t = paramTypes[i]->LLVMType(ctx); if (t == NULL) { diff --git a/type.h b/type.h index 0337be6e..9093af59 100644 --- a/type.h +++ b/type.h @@ -1,5 +1,5 @@ /* - Copyright (c) 2010-2012, Intel Corporation + Copyright (c) 2010-2013, Intel Corporation All rights reserved. Redistribution and use in source and binary forms, with or without @@ -714,7 +714,8 @@ public: const SourcePos &GetElementPosition(int i) const { return elementPositions[i]; } /** Returns the name of the structure type. (e.g. struct Foo -> "Foo".) */ - const std::string &GetStructName() const { return name; } + const std::string &GetStructName() const { return name; } + const std::string GetCStructName() const; private: static bool checkIfCanBeSOA(const StructType *st); diff --git a/util.cpp b/util.cpp index 6b121988..b9b5858a 100644 --- a/util.cpp +++ b/util.cpp @@ -1,5 +1,5 @@ /* - Copyright (c) 2010-2013, Intel Corporation + Copyright (c) 2010-2014, Intel Corporation All rights reserved. 
Redistribution and use in source and binary forms, with or without @@ -577,7 +577,7 @@ GetDirectoryAndFileName(const std::string ¤tDirectory, const char *basenameStart = strrchr(fp, '/'); Assert(basenameStart != NULL); ++basenameStart; - Assert(basenameStart != '\0'); + Assert(basenameStart[0] != '\0'); *filename = basenameStart; *directory = std::string(fp, basenameStart - fp); #endif // ISPC_IS_WINDOWS diff --git a/util.h b/util.h index 7edf71f7..11d843c4 100644 --- a/util.h +++ b/util.h @@ -1,5 +1,5 @@ /* - Copyright (c) 2010-2011, Intel Corporation + Copyright (c) 2010-2013, Intel Corporation All rights reserved. Redistribution and use in source and binary forms, with or without