diff --git a/Makefile b/Makefile
index e39eb831..e9422564 100644
--- a/Makefile
+++ b/Makefile
@@ -2,6 +2,15 @@
 # ispc Makefile
 #
 
+# If you have your own special version of llvm and/or clang, change these
+# variables to match.  ':=' makes each $(shell ...) run once, not per use.
+LLVM_CONFIG:=$(shell which llvm-config)
+CLANG_INCLUDE:=$(shell $(LLVM_CONFIG) --includedir)
+
+# Add llvm bin to the path so any scripts run will go to the right llvm-config
+LLVM_BIN:=$(shell $(LLVM_CONFIG) --bindir)
+export PATH:=$(LLVM_BIN):$(PATH)
+
 ARCH_OS = $(shell uname)
 ifeq ($(ARCH_OS), Darwin)
 	ARCH_OS2 = "OSX"
@@ -10,10 +19,10 @@ else
 endif
 ARCH_TYPE = $(shell arch)
 
-ifeq ($(shell llvm-config --version), 3.1svn)
+ifeq ($(shell $(LLVM_CONFIG) --version), 3.1svn)
   LLVM_LIBS=-lLLVMAsmParser -lLLVMInstrumentation -lLLVMLinker			\
 	-lLLVMArchive -lLLVMBitReader -lLLVMDebugInfo -lLLVMJIT -lLLVMipo	\
-	-lLLVMBitWriter -lLLVMTableGen -lLLVMCBackendInfo			\
+	-lLLVMBitWriter -lLLVMTableGen 			\
 	-lLLVMX86Disassembler -lLLVMX86CodeGen -lLLVMSelectionDAG		\
 	-lLLVMAsmPrinter -lLLVMX86AsmParser -lLLVMX86Desc -lLLVMX86Info		\
 	-lLLVMX86AsmPrinter -lLLVMX86Utils -lLLVMMCDisassembler	-lLLVMMCParser	\
@@ -22,18 +31,18 @@ ifeq ($(shell llvm-config --version), 3.1svn)
 	-lLLVMExecutionEngine -lLLVMTarget -lLLVMMC -lLLVMObject -lLLVMCore 	\
 	-lLLVMSupport
 else
-  LLVM_LIBS=$(shell llvm-config --libs)
+  LLVM_LIBS=$(shell $(LLVM_CONFIG) --libs)
 endif
 
 CLANG=clang
 CLANG_LIBS = -lclangFrontend -lclangDriver \
              -lclangSerialization -lclangParse -lclangSema \
              -lclangAnalysis -lclangAST -lclangLex -lclangBasic
-ifeq ($(shell llvm-config --version), 3.1svn)
+ifeq ($(shell $(LLVM_CONFIG) --version), 3.1svn)
   CLANG_LIBS += -lclangEdit
 endif
 
-ISPC_LIBS=$(shell llvm-config --ldflags) $(CLANG_LIBS) $(LLVM_LIBS) \
+ISPC_LIBS=$(shell $(LLVM_CONFIG) --ldflags) $(CLANG_LIBS) $(LLVM_LIBS) \
 	-lpthread
 
 ifeq ($(ARCH_OS),Linux)
@@ -44,8 +53,8 @@ ifeq ($(ARCH_OS2),Msys)
 	ISPC_LIBS += -lshlwapi -limagehlp -lpsapi
 endif
 
-LLVM_CXXFLAGS=$(shell llvm-config --cppflags)
-LLVM_VERSION=LLVM_$(shell llvm-config --version | sed s/\\./_/)
+LLVM_CXXFLAGS=$(shell $(LLVM_CONFIG) --cppflags)
+LLVM_VERSION=LLVM_$(shell $(LLVM_CONFIG) --version | sed s/\\./_/)
 LLVM_VERSION_DEF=-D$(LLVM_VERSION)
 
 BUILD_DATE=$(shell date +%Y%m%d)
@@ -54,7 +63,8 @@ BUILD_VERSION=$(shell git log --abbrev-commit --abbrev=16 | head -1)
 CXX=g++
 CPP=cpp
 OPT=-g3
-CXXFLAGS=$(OPT) $(LLVM_CXXFLAGS) -I. -Iobjs/ -Wall $(LLVM_VERSION_DEF) \
+CXXFLAGS=$(OPT) $(LLVM_CXXFLAGS) -I. -Iobjs/ -I$(CLANG_INCLUDE)  \
+	-Wall $(LLVM_VERSION_DEF) \
 	-DBUILD_DATE="\"$(BUILD_DATE)\"" -DBUILD_VERSION="\"$(BUILD_VERSION)\""
 
 LDFLAGS=
diff --git a/ast.cpp b/ast.cpp
index c89f00bb..752585f1 100644
--- a/ast.cpp
+++ b/ast.cpp
@@ -1,5 +1,5 @@
 /*
-  Copyright (c) 2011, Intel Corporation
+  Copyright (c) 2011-2012, Intel Corporation
   All rights reserved.
 
   Redistribution and use in source and binary forms, with or without
@@ -32,8 +32,10 @@
 */
 
 /** @file ast.cpp
-    @brief 
-*/
+
+    @brief General functionality related to abstract syntax trees and
+    traversal of them.
+ */
 
 #include "ast.h"
 #include "expr.h"
@@ -53,10 +55,10 @@ ASTNode::~ASTNode() {
 // AST
 
 void
-AST::AddFunction(Symbol *sym, const std::vector<Symbol *> &args, Stmt *code) {
+AST::AddFunction(Symbol *sym, Stmt *code) {  // Records a function for this AST; a NULL sym is silently ignored.
     if (sym == NULL)
         return;
-    functions.push_back(new Function(sym, args, code));
+    functions.push_back(new Function(sym, code));  // appends a newly allocated Function built from sym and code
 }
 
 
@@ -151,7 +153,7 @@ WalkAST(ASTNode *node, ASTPreCallBackFunc preFunc, ASTPostCallBackFunc postFunc,
         else if ((ls = dynamic_cast<LabeledStmt *>(node)) != NULL)
             ls->stmt = (Stmt *)WalkAST(ls->stmt, preFunc, postFunc, data);
         else if ((rs = dynamic_cast<ReturnStmt *>(node)) != NULL)
-            rs->val = (Expr *)WalkAST(rs->val, preFunc, postFunc, data);
+            rs->expr = (Expr *)WalkAST(rs->expr, preFunc, postFunc, data);
         else if ((sl = dynamic_cast<StmtList *>(node)) != NULL) {
             std::vector<Stmt *> &sls = sl->stmts;
             for (unsigned int i = 0; i < sls.size(); ++i)
@@ -305,19 +307,39 @@ TypeCheck(Stmt *stmt) {
 }
 
 
+struct CostData {  // State passed through WalkAST() by EstimateCost() as the opaque callback data.
+    CostData() { cost = foreachDepth = 0; }  // both counters start at zero
+
+    int cost;          // Running sum of EstimateCost() over nodes visited while foreachDepth == 0.
+    int foreachDepth;  // Incremented on entering a ForeachStmt, decremented on leaving it.
+};
+
+
 static bool
-lCostCallback(ASTNode *node, void *c) {
-    int *cost = (int *)c;
-    *cost += node->EstimateCost();
+lCostCallbackPre(ASTNode *node, void *d) {  // WalkAST() pre-callback used by EstimateCost(); d points to a CostData.
+    CostData *data = (CostData *)d;
+    if (dynamic_cast<ForeachStmt *>(node) != NULL)
+        ++data->foreachDepth;  // bumped before the test below, so the foreach node itself is not counted either
+    if (data->foreachDepth == 0)
+        data->cost += node->EstimateCost();  // only nodes outside every foreach contribute to the total
     return true;
 }
 
 
+static ASTNode *
+lCostCallbackPost(ASTNode *node, void *d) {  // WalkAST() post-callback used by EstimateCost(); d points to a CostData.
+    CostData *data = (CostData *)d;
+    if (dynamic_cast<ForeachStmt *>(node) != NULL)
+        --data->foreachDepth;  // leaving a foreach: undoes the increment made by lCostCallbackPre
+    return node;  // the node is handed back unchanged
+}
+
+
 int
 EstimateCost(ASTNode *root) {
-    int cost = 0;
-    WalkAST(root, lCostCallback, NULL, &cost);
-    return cost;
+    CostData data;  // cost and foreachDepth both start at zero
+    WalkAST(root, lCostCallbackPre, lCostCallbackPost, &data);  // pre adds costs; pre/post together track foreach nesting
+    return data.cost;  // sum over the nodes visited while foreachDepth was zero
 }
 
 
@@ -363,6 +385,16 @@ lCheckAllOffSafety(ASTNode *node, void *data) {
         return false;
     }
 
+    if (dynamic_cast<ForeachStmt *>(node) != NULL) {
+        // foreach() statements also shouldn't be run with an all-off mask.
+        // Since they re-establish an 'all on' mask, this would be pretty
+        // unintuitive.  (More generally, it's possibly a little strange to
+        // allow foreach() in the presence of any non-uniform control
+        // flow...)
+        *okPtr = false;
+        return false;
+    }
+
     if (g->target.allOffMaskIsSafe == true)
         // Don't worry about memory accesses if we have a target that can
         // safely run them with the mask all off
diff --git a/ast.h b/ast.h
index 0f73677b..f03d7343 100644
--- a/ast.h
+++ b/ast.h
@@ -1,5 +1,5 @@
 /*
-  Copyright (c) 2011, Intel Corporation
+  Copyright (c) 2011-2012, Intel Corporation
   All rights reserved.
 
   Redistribution and use in source and binary forms, with or without
@@ -84,8 +84,7 @@ class AST {
 public:
     /** Add the AST for a function described by the given declaration
         information and source code. */
-    void AddFunction(Symbol *sym, const std::vector<Symbol *> &args, 
-                     Stmt *code);
+    void AddFunction(Symbol *sym, Stmt *code);
 
     /** Generate LLVM IR for all of the functions into the current
         module. */
diff --git a/builtins.cpp b/builtins.cpp
index 0e34596d..405c8290 100644
--- a/builtins.cpp
+++ b/builtins.cpp
@@ -291,7 +291,7 @@ lCheckModuleIntrinsics(llvm::Module *module) {
         if (!strncmp(funcName.c_str(), "llvm.x86.", 9)) {
             llvm::Intrinsic::ID id = (llvm::Intrinsic::ID)func->getIntrinsicID();
             Assert(id != 0);
-            LLVM_TYPE_CONST llvm::Type *intrinsicType = 
+            llvm::Type *intrinsicType = 
                 llvm::Intrinsic::getType(*g->ctx, id);
             intrinsicType = llvm::PointerType::get(intrinsicType, 0);
             Assert(func->getType() == intrinsicType);
@@ -411,12 +411,16 @@ lSetInternalFunctions(llvm::Module *module) {
         "__extract_int64",
         "__extract_int8",
         "__fastmath",
+        "__float_to_half_uniform",
+        "__float_to_half_varying",
         "__floatbits_uniform_int32",
         "__floatbits_varying_int32",
         "__floor_uniform_double",
         "__floor_uniform_float",
         "__floor_varying_double",
         "__floor_varying_float",
+        "__half_to_float_uniform",
+        "__half_to_float_varying",
         "__insert_int16",
         "__insert_int32",
         "__insert_int64",
@@ -616,9 +620,7 @@ AddBitcodeToModule(const unsigned char *bitcode, int length,
 
         std::string(linkError);
         if (llvm::Linker::LinkModules(module, bcModule, 
-#if defined(LLVM_3_0) || defined(LLVM_3_0svn) || defined(LLVM_3_1svn)
                                       llvm::Linker::DestroySource,
-#endif // LLVM_3_0
                                       &linkError))
             Error(SourcePos(), "Error linking stdlib bitcode: %s", linkError.c_str());
         lSetInternalFunctions(module);
@@ -639,7 +641,7 @@ lDefineConstantInt(const char *name, int val, llvm::Module *module,
         new Symbol(name, SourcePos(), AtomicType::UniformInt32->GetAsConstType(),
                    SC_STATIC);
     pw->constValue = new ConstExpr(pw->type, val, SourcePos());
-    LLVM_TYPE_CONST llvm::Type *ltype = LLVMTypes::Int32Type;
+    llvm::Type *ltype = LLVMTypes::Int32Type;
     llvm::Constant *linit = LLVMInt32(val);
     pw->storagePtr = new llvm::GlobalVariable(*module, ltype, true, 
                                               llvm::GlobalValue::InternalLinkage,
@@ -679,7 +681,7 @@ lDefineProgramIndex(llvm::Module *module, SymbolTable *symbolTable) {
         pi[i] = i;
     pidx->constValue = new ConstExpr(pidx->type, pi, SourcePos());
 
-    LLVM_TYPE_CONST llvm::Type *ltype = LLVMTypes::Int32VectorType;
+    llvm::Type *ltype = LLVMTypes::Int32VectorType;
     llvm::Constant *linit = LLVMInt32Vector(pi);
     pidx->storagePtr = new llvm::GlobalVariable(*module, ltype, true, 
                                                 llvm::GlobalValue::InternalLinkage, linit, 
diff --git a/builtins/util.m4 b/builtins/util.m4
index 26cbfafb..501f2e47 100644
--- a/builtins/util.m4
+++ b/builtins/util.m4
@@ -2880,11 +2880,11 @@ define <$1 x $2> @__gather_base_offsets32_$2(i8 * %ptr, <$1 x i32> %offsets, i32
   %newDelta = load <$1 x i32> * %deltaPtr
 
   %ret0 = call <$1 x $2> @__gather_elt32_$2(i8 * %ptr, <$1 x i32> %newOffsets,
-                                            i32 %offset_scale, <$1 x i32> %offset_delta,
+                                            i32 %offset_scale, <$1 x i32> %newDelta,
                                             <$1 x $2> undef, i32 0)
   forloop(lane, 1, eval($1-1), 
           `patsubst(patsubst(`%retLANE = call <$1 x $2> @__gather_elt32_$2(i8 * %ptr, 
-                                <$1 x i32> %newOffsets, i32 %offset_scale, <$1 x i32> %offset_delta,
+                                <$1 x i32> %newOffsets, i32 %offset_scale, <$1 x i32> %newDelta,
                                 <$1 x $2> %retPREV, i32 LANE)
                     ', `LANE', lane), `PREV', eval(lane-1))')
   ret <$1 x $2> %ret`'eval($1-1)
diff --git a/cbackend.cpp b/cbackend.cpp
index b1a0a907..71ce7de6 100644
--- a/cbackend.cpp
+++ b/cbackend.cpp
@@ -12,9 +12,7 @@
 //
 //===----------------------------------------------------------------------===//
 
-#ifdef LLVM_2_9
-#warning "The C++ backend isn't supported when building with LLVM 2.9"
-#else
+#include <stdio.h>
 
 #ifndef _MSC_VER
 #include <inttypes.h>
@@ -933,6 +931,20 @@ void CWriter::printConstantDataSequential(ConstantDataSequential *CDS,
 }
 #endif // LLVM_3_1svn
 
+#ifdef LLVM_3_1svn
+static inline std::string ftostr(const APFloat& V) {  // Renders V as text; only IEEE double and single semantics are handled.
+  std::string Buf;
+  if (&V.getSemantics() == &APFloat::IEEEdouble) {  // semantics objects are compared by address
+    raw_string_ostream(Buf) << V.convertToDouble();  // streams into Buf through a temporary raw_string_ostream
+    return Buf;
+  } else if (&V.getSemantics() == &APFloat::IEEEsingle) {
+    raw_string_ostream(Buf) << (double)V.convertToFloat();  // float value is widened to double before streaming
+    return Buf;
+  }
+  return "<unknown format in ftostr>"; // error
+}
+#endif // LLVM_3_1svn
+
 // isFPCSafeToPrint - Returns true if we may assume that CFP may be written out
 // textually as a double (rather than as a reference to a stack-allocated
 // variable). We decide this by converting CFP to a string and back into a
@@ -2071,69 +2083,16 @@ bool CWriter::doInitialization(Module &M) {
 
   Out << "#include \"" << includeName << "\"\n";
 
-  generateCompilerSpecificCode(Out, TD);
-
-  // Function declarations
-  Out << "\n/* Function Declarations */\n";
+  Out << "\n/* Basic Library Function Declarations */\n";
   Out << "extern \"C\" {\n";
   Out << "int puts(unsigned char *);\n";
   Out << "unsigned int putchar(unsigned int);\n";
   Out << "int fflush(void *);\n";
   Out << "int printf(const unsigned char *, ...);\n";
   Out << "uint8_t *memcpy(uint8_t *, uint8_t *, uint64_t );\n";
+  Out << "}\n\n";
 
-  // Store the intrinsics which will be declared/defined below.
-  SmallVector<const Function*, 8> intrinsicsToDefine;
-
-  for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) {
-    // Don't print declarations for intrinsic functions.
-    // Store the used intrinsics, which need to be explicitly defined.
-    if (I->isIntrinsic()) {
-      switch (I->getIntrinsicID()) {
-        default:
-          break;
-        case Intrinsic::uadd_with_overflow:
-        case Intrinsic::sadd_with_overflow:
-          intrinsicsToDefine.push_back(I);
-          break;
-      }
-      continue;
-    }
-
-    if (I->getName() == "setjmp" || I->getName() == "abort" ||
-        I->getName() == "longjmp" || I->getName() == "_setjmp" ||
-        I->getName() == "memset" || I->getName() == "memset_pattern16" ||
-        I->getName() == "puts" ||
-        I->getName() == "printf" || I->getName() == "putchar" ||
-        I->getName() == "fflush" || I->getName() == "malloc" ||
-        I->getName() == "free")
-      continue;
-
-    // Don't redeclare ispc's own intrinsics
-    std::string name = I->getName();
-    if (name.size() > 2 && name[0] == '_' && name[1] == '_')
-        continue;
-
-    if (I->hasExternalWeakLinkage())
-      Out << "extern ";
-    printFunctionSignature(I, true);
-    if (I->hasWeakLinkage() || I->hasLinkOnceLinkage())
-      Out << " __ATTRIBUTE_WEAK__";
-    if (I->hasExternalWeakLinkage())
-      Out << " __EXTERNAL_WEAK__";
-    if (StaticCtors.count(I))
-      Out << " __ATTRIBUTE_CTOR__";
-    if (StaticDtors.count(I))
-      Out << " __ATTRIBUTE_DTOR__";
-    if (I->hasHiddenVisibility())
-      Out << " __HIDDEN__";
-
-    if (I->hasName() && I->getName()[0] == 1)
-      Out << " LLVM_ASM(\"" << I->getName().substr(1) << "\")";
-
-    Out << ";\n";
-  }
-  Out << "}\n";
+  generateCompilerSpecificCode(Out, TD);
 
   // Provide a definition for `bool' if not compiling with a C++ compiler.
   Out << "\n"
@@ -2303,6 +2262,63 @@ bool CWriter::doInitialization(Module &M) {
       }
   }
 
+  // Function declarations
+  Out << "\n/* Function Declarations */\n";
+  Out << "extern \"C\" {\n";
+
+  // Store the intrinsics which will be declared/defined below.
+  SmallVector<const Function*, 8> intrinsicsToDefine;
+
+  for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) {
+    // Don't print declarations for intrinsic functions.
+    // Store the used intrinsics, which need to be explicitly defined.
+    if (I->isIntrinsic()) {
+      switch (I->getIntrinsicID()) {
+        default:
+          break;
+        case Intrinsic::uadd_with_overflow:
+        case Intrinsic::sadd_with_overflow:
+          intrinsicsToDefine.push_back(I);
+          break;
+      }
+      continue;
+    }
+
+    if (I->getName() == "setjmp" || I->getName() == "abort" ||
+        I->getName() == "longjmp" || I->getName() == "_setjmp" ||
+        I->getName() == "memset" || I->getName() == "memset_pattern16" ||
+        I->getName() == "puts" ||
+        I->getName() == "printf" || I->getName() == "putchar" ||
+        I->getName() == "fflush" || I->getName() == "malloc" ||
+        I->getName() == "free")
+      continue;
+
+    // Don't redeclare ispc's own intrinsics
+    std::string name = I->getName();
+    if (name.size() > 2 && name[0] == '_' && name[1] == '_')
+        continue;
+
+    if (I->hasExternalWeakLinkage())
+      Out << "extern ";
+    printFunctionSignature(I, true);
+    if (I->hasWeakLinkage() || I->hasLinkOnceLinkage())
+      Out << " __ATTRIBUTE_WEAK__";
+    if (I->hasExternalWeakLinkage())
+      Out << " __EXTERNAL_WEAK__";
+    if (StaticCtors.count(I))
+      Out << " __ATTRIBUTE_CTOR__";
+    if (StaticDtors.count(I))
+      Out << " __ATTRIBUTE_DTOR__";
+    if (I->hasHiddenVisibility())
+      Out << " __HIDDEN__";
+
+    if (I->hasName() && I->getName()[0] == 1)
+      Out << " LLVM_ASM(\"" << I->getName().substr(1) << "\")";
+
+    Out << ";\n";
+  }
+  Out << "}\n\n";
+
   if (!M.empty())
     Out << "\n\n/* Function Bodies */\n";
 
@@ -4442,5 +4458,3 @@ WriteCXXFile(llvm::Module *module, const char *fn, int vectorWidth,
 
     return true;
 }
-
-#endif // LLVM_2_9
diff --git a/ctx.cpp b/ctx.cpp
index 5f5258e8..c76ec1b8 100644
--- a/ctx.cpp
+++ b/ctx.cpp
@@ -251,7 +251,7 @@ FunctionEmitContext::FunctionEmitContext(Function *func, Symbol *funSym,
     if (!returnType || Type::Equal(returnType, AtomicType::Void))
         returnValuePtr = NULL;
     else {
-        LLVM_TYPE_CONST llvm::Type *ftype = returnType->LLVMType(g->ctx);
+        llvm::Type *ftype = returnType->LLVMType(g->ctx);
         returnValuePtr = AllocaInst(ftype, "return_value_memory");
     }
 
@@ -1050,7 +1050,7 @@ FunctionEmitContext::SwitchInst(llvm::Value *expr, llvm::BasicBlock *bbDefault,
     caseBlocks = new std::vector<std::pair<int, llvm::BasicBlock *> >(bbCases);
     nextBlocks = new std::map<llvm::BasicBlock *, llvm::BasicBlock *>(bbNext);
     switchConditionWasUniform = 
-        (llvm::isa<LLVM_TYPE_CONST llvm::VectorType>(expr->getType()) == false);
+        (llvm::isa<llvm::VectorType>(expr->getType()) == false);
 
     if (switchConditionWasUniform == true) {
         // For a uniform switch condition, just wire things up to the LLVM
@@ -1325,12 +1325,12 @@ FunctionEmitContext::I1VecToBoolVec(llvm::Value *b) {
     if (g->target.maskBitCount == 1)
         return b;
 
-    LLVM_TYPE_CONST llvm::ArrayType *at = 
-        llvm::dyn_cast<LLVM_TYPE_CONST llvm::ArrayType>(b->getType());
+    llvm::ArrayType *at = 
+        llvm::dyn_cast<llvm::ArrayType>(b->getType());
     if (at) {
         // If we're given an array of vectors of i1s, then do the
         // conversion for each of the elements
-        LLVM_TYPE_CONST llvm::Type *boolArrayType = 
+        llvm::Type *boolArrayType = 
             llvm::ArrayType::get(LLVMTypes::BoolVectorType, at->getNumElements());
         llvm::Value *ret = llvm::UndefValue::get(boolArrayType);
 
@@ -1359,13 +1359,8 @@ lGetStringAsValue(llvm::BasicBlock *bblock, const char *s) {
                                                  llvm::GlobalValue::InternalLinkage,
                                                  sConstant, s);
     llvm::Value *indices[2] = { LLVMInt32(0), LLVMInt32(0) };
-#if defined(LLVM_3_0) || defined(LLVM_3_0svn) || defined(LLVM_3_1svn)
     llvm::ArrayRef<llvm::Value *> arrayRef(&indices[0], &indices[2]);
     return llvm::GetElementPtrInst::Create(sPtr, arrayRef, "sptr", bblock);
-#else
-    return llvm::GetElementPtrInst::Create(sPtr, &indices[0], &indices[2],
-                                           "sptr", bblock);
-#endif
 }
 
 
@@ -1498,16 +1493,16 @@ FunctionEmitContext::EmitFunctionParameterDebugInfo(Symbol *sym) {
     Otherwise return zero.
  */
 static int
-lArrayVectorWidth(LLVM_TYPE_CONST llvm::Type *t) {
-    LLVM_TYPE_CONST llvm::ArrayType *arrayType = 
-        llvm::dyn_cast<LLVM_TYPE_CONST llvm::ArrayType>(t);
+lArrayVectorWidth(llvm::Type *t) {
+    llvm::ArrayType *arrayType = 
+        llvm::dyn_cast<llvm::ArrayType>(t);
     if (arrayType == NULL)
         return 0;
 
     // We shouldn't be seeing arrays of anything but vectors being passed
     // to things like FunctionEmitContext::BinaryOperator() as operands.
-    LLVM_TYPE_CONST llvm::VectorType *vectorElementType = 
-        llvm::dyn_cast<LLVM_TYPE_CONST llvm::VectorType>(arrayType->getElementType());
+    llvm::VectorType *vectorElementType = 
+        llvm::dyn_cast<llvm::VectorType>(arrayType->getElementType());
     Assert((vectorElementType != NULL &&
             (int)vectorElementType->getNumElements() == g->target.vectorWidth));
            
@@ -1525,7 +1520,7 @@ FunctionEmitContext::BinaryOperator(llvm::Instruction::BinaryOps inst,
     }
 
     Assert(v0->getType() == v1->getType());
-    LLVM_TYPE_CONST llvm::Type *type = v0->getType();
+    llvm::Type *type = v0->getType();
     int arraySize = lArrayVectorWidth(type);
     if (arraySize == 0) {
         llvm::Instruction *bop = 
@@ -1559,7 +1554,7 @@ FunctionEmitContext::NotOperator(llvm::Value *v, const char *name) {
     // Similarly to BinaryOperator, do the operation on all the elements of
     // the array if we're given an array type; otherwise just do the
     // regular llvm operation.
-    LLVM_TYPE_CONST llvm::Type *type = v->getType();
+    llvm::Type *type = v->getType();
     int arraySize = lArrayVectorWidth(type);
     if (arraySize == 0) {
         llvm::Instruction *binst = 
@@ -1584,18 +1579,18 @@ FunctionEmitContext::NotOperator(llvm::Value *v, const char *name) {
 // Given the llvm Type that represents an ispc VectorType, return an
 // equally-shaped type with boolean elements.  (This is the type that will
 // be returned from CmpInst with ispc VectorTypes).
-static LLVM_TYPE_CONST llvm::Type *
-lGetMatchingBoolVectorType(LLVM_TYPE_CONST llvm::Type *type) {
-    LLVM_TYPE_CONST llvm::ArrayType *arrayType = 
-        llvm::dyn_cast<LLVM_TYPE_CONST llvm::ArrayType>(type);
+static llvm::Type *
+lGetMatchingBoolVectorType(llvm::Type *type) {
+    llvm::ArrayType *arrayType = 
+        llvm::dyn_cast<llvm::ArrayType>(type);
     Assert(arrayType != NULL);
 
-    LLVM_TYPE_CONST llvm::VectorType *vectorElementType = 
-        llvm::dyn_cast<LLVM_TYPE_CONST llvm::VectorType>(arrayType->getElementType());
+    llvm::VectorType *vectorElementType = 
+        llvm::dyn_cast<llvm::VectorType>(arrayType->getElementType());
     Assert(vectorElementType != NULL);
     Assert((int)vectorElementType->getNumElements() == g->target.vectorWidth);
 
-    LLVM_TYPE_CONST llvm::Type *base = 
+    llvm::Type *base = 
         llvm::VectorType::get(LLVMTypes::BoolType, g->target.vectorWidth);
     return llvm::ArrayType::get(base, arrayType->getNumElements());
 }
@@ -1612,7 +1607,7 @@ FunctionEmitContext::CmpInst(llvm::Instruction::OtherOps inst,
     }
 
     Assert(v0->getType() == v1->getType());
-    LLVM_TYPE_CONST llvm::Type *type = v0->getType();
+    llvm::Type *type = v0->getType();
     int arraySize = lArrayVectorWidth(type);
     if (arraySize == 0) {
         llvm::Instruction *ci = 
@@ -1622,7 +1617,7 @@ FunctionEmitContext::CmpInst(llvm::Instruction::OtherOps inst,
         return ci;
     }
     else {
-        LLVM_TYPE_CONST llvm::Type *boolType = lGetMatchingBoolVectorType(type);
+        llvm::Type *boolType = lGetMatchingBoolVectorType(type);
         llvm::Value *ret = llvm::UndefValue::get(boolType);
         for (int i = 0; i < arraySize; ++i) {
             llvm::Value *a = ExtractInst(v0, i);
@@ -1643,10 +1638,10 @@ FunctionEmitContext::SmearUniform(llvm::Value *value, const char *name) {
     }
 
     llvm::Value *ret = NULL;
-    LLVM_TYPE_CONST llvm::Type *eltType = value->getType();
+    llvm::Type *eltType = value->getType();
 
-    LLVM_TYPE_CONST llvm::PointerType *pt = 
-        llvm::dyn_cast<LLVM_TYPE_CONST llvm::PointerType>(eltType);
+    llvm::PointerType *pt = 
+        llvm::dyn_cast<llvm::PointerType>(eltType);
     if (pt != NULL) {
         // Varying pointers are represented as vectors of i32/i64s
         ret = llvm::UndefValue::get(LLVMTypes::VoidPointerVectorType);
@@ -1670,7 +1665,7 @@ FunctionEmitContext::SmearUniform(llvm::Value *value, const char *name) {
 
 llvm::Value *
 FunctionEmitContext::BitCastInst(llvm::Value *value, 
-                                 LLVM_TYPE_CONST llvm::Type *type, 
+                                 llvm::Type *type, 
                                  const char *name) {
     if (value == NULL) {
         Assert(m->errorCount > 0);
@@ -1691,11 +1686,11 @@ FunctionEmitContext::PtrToIntInst(llvm::Value *value, const char *name) {
         return NULL;
     }
 
-    if (llvm::isa<LLVM_TYPE_CONST llvm::VectorType>(value->getType()))
+    if (llvm::isa<llvm::VectorType>(value->getType()))
         // no-op for varying pointers; they're already vectors of ints
         return value;
 
-    LLVM_TYPE_CONST llvm::Type *type = LLVMTypes::PointerIntType;
+    llvm::Type *type = LLVMTypes::PointerIntType;
     llvm::Instruction *inst = 
         new llvm::PtrToIntInst(value, type, name ? name : "ptr2int", bblock);
     AddDebugPos(inst);
@@ -1705,15 +1700,15 @@ FunctionEmitContext::PtrToIntInst(llvm::Value *value, const char *name) {
 
 llvm::Value *
 FunctionEmitContext::PtrToIntInst(llvm::Value *value, 
-                                  LLVM_TYPE_CONST llvm::Type *toType,
+                                  llvm::Type *toType,
                                   const char *name) {
     if (value == NULL) {
         Assert(m->errorCount > 0);
         return NULL;
     }
 
-    LLVM_TYPE_CONST llvm::Type *fromType = value->getType();
-    if (llvm::isa<LLVM_TYPE_CONST llvm::VectorType>(fromType)) {
+    llvm::Type *fromType = value->getType();
+    if (llvm::isa<llvm::VectorType>(fromType)) {
         // varying pointer
         if (fromType == toType)
             // already the right type--done
@@ -1736,15 +1731,15 @@ FunctionEmitContext::PtrToIntInst(llvm::Value *value,
 
 llvm::Value *
 FunctionEmitContext::IntToPtrInst(llvm::Value *value, 
-                                  LLVM_TYPE_CONST llvm::Type *toType,
+                                  llvm::Type *toType,
                                   const char *name) {
     if (value == NULL) {
         Assert(m->errorCount > 0);
         return NULL;
     }
 
-    LLVM_TYPE_CONST llvm::Type *fromType = value->getType();
-    if (llvm::isa<LLVM_TYPE_CONST llvm::VectorType>(fromType)) {
+    llvm::Type *fromType = value->getType();
+    if (llvm::isa<llvm::VectorType>(fromType)) {
         // varying pointer
         if (fromType == toType)
             // done
@@ -1766,7 +1761,7 @@ FunctionEmitContext::IntToPtrInst(llvm::Value *value,
 
 
 llvm::Instruction *
-FunctionEmitContext::TruncInst(llvm::Value *value, LLVM_TYPE_CONST llvm::Type *type,
+FunctionEmitContext::TruncInst(llvm::Value *value, llvm::Type *type,
                                const char *name) {
     if (value == NULL) {
         Assert(m->errorCount > 0);
@@ -1784,7 +1779,7 @@ FunctionEmitContext::TruncInst(llvm::Value *value, LLVM_TYPE_CONST llvm::Type *t
 
 llvm::Instruction *
 FunctionEmitContext::CastInst(llvm::Instruction::CastOps op, llvm::Value *value,
-                              LLVM_TYPE_CONST llvm::Type *type, const char *name) {
+                              llvm::Type *type, const char *name) {
     if (value == NULL) {
         Assert(m->errorCount > 0);
         return NULL;
@@ -1800,7 +1795,7 @@ FunctionEmitContext::CastInst(llvm::Instruction::CastOps op, llvm::Value *value,
 
 
 llvm::Instruction *
-FunctionEmitContext::FPCastInst(llvm::Value *value, LLVM_TYPE_CONST llvm::Type *type, 
+FunctionEmitContext::FPCastInst(llvm::Value *value, llvm::Type *type, 
                                 const char *name) {
     if (value == NULL) {
         Assert(m->errorCount > 0);
@@ -1817,7 +1812,7 @@ FunctionEmitContext::FPCastInst(llvm::Value *value, LLVM_TYPE_CONST llvm::Type *
 
 
 llvm::Instruction *
-FunctionEmitContext::SExtInst(llvm::Value *value, LLVM_TYPE_CONST llvm::Type *type, 
+FunctionEmitContext::SExtInst(llvm::Value *value, llvm::Type *type, 
                               const char *name) {
     if (value == NULL) {
         Assert(m->errorCount > 0);
@@ -1834,7 +1829,7 @@ FunctionEmitContext::SExtInst(llvm::Value *value, LLVM_TYPE_CONST llvm::Type *ty
 
 
 llvm::Instruction *
-FunctionEmitContext::ZExtInst(llvm::Value *value, LLVM_TYPE_CONST llvm::Type *type, 
+FunctionEmitContext::ZExtInst(llvm::Value *value, llvm::Type *type, 
                               const char *name) {
     if (value == NULL) {
         Assert(m->errorCount > 0);
@@ -1865,7 +1860,7 @@ FunctionEmitContext::applyVaryingGEP(llvm::Value *basePtr, llvm::Value *index,
     llvm::Value *scale = g->target.SizeOf(scaleType->LLVMType(g->ctx), bblock);
 
     bool indexIsVarying = 
-        llvm::isa<LLVM_TYPE_CONST llvm::VectorType>(index->getType());
+        llvm::isa<llvm::VectorType>(index->getType());
     llvm::Value *offset = NULL;
     if (indexIsVarying == false) {
         // Truncate or sign extend the index as appropriate to a 32 or
@@ -1909,7 +1904,7 @@ FunctionEmitContext::applyVaryingGEP(llvm::Value *basePtr, llvm::Value *index,
     // Smear out the pointer to be varying; either the base pointer or the
     // index must be varying for this method to be called.
     bool baseIsUniform = 
-        (llvm::isa<LLVM_TYPE_CONST llvm::PointerType>(basePtr->getType()));
+        (llvm::isa<llvm::PointerType>(basePtr->getType()));
     Assert(baseIsUniform == false || indexIsVarying == true);
     llvm::Value *varyingPtr = baseIsUniform ? 
         SmearUniform(basePtr, "ptr_smear") : basePtr;
@@ -1921,18 +1916,18 @@ FunctionEmitContext::applyVaryingGEP(llvm::Value *basePtr, llvm::Value *index,
 
 void
 FunctionEmitContext::MatchIntegerTypes(llvm::Value **v0, llvm::Value **v1) {
-    LLVM_TYPE_CONST llvm::Type *type0 = (*v0)->getType();
-    LLVM_TYPE_CONST llvm::Type *type1 = (*v1)->getType();
+    llvm::Type *type0 = (*v0)->getType();
+    llvm::Type *type1 = (*v1)->getType();
 
     // First, promote to a vector type if one of the two values is a vector
     // type
-    if (llvm::isa<LLVM_TYPE_CONST llvm::VectorType>(type0) &&
-        !llvm::isa<LLVM_TYPE_CONST llvm::VectorType>(type1)) {
+    if (llvm::isa<llvm::VectorType>(type0) &&
+        !llvm::isa<llvm::VectorType>(type1)) {
         *v1 = SmearUniform(*v1, "smear_v1");
         type1 = (*v1)->getType();
     }
-    if (!llvm::isa<LLVM_TYPE_CONST llvm::VectorType>(type0) &&
-        llvm::isa<LLVM_TYPE_CONST llvm::VectorType>(type1)) {
+    if (!llvm::isa<llvm::VectorType>(type0) &&
+        llvm::isa<llvm::VectorType>(type1)) {
         *v0 = SmearUniform(*v0, "smear_v0");
         type0 = (*v0)->getType();
     }
@@ -1969,7 +1964,7 @@ lComputeSliceIndex(FunctionEmitContext *ctx, int soaWidth,
 
     ctx->MatchIntegerTypes(&indexValue, &ptrSliceOffset);
 
-    LLVM_TYPE_CONST llvm::Type *indexType = indexValue->getType();
+    llvm::Type *indexType = indexValue->getType();
     llvm::Value *shift = LLVMIntAsType(logWidth, indexType);
     llvm::Value *mask = LLVMIntAsType(soaWidth-1, indexType);
 
@@ -1997,10 +1992,10 @@ FunctionEmitContext::MakeSlicePointer(llvm::Value *ptr, llvm::Value *offset) {
     // Create a small struct where the first element is the type of the
     // given pointer and the second element is the type of the offset
     // value.
-    std::vector<LLVM_TYPE_CONST llvm::Type *> eltTypes;
+    std::vector<llvm::Type *> eltTypes;
     eltTypes.push_back(ptr->getType());
     eltTypes.push_back(offset->getType());
-    LLVM_TYPE_CONST llvm::StructType *st = 
+    llvm::StructType *st = 
         llvm::StructType::get(*g->ctx, eltTypes);
 
     llvm::Value *ret = llvm::UndefValue::get(st);
@@ -2028,7 +2023,7 @@ FunctionEmitContext::GetElementPtrInst(llvm::Value *basePtr, llvm::Value *index,
     }
 
     if (ptrType->IsSlice()) {
-        Assert(llvm::isa<LLVM_TYPE_CONST llvm::StructType>(basePtr->getType()));
+        Assert(llvm::isa<llvm::StructType>(basePtr->getType()));
 
         llvm::Value *ptrSliceOffset = ExtractInst(basePtr, 1);
         if (ptrType->IsFrozenSlice() == false) {
@@ -2056,27 +2051,21 @@ FunctionEmitContext::GetElementPtrInst(llvm::Value *basePtr, llvm::Value *index,
     // Double-check consistency between the given pointer type and its LLVM
     // type.
     if (ptrType->IsUniformType())
-        Assert(llvm::isa<LLVM_TYPE_CONST llvm::PointerType>(basePtr->getType()));
+        Assert(llvm::isa<llvm::PointerType>(basePtr->getType()));
     else if (ptrType->IsVaryingType())
-        Assert(llvm::isa<LLVM_TYPE_CONST llvm::VectorType>(basePtr->getType()));
+        Assert(llvm::isa<llvm::VectorType>(basePtr->getType()));
 
     bool indexIsVaryingType = 
-        llvm::isa<LLVM_TYPE_CONST llvm::VectorType>(index->getType());
+        llvm::isa<llvm::VectorType>(index->getType());
 
     if (indexIsVaryingType == false && ptrType->IsUniformType() == true) {
         // The easy case: both the base pointer and the indices are
         // uniform, so just emit the regular LLVM GEP instruction
         llvm::Value *ind[1] = { index };
-#if defined(LLVM_3_0) || defined(LLVM_3_0svn) || defined(LLVM_3_1svn)
         llvm::ArrayRef<llvm::Value *> arrayRef(&ind[0], &ind[1]);
         llvm::Instruction *inst = 
             llvm::GetElementPtrInst::Create(basePtr, arrayRef,
                                             name ? name : "gep", bblock);
-#else
-        llvm::Instruction *inst = 
-            llvm::GetElementPtrInst::Create(basePtr, &ind[0], &ind[1], 
-                                            name ? name : "gep", bblock);
-#endif
         AddDebugPos(inst);
         return inst;
     }
@@ -2107,7 +2096,7 @@ FunctionEmitContext::GetElementPtrInst(llvm::Value *basePtr, llvm::Value *index0
         // Similar to the 1D GEP implementation above, for non-frozen slice
         // pointers we do the two-step indexing calculation and then pass
         // the new major index on to a recursive GEP call.
-        Assert(llvm::isa<LLVM_TYPE_CONST llvm::StructType>(basePtr->getType()));
+        Assert(llvm::isa<llvm::StructType>(basePtr->getType()));
         llvm::Value *ptrSliceOffset = ExtractInst(basePtr, 1);
         if (ptrType->IsFrozenSlice() == false) {
             llvm::Value *newSliceOffset;
@@ -2124,25 +2113,19 @@ FunctionEmitContext::GetElementPtrInst(llvm::Value *basePtr, llvm::Value *index0
     }
 
     bool index0IsVaryingType = 
-        llvm::isa<LLVM_TYPE_CONST llvm::VectorType>(index0->getType());
+        llvm::isa<llvm::VectorType>(index0->getType());
     bool index1IsVaryingType = 
-        llvm::isa<LLVM_TYPE_CONST llvm::VectorType>(index1->getType());
+        llvm::isa<llvm::VectorType>(index1->getType());
 
     if (index0IsVaryingType == false && index1IsVaryingType == false && 
         ptrType->IsUniformType() == true) {
         // The easy case: both the base pointer and the indices are
         // uniform, so just emit the regular LLVM GEP instruction
         llvm::Value *indices[2] = { index0, index1 };
-#if defined(LLVM_3_0) || defined(LLVM_3_0svn) || defined(LLVM_3_1svn)
         llvm::ArrayRef<llvm::Value *> arrayRef(&indices[0], &indices[2]);
         llvm::Instruction *inst = 
             llvm::GetElementPtrInst::Create(basePtr, arrayRef,
                                             name ? name : "gep", bblock);
-#else
-        llvm::Instruction *inst = 
-            llvm::GetElementPtrInst::Create(basePtr, &indices[0], &indices[2], 
-                                            name ? name : "gep", bblock);
-#endif
         AddDebugPos(inst);
         return inst;
     }
@@ -2157,7 +2140,7 @@ FunctionEmitContext::GetElementPtrInst(llvm::Value *basePtr, llvm::Value *index0
         Assert(st != NULL);
 
         bool ptr0IsUniform = 
-            llvm::isa<LLVM_TYPE_CONST llvm::PointerType>(ptr0->getType());
+            llvm::isa<llvm::PointerType>(ptr0->getType());
         const Type *ptr0BaseType = st->GetElementType();
         const Type *ptr0Type = ptr0IsUniform ?
             PointerType::GetUniform(ptr0BaseType) : 
@@ -2192,7 +2175,7 @@ FunctionEmitContext::AddElementOffset(llvm::Value *fullBasePtr, int elementNum,
     // unfortunate...
     llvm::Value *basePtr = fullBasePtr;
     bool baseIsSlicePtr = 
-        llvm::isa<LLVM_TYPE_CONST llvm::StructType>(fullBasePtr->getType());
+        llvm::isa<llvm::StructType>(fullBasePtr->getType());
     const PointerType *rpt;
     if (baseIsSlicePtr) {
         Assert(ptrType != NULL);
@@ -2222,16 +2205,10 @@ FunctionEmitContext::AddElementOffset(llvm::Value *fullBasePtr, int elementNum,
     if (ptrType == NULL || ptrType->IsUniformType()) {
         // If the pointer is uniform, we can use the regular LLVM GEP.
         llvm::Value *offsets[2] = { LLVMInt32(0), LLVMInt32(elementNum) };
-#if defined(LLVM_3_0) || defined(LLVM_3_0svn) || defined(LLVM_3_1svn)
         llvm::ArrayRef<llvm::Value *> arrayRef(&offsets[0], &offsets[2]);
         resultPtr = 
             llvm::GetElementPtrInst::Create(basePtr, arrayRef,
                                             name ? name : "struct_offset", bblock);
-#else
-        resultPtr =
-            llvm::GetElementPtrInst::Create(basePtr, &offsets[0], &offsets[2],
-                                            name ? name : "struct_offset", bblock);
-#endif
     }
     else {
         // Otherwise do the math to find the offset and add it to the given
@@ -2286,8 +2263,8 @@ FunctionEmitContext::LoadInst(llvm::Value *ptr, const char *name) {
         return NULL;
     }
 
-    LLVM_TYPE_CONST llvm::PointerType *pt = 
-        llvm::dyn_cast<LLVM_TYPE_CONST llvm::PointerType>(ptr->getType());
+    llvm::PointerType *pt = 
+        llvm::dyn_cast<llvm::PointerType>(ptr->getType());
     Assert(pt != NULL);
 
     // FIXME: it's not clear to me that we generate unaligned vector loads
@@ -2295,7 +2272,7 @@ FunctionEmitContext::LoadInst(llvm::Value *ptr, const char *name) {
     // optimization passes that lower gathers to vector loads, I think..)
     // So remove this??
     int align = 0;
-    if (llvm::isa<LLVM_TYPE_CONST llvm::VectorType>(pt->getElementType()))
+    if (llvm::isa<llvm::VectorType>(pt->getElementType()))
         align = 1;
     llvm::Instruction *inst = new llvm::LoadInst(ptr, name ? name : "load",
                                                  false /* not volatile */,
@@ -2355,7 +2332,7 @@ FunctionEmitContext::loadUniformFromSOA(llvm::Value *ptr, llvm::Value *mask,
         // If we have a struct/array, we need to decompose it into
         // individual element loads to fill in the result structure since
         // the SOA slice of values we need isn't contiguous in memory...
-        LLVM_TYPE_CONST llvm::Type *llvmReturnType = unifType->LLVMType(g->ctx);
+        llvm::Type *llvmReturnType = unifType->LLVMType(g->ctx);
         llvm::Value *retValue = llvm::UndefValue::get(llvmReturnType);
 
         for (int i = 0; i < ct->GetElementCount(); ++i) {
@@ -2439,7 +2416,7 @@ FunctionEmitContext::gather(llvm::Value *ptr, const PointerType *ptrType,
     Assert(ptrType->IsVaryingType());
 
     const Type *returnType = ptrType->GetBaseType()->GetAsVaryingType();
-    LLVM_TYPE_CONST llvm::Type *llvmReturnType = returnType->LLVMType(g->ctx);
+    llvm::Type *llvmReturnType = returnType->LLVMType(g->ctx);
 
     const CollectionType *collectionType = 
         dynamic_cast<const CollectionType *>(ptrType->GetBaseType());
@@ -2547,7 +2524,7 @@ FunctionEmitContext::addGSMetadata(llvm::Value *v, SourcePos pos) {
 
 
 llvm::Value *
-FunctionEmitContext::AllocaInst(LLVM_TYPE_CONST llvm::Type *llvmType, 
+FunctionEmitContext::AllocaInst(llvm::Type *llvmType, 
                                 const char *name, int align, 
                                 bool atEntryBlock) {
     if (llvmType == NULL) {
@@ -2573,10 +2550,10 @@ FunctionEmitContext::AllocaInst(LLVM_TYPE_CONST llvm::Type *llvmType,
     // unlikely that this array will be loaded into varying variables with
     // what will be aligned accesses if the uniform -> varying load is done
     // in regular chunks.
-    LLVM_TYPE_CONST llvm::ArrayType *arrayType = 
-        llvm::dyn_cast<LLVM_TYPE_CONST llvm::ArrayType>(llvmType);
+    llvm::ArrayType *arrayType = 
+        llvm::dyn_cast<llvm::ArrayType>(llvmType);
     if (align == 0 && arrayType != NULL && 
-        !llvm::isa<LLVM_TYPE_CONST llvm::VectorType>(arrayType->getElementType()))
+        !llvm::isa<llvm::VectorType>(arrayType->getElementType()))
         align = 4 * g->target.nativeVectorWidth;
 
     if (align != 0)
@@ -2632,14 +2609,22 @@ FunctionEmitContext::maskedStore(llvm::Value *value, llvm::Value *ptr,
     const PointerType *pt = dynamic_cast<const PointerType *>(valueType);
     if (pt != NULL) {
         if (pt->IsSlice()) {
-            // For masked stores of (varying) slice pointers to memory, we
-            // grab the equivalent StructType and make a recursive call to
-            // maskedStore, giving it that type for the pointer type; that
-            // in turn will lead to the base pointer and offset index being
-            // mask stored to memory..
-            const StructType *sliceStructType = pt->GetSliceStructType();
-            ptrType = PointerType::GetUniform(sliceStructType);
-            maskedStore(value, ptr, ptrType, mask);
+            // Masked store of (varying) slice pointer.
+            Assert(pt->IsVaryingType());
+                    
+            // First, extract the pointer from the slice struct and masked
+            // store that.
+            llvm::Value *v0 = ExtractInst(value, 0);
+            llvm::Value *p0 = AddElementOffset(ptr, 0, ptrType);
+            maskedStore(v0, p0, PointerType::GetUniform(pt->GetAsNonSlice()),
+                        mask);
+
+            // And then do the same for the integer offset
+            llvm::Value *v1 = ExtractInst(value, 1);
+            llvm::Value *p1 = AddElementOffset(ptr, 1, ptrType);
+            const Type *offsetType = AtomicType::VaryingInt32;
+            maskedStore(v1, p1, PointerType::GetUniform(offsetType), mask);
+
             return;
         }
 
@@ -2783,7 +2768,7 @@ FunctionEmitContext::scatter(llvm::Value *value, llvm::Value *ptr,
     Assert(pt != NULL || 
            dynamic_cast<const AtomicType *>(valueType) != NULL);
 
-    LLVM_TYPE_CONST llvm::Type *type = value->getType();
+    llvm::Type *type = value->getType();
     const char *funcName = NULL;
     if (pt != NULL)
         funcName = g->target.is32Bit ? "__pseudo_scatter32_32" :
@@ -2980,7 +2965,7 @@ FunctionEmitContext::ExtractInst(llvm::Value *v, int elt, const char *name) {
     }
 
     llvm::Instruction *ei = NULL;
-    if (llvm::isa<LLVM_TYPE_CONST llvm::VectorType>(v->getType()))
+    if (llvm::isa<llvm::VectorType>(v->getType()))
         ei = llvm::ExtractElementInst::Create(v, LLVMInt32(elt), 
                                               name ? name : "extract", bblock);
     else
@@ -3000,7 +2985,7 @@ FunctionEmitContext::InsertInst(llvm::Value *v, llvm::Value *eltVal, int elt,
     }
 
     llvm::Instruction *ii = NULL;
-    if (llvm::isa<LLVM_TYPE_CONST llvm::VectorType>(v->getType()))
+    if (llvm::isa<llvm::VectorType>(v->getType()))
         ii = llvm::InsertElementInst::Create(v, eltVal, LLVMInt32(elt), 
                                              name ? name : "insert", bblock);
     else
@@ -3012,12 +2997,9 @@ FunctionEmitContext::InsertInst(llvm::Value *v, llvm::Value *eltVal, int elt,
 
 
 llvm::PHINode *
-FunctionEmitContext::PhiNode(LLVM_TYPE_CONST llvm::Type *type, int count, 
+FunctionEmitContext::PhiNode(llvm::Type *type, int count, 
                              const char *name) {
-    llvm::PHINode *pn = llvm::PHINode::Create(type, 
-#if defined(LLVM_3_0) || defined(LLVM_3_0svn) || defined(LLVM_3_1svn)
-                                              count, 
-#endif // LLVM_3_0
+    llvm::PHINode *pn = llvm::PHINode::Create(type, count,
                                               name ? name : "phi", bblock);
     AddDebugPos(pn);
     return pn;
@@ -3045,18 +3027,18 @@ FunctionEmitContext::SelectInst(llvm::Value *test, llvm::Value *val0,
     function has. */
 static unsigned int
 lCalleeArgCount(llvm::Value *callee, const FunctionType *funcType) {
-    LLVM_TYPE_CONST llvm::FunctionType *ft = 
-        llvm::dyn_cast<LLVM_TYPE_CONST llvm::FunctionType>(callee->getType());
+    llvm::FunctionType *ft = 
+        llvm::dyn_cast<llvm::FunctionType>(callee->getType());
 
     if (ft == NULL) {
-        LLVM_TYPE_CONST llvm::PointerType *pt =
-            llvm::dyn_cast<LLVM_TYPE_CONST llvm::PointerType>(callee->getType());
+        llvm::PointerType *pt =
+            llvm::dyn_cast<llvm::PointerType>(callee->getType());
         if (pt == NULL) {
             // varying--in this case, it must be the version of the
             // function that takes a mask
             return funcType->GetNumParameters() + 1;
         }
-        ft = llvm::dyn_cast<LLVM_TYPE_CONST llvm::FunctionType>(pt->getElementType());
+        ft = llvm::dyn_cast<llvm::FunctionType>(pt->getElementType());
     }
 
     Assert(ft != NULL);
@@ -3083,17 +3065,11 @@ FunctionEmitContext::CallInst(llvm::Value *func, const FunctionType *funcType,
     if (argVals.size() + 1 == calleeArgCount)
         argVals.push_back(GetFullMask());
 
-    if (llvm::isa<LLVM_TYPE_CONST llvm::VectorType>(func->getType()) == false) {
+    if (llvm::isa<llvm::VectorType>(func->getType()) == false) {
         // Regular 'uniform' function call--just one function or function
         // pointer, so just emit the IR directly.
-#if defined(LLVM_3_0) || defined(LLVM_3_0svn) || defined(LLVM_3_1svn)
         llvm::Instruction *ci = 
             llvm::CallInst::Create(func, argVals, name ? name : "", bblock);
-#else
-        llvm::Instruction *ci = 
-            llvm::CallInst::Create(func, argVals.begin(), argVals.end(), 
-                                   name ? name : "", bblock);
-#endif
         AddDebugPos(ci);
         return ci;
     }
@@ -3117,7 +3093,7 @@ FunctionEmitContext::CallInst(llvm::Value *func, const FunctionType *funcType,
         // First allocate memory to accumulate the various program
         // instances' return values...
         const Type *returnType = funcType->GetReturnType();
-        LLVM_TYPE_CONST llvm::Type *llvmReturnType = returnType->LLVMType(g->ctx);
+        llvm::Type *llvmReturnType = returnType->LLVMType(g->ctx);
         llvm::Value *resultPtr = NULL;
         if (llvmReturnType->isVoidTy() == false)
             resultPtr = AllocaInst(llvmReturnType);
@@ -3184,9 +3160,9 @@ FunctionEmitContext::CallInst(llvm::Value *func, const FunctionType *funcType,
 
             // bitcast the i32/64 function pointer to the actual function
             // pointer type (the variant that includes a mask).
-            LLVM_TYPE_CONST llvm::Type *llvmFuncType =
+            llvm::Type *llvmFuncType =
                 funcType->LLVMFunctionType(g->ctx, true);
-            LLVM_TYPE_CONST llvm::Type *llvmFPtrType = 
+            llvm::Type *llvmFPtrType = 
                 llvm::PointerType::get(llvmFuncType, 0);
             llvm::Value *fptrCast = IntToPtrInst(fptr, llvmFPtrType);
 
@@ -3283,14 +3259,14 @@ FunctionEmitContext::LaunchInst(llvm::Value *callee,
     launchedTasks = true;
 
     Assert(llvm::isa<llvm::Function>(callee));
-    LLVM_TYPE_CONST llvm::Type *argType = 
+    llvm::Type *argType = 
         (llvm::dyn_cast<llvm::Function>(callee))->arg_begin()->getType();
     Assert(llvm::PointerType::classof(argType));
-    LLVM_TYPE_CONST llvm::PointerType *pt = 
-        llvm::dyn_cast<LLVM_TYPE_CONST llvm::PointerType>(argType);
+    llvm::PointerType *pt = 
+        llvm::dyn_cast<llvm::PointerType>(argType);
     Assert(llvm::StructType::classof(pt->getElementType()));
-    LLVM_TYPE_CONST llvm::StructType *argStructType = 
-        static_cast<LLVM_TYPE_CONST llvm::StructType *>(pt->getElementType());
+    llvm::StructType *argStructType = 
+        static_cast<llvm::StructType *>(pt->getElementType());
     Assert(argStructType->getNumElements() == argVals.size() + 1);
 
     llvm::Function *falloc = m->module->getFunction("ISPCAlloc");
@@ -3388,7 +3364,7 @@ FunctionEmitContext::addVaryingOffsetsIfNeeded(llvm::Value *ptr,
         return ptr;
     
     // Find the size of a uniform element of the varying type
-    LLVM_TYPE_CONST llvm::Type *llvmBaseUniformType = 
+    llvm::Type *llvmBaseUniformType = 
         baseType->GetAsUniformType()->LLVMType(g->ctx);
     llvm::Value *unifSize = g->target.SizeOf(llvmBaseUniformType, bblock);
     unifSize = SmearUniform(unifSize);
diff --git a/ctx.h b/ctx.h
index 0b1ccffa..6c3f2887 100644
--- a/ctx.h
+++ b/ctx.h
@@ -380,23 +380,23 @@ public:
         array, for pointer types). */
     llvm::Value *SmearUniform(llvm::Value *value, const char *name = NULL);
 
-    llvm::Value *BitCastInst(llvm::Value *value, LLVM_TYPE_CONST llvm::Type *type,
+    llvm::Value *BitCastInst(llvm::Value *value, llvm::Type *type,
                              const char *name = NULL);
     llvm::Value *PtrToIntInst(llvm::Value *value, const char *name = NULL);
-    llvm::Value *PtrToIntInst(llvm::Value *value, LLVM_TYPE_CONST llvm::Type *type,
+    llvm::Value *PtrToIntInst(llvm::Value *value, llvm::Type *type,
                               const char *name = NULL);
-    llvm::Value *IntToPtrInst(llvm::Value *value, LLVM_TYPE_CONST llvm::Type *type,
+    llvm::Value *IntToPtrInst(llvm::Value *value, llvm::Type *type,
                               const char *name = NULL);
 
-    llvm::Instruction *TruncInst(llvm::Value *value, LLVM_TYPE_CONST llvm::Type *type,
+    llvm::Instruction *TruncInst(llvm::Value *value, llvm::Type *type,
                                  const char *name = NULL);
     llvm::Instruction *CastInst(llvm::Instruction::CastOps op, llvm::Value *value,
-                                LLVM_TYPE_CONST llvm::Type *type, const char *name = NULL);
-    llvm::Instruction *FPCastInst(llvm::Value *value, LLVM_TYPE_CONST llvm::Type *type, 
+                                llvm::Type *type, const char *name = NULL);
+    llvm::Instruction *FPCastInst(llvm::Value *value, llvm::Type *type, 
                                   const char *name = NULL);
-    llvm::Instruction *SExtInst(llvm::Value *value, LLVM_TYPE_CONST llvm::Type *type, 
+    llvm::Instruction *SExtInst(llvm::Value *value, llvm::Type *type, 
                                 const char *name = NULL);
-    llvm::Instruction *ZExtInst(llvm::Value *value, LLVM_TYPE_CONST llvm::Type *type, 
+    llvm::Instruction *ZExtInst(llvm::Value *value, llvm::Type *type, 
                                 const char *name = NULL);
 
     /** Given two integer-typed values (but possibly one vector and the
@@ -448,7 +448,7 @@ public:
         instruction is added at the start of the function in the entry
         basic block; if it should be added to the current basic block, then
         the atEntryBlock parameter should be false. */ 
-    llvm::Value *AllocaInst(LLVM_TYPE_CONST llvm::Type *llvmType, 
+    llvm::Value *AllocaInst(llvm::Type *llvmType, 
                             const char *name = NULL, int align = 0, 
                             bool atEntryBlock = true);
 
@@ -485,7 +485,7 @@ public:
     llvm::Value *InsertInst(llvm::Value *v, llvm::Value *eltVal, int elt, 
                             const char *name = NULL);
 
-    llvm::PHINode *PhiNode(LLVM_TYPE_CONST llvm::Type *type, int count, 
+    llvm::PHINode *PhiNode(llvm::Type *type, int count, 
                            const char *name = NULL);
     llvm::Instruction *SelectInst(llvm::Value *test, llvm::Value *val0,
                                   llvm::Value *val1, const char *name = NULL);
@@ -632,7 +632,7 @@ private:
     std::vector<CFInfo *> controlFlowInfo;
 
     /** DIFile object corresponding to the source file where the current
-        function was defined (used for debugging info0. */
+        function was defined (used for debugging info). */
     llvm::DIFile diFile;
 
     /** DISubprogram corresponding to this function (used for debugging
diff --git a/decl.cpp b/decl.cpp
index f3eb701e..f451131d 100644
--- a/decl.cpp
+++ b/decl.cpp
@@ -33,7 +33,7 @@
 
 /** @file decl.cpp
     @brief Implementations of classes related to turning declarations into 
-           symbols and types.
+           symbol names and types.
 */
 
 #include "decl.h"
@@ -44,6 +44,7 @@
 #include "stmt.h"
 #include "expr.h"
 #include <stdio.h>
+#include <string.h>
 #include <set>
 
 static void
@@ -55,6 +56,7 @@ lPrintTypeQualifiers(int typeQualifiers) {
     if (typeQualifiers & TYPEQUAL_TASK)      printf("task ");
     if (typeQualifiers & TYPEQUAL_SIGNED)    printf("signed ");
     if (typeQualifiers & TYPEQUAL_UNSIGNED)  printf("unsigned ");
+    if (typeQualifiers & TYPEQUAL_EXPORT)    printf("export ");
 }
 
 
@@ -188,7 +190,6 @@ lGetStorageClassName(StorageClass storageClass) {
     case SC_NONE:     return "";
     case SC_EXTERN:   return "extern";
     case SC_EXTERN_C: return "extern \"C\"";
-    case SC_EXPORT:   return "export";
     case SC_STATIC:   return "static";
     case SC_TYPEDEF:  return "typedef";
     default:          FATAL("Unhandled storage class in lGetStorageClassName");
@@ -217,50 +218,44 @@ Declarator::Declarator(DeclaratorKind dk, SourcePos p)
     : pos(p), kind(dk) { 
     child = NULL;
     typeQualifiers = 0;
+    storageClass = SC_NONE;
     arraySize = -1;
-    sym = NULL;
+    type = NULL;
     initExpr = NULL;
 }
 
 
 void
 Declarator::InitFromDeclSpecs(DeclSpecs *ds) {
-    const Type *t = GetType(ds);
-    if (t == NULL) {
+    const Type *baseType = ds->GetBaseType(pos);
+    InitFromType(baseType, ds);
+
+    if (type == NULL) {
         Assert(m->errorCount > 0);
         return;
     }
 
-    Symbol *sym = GetSymbol();
-    if (sym != NULL) {
-        sym->type = t;
-        sym->storageClass = ds->storageClass;
+    storageClass = ds->storageClass;
+
+    if (ds->declSpecList.size() > 0 && 
+        dynamic_cast<const FunctionType *>(type) == NULL) {
+        Error(pos, "__declspec specifiers for non-function type \"%s\" are "
+              "not used.", type->GetString().c_str());
     }
 }
 
 
-Symbol *
-Declarator::GetSymbol() const {
-    // The symbol lives at the last child in the chain, so walk down there
-    // and return the one there.
-    const Declarator *d = this;
-    while (d->child != NULL)
-        d = d->child;
-    return d->sym;
-}
-
-
 void
 Declarator::Print(int indent) const {
     printf("%*cdeclarator: [", indent, ' ');
     pos.Print();
 
     lPrintTypeQualifiers(typeQualifiers);
-    Symbol *sym = GetSymbol();
-    if (sym != NULL)
-        printf("%s", sym->name.c_str());
+    printf("%s ", lGetStorageClassName(storageClass));
+    if (name.size() > 0)
+        printf("%s", name.c_str());
     else
-        printf("(null symbol)");
+        printf("(unnamed)");
 
     printf(", array size = %d", arraySize);
 
@@ -294,66 +289,26 @@ Declarator::Print(int indent) const {
 }
 
 
-Symbol *
-Declarator::GetFunctionInfo(DeclSpecs *ds, std::vector<Symbol *> *funArgs) {
-    const FunctionType *type = 
-        dynamic_cast<const FunctionType *>(GetType(ds));
-    if (type == NULL)
-        return NULL;
-
-    Symbol *declSym = GetSymbol();
-    Assert(declSym != NULL);
-
-    // Get the symbol for the function from the symbol table.  (It should
-    // already have been added to the symbol table by AddGlobal() by the
-    // time we get here.)
-    Symbol *funSym = m->symbolTable->LookupFunction(declSym->name.c_str(), type);
-    if (funSym == NULL)
-        // May be NULL due to error earlier in compilation
-        Assert(m->errorCount > 0);
-    else
-        funSym->pos = pos;
-
-    // Walk down to the declarator for the function.  (We have to get past
-    // the stuff that specifies the function's return type before we get to
-    // the function's declarator.)
-    Declarator *d = this;
-    while (d != NULL && d->kind != DK_FUNCTION)
-        d = d->child;
-    Assert(d != NULL);
-
-    for (unsigned int i = 0; i < d->functionParams.size(); ++i) {
-        Symbol *sym = d->GetSymbolForFunctionParameter(i);
-        if (sym->type == NULL) {
-            Assert(m->errorCount > 0);
-            continue;
-        }
-        else
-            sym->type = sym->type->ResolveUnboundVariability(Variability::Varying);
-
-        funArgs->push_back(sym);
-    }
-
-    if (funSym != NULL)
-        funSym->type = funSym->type->ResolveUnboundVariability(Variability::Varying);
-
-    return funSym;
-}
-
-
-const Type *
-Declarator::GetType(const Type *base, DeclSpecs *ds) const {
+void
+Declarator::InitFromType(const Type *baseType, DeclSpecs *ds) {
     bool hasUniformQual = ((typeQualifiers & TYPEQUAL_UNIFORM) != 0);
     bool hasVaryingQual = ((typeQualifiers & TYPEQUAL_VARYING) != 0);
     bool isTask =         ((typeQualifiers & TYPEQUAL_TASK) != 0);
+    bool isExported =     ((typeQualifiers & TYPEQUAL_EXPORT) != 0);
     bool isConst =        ((typeQualifiers & TYPEQUAL_CONST) != 0);
 
     if (hasUniformQual && hasVaryingQual) {
         Error(pos, "Can't provide both \"uniform\" and \"varying\" qualifiers.");
-        return NULL;
+        return;
     }
-    if (kind != DK_FUNCTION && isTask)
+    if (kind != DK_FUNCTION && isTask) {
         Error(pos, "\"task\" qualifier illegal in variable declaration.");
+        return;
+    }
+    if (kind != DK_FUNCTION && isExported) {
+        Error(pos, "\"export\" qualifier illegal in variable declaration.");
+        return;
+    }
 
     Variability variability(Variability::Unbound);
     if (hasUniformQual)
@@ -361,69 +316,79 @@ Declarator::GetType(const Type *base, DeclSpecs *ds) const {
     else if (hasVaryingQual)
         variability = Variability::Varying;
 
-    const Type *type = base;
-    switch (kind) {
-    case DK_BASE:
+    if (kind == DK_BASE) {
         // All of the type qualifiers should be in the DeclSpecs for the
         // base declarator
         Assert(typeQualifiers == 0);
         Assert(child == NULL);
-        return type;
-
-    case DK_POINTER:
+        type = baseType;
+    }
+    else if (kind == DK_POINTER) {
         /* For now, any pointer to an SOA type gets the slice property; if
            we add the capability to declare pointers as slices or not,
            we'll want to set this based on a type qualifier here. */
-        type = new PointerType(type, variability, isConst, type->IsSOAType());
-        if (child != NULL)
-            return child->GetType(type, ds);
+        const Type *ptrType = new PointerType(baseType, variability, isConst,
+                                              baseType->IsSOAType());
+        if (child != NULL) {
+            child->InitFromType(ptrType, ds);
+            type = child->type;
+            name = child->name;
+        }
         else
-            return type;
-        break;
-
-    case DK_REFERENCE:
-        if (hasUniformQual)
+            type = ptrType;
+    }
+    else if (kind == DK_REFERENCE) {
+        if (hasUniformQual) {
             Error(pos, "\"uniform\" qualifier is illegal to apply to references.");
-        if (hasVaryingQual)
+            return;
+        }
+        if (hasVaryingQual) {
             Error(pos, "\"varying\" qualifier is illegal to apply to references.");
-        if (isConst)
+            return;
+        }
+        if (isConst) {
             Error(pos, "\"const\" qualifier is to illegal apply to references.");
-
+            return;
+        }
         // The parser should disallow this already, but double check.
-        if (dynamic_cast<const ReferenceType *>(type) != NULL) {
+        if (dynamic_cast<const ReferenceType *>(baseType) != NULL) {
             Error(pos, "References to references are illegal.");
-            return NULL;
+            return;
         }
 
-        type = new ReferenceType(type);
-        if (child != NULL)
-            return child->GetType(type, ds);
+        const Type *refType = new ReferenceType(baseType);
+        if (child != NULL) {
+            child->InitFromType(refType, ds);
+            type = child->type;
+            name = child->name;
+        }
         else
-            return type;
-        break;
-
-    case DK_ARRAY:
-        if (Type::Equal(type, AtomicType::Void)) {
+            type = refType;
+    }
+    else if (kind == DK_ARRAY) {
+        if (Type::Equal(baseType, AtomicType::Void)) {
             Error(pos, "Arrays of \"void\" type are illegal.");
-            return NULL;
+            return;
         }
-        if (dynamic_cast<const ReferenceType *>(type)) {
+        if (dynamic_cast<const ReferenceType *>(baseType)) {
             Error(pos, "Arrays of references (type \"%s\") are illegal.",
-                  type->GetString().c_str());
-            return NULL;
+                  baseType->GetString().c_str());
+            return;
         }
 
-        type = new ArrayType(type, arraySize);
-        if (child)
-            return child->GetType(type, ds);
+        const Type *arrayType = new ArrayType(baseType, arraySize);
+        if (child != NULL) {
+            child->InitFromType(arrayType, ds);
+            type = child->type;
+            name = child->name;
+        }
         else
-            return type;
-        break;
-
-    case DK_FUNCTION: {
+            type = arrayType;
+    }
+    else if (kind == DK_FUNCTION) {
         std::vector<const Type *> args;
         std::vector<std::string> argNames;
-        std::vector<ConstExpr *> argDefaults;
+        std::vector<Expr *> argDefaults;
         std::vector<SourcePos> argPos;
 
         // Loop over the function arguments and store the names, types,
@@ -432,20 +397,44 @@ Declarator::GetType(const Type *base, DeclSpecs *ds) const {
         for (unsigned int i = 0; i < functionParams.size(); ++i) {
             Declaration *d = functionParams[i];
 
-            Symbol *sym = GetSymbolForFunctionParameter(i);
-
-            if (d->declSpecs->storageClass != SC_NONE)
-                Error(sym->pos, "Storage class \"%s\" is illegal in "
-                      "function parameter declaration for parameter \"%s\".", 
-                      lGetStorageClassName(d->declSpecs->storageClass),
-                      sym->name.c_str());
-            if (Type::Equal(sym->type, AtomicType::Void)) {
-                Error(sym->pos, "Parameter with type \"void\" illegal in function "
-                      "parameter list.");
-                sym->type = NULL;
+            if (d == NULL) {
+                Assert(m->errorCount > 0);
+                continue;
+            }
+            if (d->declarators.size() == 0) {
+                // Function declaration like foo(float), without a name for the
+                // parameter; wire up a placeholder Declarator for it.
+                d->declarators.push_back(new Declarator(DK_BASE, pos));
+                d->declarators[0]->InitFromDeclSpecs(d->declSpecs);
             }
 
-            const ArrayType *at = dynamic_cast<const ArrayType *>(sym->type);
+            Assert(d->declarators.size() == 1);
+            Declarator *decl = d->declarators[0];
+            if (decl == NULL || decl->type == NULL) {
+                Assert(m->errorCount > 0);
+                continue;
+            }
+
+            if (decl->name == "") {
+                // Give a name to any anonymous parameter declarations
+                char buf[32];
+                sprintf(buf, "__anon_parameter_%d", i);
+                decl->name = buf;
+            }
+            decl->type = decl->type->ResolveUnboundVariability(Variability::Varying);
+
+            if (d->declSpecs->storageClass != SC_NONE)
+                Error(decl->pos, "Storage class \"%s\" is illegal in "
+                      "function parameter declaration for parameter \"%s\".", 
+                      lGetStorageClassName(d->declSpecs->storageClass),
+                      decl->name.c_str());
+            if (Type::Equal(decl->type, AtomicType::Void)) {
+                Error(decl->pos, "Parameter with type \"void\" illegal in function "
+                      "parameter list.");
+                decl->type = NULL;
+            }
+
+            const ArrayType *at = dynamic_cast<const ArrayType *>(decl->type);
             if (at != NULL) {
                 // As in C, arrays are passed to functions as pointers to
                 // their element type.  We'll just immediately make this
@@ -455,93 +444,94 @@ Declarator::GetType(const Type *base, DeclSpecs *ds) const {
                 // report this differently than it was originally declared
                 // in the function, but it's not clear that this is a
                 // significant problem.)
-                if (at->GetElementType() == NULL) {
+                const Type *targetType = at->GetElementType();
+                if (targetType == NULL) {
                     Assert(m->errorCount > 0);
-                    return NULL;
+                    return;
                 }
 
-                const Type *targetType = at->GetElementType();
-                targetType = 
-                    targetType->ResolveUnboundVariability(Variability::Varying);
-                sym->type = PointerType::GetUniform(targetType);
+                decl->type = PointerType::GetUniform(targetType);
 
                 // Make sure there are no unsized arrays (other than the
                 // first dimension) in function parameter lists.
-                at = dynamic_cast<const ArrayType *>(at->GetElementType());
+                at = dynamic_cast<const ArrayType *>(targetType);
                 while (at != NULL) {
                     if (at->GetElementCount() == 0)
-                        Error(sym->pos, "Arrays with unsized dimensions in "
+                        Error(decl->pos, "Arrays with unsized dimensions in "
                               "dimensions after the first one are illegal in "
                               "function parameter lists.");
                     at = dynamic_cast<const ArrayType *>(at->GetElementType());
                 }
             }
 
-            args.push_back(sym->type);
-            argNames.push_back(sym->name);
-            argPos.push_back(sym->pos);
+            args.push_back(decl->type);
+            argNames.push_back(decl->name);
+            argPos.push_back(decl->pos);
 
-            ConstExpr *init = NULL;
-            if (d->declarators.size()) {
-                // Try to find an initializer expression; if there is one,
-                // it lives down to the base declarator.
-                Declarator *decl = d->declarators[0];
-                while (decl->child != NULL) {
-                    Assert(decl->initExpr == NULL);
+            Expr *init = NULL;
+            // Try to find an initializer expression.
+            while (decl != NULL) {
+                if (decl->initExpr != NULL) {
+                    decl->initExpr = TypeCheck(decl->initExpr);
+                    decl->initExpr = Optimize(decl->initExpr);
+                    if (decl->initExpr != NULL) {
+                        init = dynamic_cast<ConstExpr *>(decl->initExpr);
+                        if (init == NULL)
+                            init = dynamic_cast<NullPointerExpr *>(decl->initExpr);
+                        if (init == NULL)
+                            Error(decl->initExpr->pos, "Default value for parameter "
+                                  "\"%s\" must be a compile-time constant.", 
+                                  decl->name.c_str());
+                    }
+                    break;
+                }
+                else
                     decl = decl->child;
-                }
-
-                if (decl->initExpr != NULL &&
-                    (decl->initExpr = TypeCheck(decl->initExpr)) != NULL &&
-                    (decl->initExpr = Optimize(decl->initExpr)) != NULL &&
-                    (init = dynamic_cast<ConstExpr *>(decl->initExpr)) == NULL) {
-                    Error(decl->initExpr->pos, "Default value for parameter "
-                          "\"%s\" must be a compile-time constant.", 
-                          sym->name.c_str());
-                }
             }
             argDefaults.push_back(init);
         }
 
-        const Type *returnType = type;
+        const Type *returnType = baseType;
         if (returnType == NULL) {
             Error(pos, "No return type provided in function declaration.");
-            return NULL;
+            return;
         }
+
         if (dynamic_cast<const FunctionType *>(returnType) != NULL) {
             Error(pos, "Illegal to return function type from function.");
-            return NULL;
+            return;
         }
         
-        bool isExported = ds && (ds->storageClass == SC_EXPORT);
+        returnType = returnType->ResolveUnboundVariability(Variability::Varying);
+
         bool isExternC =  ds && (ds->storageClass == SC_EXTERN_C);
+        bool isExported = ds && ((ds->typeQualifiers & TYPEQUAL_EXPORT) != 0);
         bool isTask =     ds && ((ds->typeQualifiers & TYPEQUAL_TASK) != 0);
 
         if (isExported && isTask) {
             Error(pos, "Function can't have both \"task\" and \"export\" "
                   "qualifiers");
-            return NULL;
+            return;
         }
         if (isExternC && isTask) {
             Error(pos, "Function can't have both \"extern \"C\"\" and \"task\" "
                   "qualifiers");
-            return NULL;
+            return;
         }
         if (isExternC && isExported) {
             Error(pos, "Function can't have both \"extern \"C\"\" and \"export\" "
                   "qualifiers");
-            return NULL;
+            return;
         }
 
         if (child == NULL) {
             Assert(m->errorCount > 0);
-            return NULL;
+            return;
         }
 
         const FunctionType *functionType = 
             new FunctionType(returnType, args, argNames, argDefaults,
                              argPos, isTask, isExported, isExternC);
-        functionType = functionType->ResolveUnboundVariability(Variability::Varying);
 
         // handle any explicit __declspecs on the function
         if (ds != NULL) {
@@ -563,11 +553,9 @@ Declarator::GetType(const Type *base, DeclSpecs *ds) const {
             }
         }
 
-        return child->GetType(functionType, ds);
-    }
-    default:
-        FATAL("Unexpected decl kind");
-        return NULL;
+        child->InitFromType(functionType, ds);
+        type = child->type;
+        name = child->name;
     }
 }
 
@@ -646,27 +634,23 @@ Declaration::GetVariableDeclarations() const {
 
     for (unsigned int i = 0; i < declarators.size(); ++i) {
         Declarator *decl = declarators[i];
-        if (decl == NULL) {
+        if (decl == NULL || decl->type == NULL) {
             // Ignore earlier errors
             Assert(m->errorCount > 0);
             continue;
         }
 
-        Symbol *sym = decl->GetSymbol();
-        if (sym == NULL || sym->type == NULL) {
-            // Ignore errors
-            Assert(m->errorCount > 0);
-            continue;
-        }
-        sym->type = sym->type->ResolveUnboundVariability(Variability::Varying);
-
-        if (Type::Equal(sym->type, AtomicType::Void))
-            Error(sym->pos, "\"void\" type variable illegal in declaration.");
-        else if (dynamic_cast<const FunctionType *>(sym->type) == NULL) {
+        if (Type::Equal(decl->type, AtomicType::Void))
+            Error(decl->pos, "\"void\" type variable illegal in declaration.");
+        else if (dynamic_cast<const FunctionType *>(decl->type) == NULL) {
+            decl->type = decl->type->ResolveUnboundVariability(Variability::Varying);
+            Symbol *sym = new Symbol(decl->name, decl->pos, decl->type,
+                                     decl->storageClass);
             m->symbolTable->AddVariable(sym);
             vars.push_back(VariableDeclaration(sym, decl->initExpr));
         }
     }
+
     return vars;
 }
 
@@ -677,25 +661,20 @@ Declaration::DeclareFunctions() {
 
     for (unsigned int i = 0; i < declarators.size(); ++i) {
         Declarator *decl = declarators[i];
-        if (decl == NULL) {
+        if (decl == NULL || decl->type == NULL) {
             // Ignore earlier errors
             Assert(m->errorCount > 0);
             continue;
         }
 
-        Symbol *sym = decl->GetSymbol();
-        if (sym == NULL || sym->type == NULL) {
-            // Ignore errors
-            Assert(m->errorCount > 0);
-            continue;
-        }
-        sym->type = sym->type->ResolveUnboundVariability(Variability::Varying);
-
-        if (dynamic_cast<const FunctionType *>(sym->type) == NULL)
+        const FunctionType *ftype = 
+            dynamic_cast<const FunctionType *>(decl->type);
+        if (ftype == NULL)
             continue;
 
         bool isInline = (declSpecs->typeQualifiers & TYPEQUAL_INLINE);
-        m->AddFunctionDeclaration(sym, isInline);
+        m->AddFunctionDeclaration(decl->name, ftype, decl->storageClass,
+                                  isInline, decl->pos);
     }
 }
 
@@ -709,6 +688,7 @@ Declaration::Print(int indent) const {
         declarators[i]->Print(indent+4);
 }
 
+
 ///////////////////////////////////////////////////////////////////////////
 
 void
@@ -725,38 +705,42 @@ GetStructTypesNamesPositions(const std::vector<StructDeclaration *> &sd,
         // FIXME: making this fake little DeclSpecs here is really
         // disgusting
         DeclSpecs ds(type);
-        if (type->IsUniformType()) 
-            ds.typeQualifiers |= TYPEQUAL_UNIFORM;
-        else if (type->IsVaryingType())
-            ds.typeQualifiers |= TYPEQUAL_VARYING;
+        if (Type::Equal(type, AtomicType::Void) == false) {
+            if (type->IsUniformType()) 
+                ds.typeQualifiers |= TYPEQUAL_UNIFORM;
+            else if (type->IsVaryingType())
+                ds.typeQualifiers |= TYPEQUAL_VARYING;
+            else if (type->GetSOAWidth() != 0)
+                ds.soaWidth = type->GetSOAWidth();
+            // FIXME: ds.vectorSize?
+        }
 
         for (unsigned int j = 0; j < sd[i]->declarators->size(); ++j) {
             Declarator *d = (*sd[i]->declarators)[j];
             d->InitFromDeclSpecs(&ds);
 
-            Symbol *sym = d->GetSymbol();
-
-            if (Type::Equal(sym->type, AtomicType::Void))
+            if (Type::Equal(d->type, AtomicType::Void))
                 Error(d->pos, "\"void\" type illegal for struct member.");
 
-            const ArrayType *arrayType = 
-                dynamic_cast<const ArrayType *>(sym->type);
-            if (arrayType != NULL && arrayType->GetElementCount() == 0) {
-                Error(d->pos, "Unsized arrays aren't allowed in struct "
-                      "definitions.");
-                elementTypes->push_back(NULL);
-            }
-            else
-                elementTypes->push_back(sym->type);
+            elementTypes->push_back(d->type);
 
-            if (seenNames.find(sym->name) != seenNames.end())
+            if (seenNames.find(d->name) != seenNames.end())
                 Error(d->pos, "Struct member \"%s\" has same name as a "
-                      "previously-declared member.", sym->name.c_str());
+                      "previously-declared member.", d->name.c_str());
             else
-                seenNames.insert(sym->name);
+                seenNames.insert(d->name);
 
-            elementNames->push_back(sym->name);
-            elementPositions->push_back(sym->pos);
+            elementNames->push_back(d->name);
+            elementPositions->push_back(d->pos);
         }
     }
+
+    for (int i = 0; i < (int)elementTypes->size() - 1; ++i) {
+        const ArrayType *arrayType = 
+            dynamic_cast<const ArrayType *>((*elementTypes)[i]);
+
+        if (arrayType != NULL && arrayType->GetElementCount() == 0)
+            Error((*elementPositions)[i], "Unsized arrays aren't allowed except "
+                  "for the last member in a struct definition.");
+    }
 }
diff --git a/decl.h b/decl.h
index 0bae20b8..ea2cb0fd 100644
--- a/decl.h
+++ b/decl.h
@@ -1,5 +1,5 @@
 /*
-  Copyright (c) 2010-2011, Intel Corporation
+  Copyright (c) 2010-2012, Intel Corporation
   All rights reserved.
 
   Redistribution and use in source and binary forms, with or without
@@ -47,8 +47,8 @@
     variables--here, that the declaration has the 'static' and 'uniform'
     qualifiers, and that it's basic type is 'int'.  Then for each variable
     declaration, the Declaraiton class holds an instance of a Declarator,
-    which in turn records the per-variable information like the symbol
-    name, array size (if any), initializer expression, etc.
+    which in turn records the per-variable information like the name, array
+    size (if any), initializer expression, etc.  
 */
 
 #ifndef ISPC_DECL_H
@@ -61,16 +61,6 @@ struct VariableDeclaration;
 class Declaration;
 class Declarator;
 
-enum StorageClass {
-    SC_NONE,
-    SC_EXTERN,
-    SC_EXPORT,
-    SC_STATIC,
-    SC_TYPEDEF,
-    SC_EXTERN_C
-};
-
-
 /* Multiple qualifiers can be provided with types in declarations;
    therefore, they are set up so that they can be ANDed together into an
    int. */
@@ -82,6 +72,7 @@ enum StorageClass {
 #define TYPEQUAL_SIGNED     (1<<4)
 #define TYPEQUAL_UNSIGNED   (1<<5)
 #define TYPEQUAL_INLINE     (1<<6)
+#define TYPEQUAL_EXPORT     (1<<7)
 
 /** @brief Representation of the declaration specifiers in a declaration.
 
@@ -141,25 +132,11 @@ public:
     Declarator(DeclaratorKind dk, SourcePos p);
 
     /** Once a DeclSpecs instance is available, this method completes the
-        initialization of the Symbol, setting its Type accordingly.
+        initialization of the type member.
      */
     void InitFromDeclSpecs(DeclSpecs *ds);
 
-    /** Get the actual type of the combination of Declarator and the given
-        DeclSpecs.  If an explicit base type is provided, the declarator is
-        applied to that type; otherwise the base type from the DeclSpecs is
-        used. */
-    const Type *GetType(DeclSpecs *ds) const;
-    const Type *GetType(const Type *base, DeclSpecs *ds) const;
-
-    /** Returns the symbol corresponding to the function declared by this
-        declarator and symbols for its arguments in *args. */
-    Symbol *GetFunctionInfo(DeclSpecs *ds, std::vector<Symbol *> *args);
-
-    Symbol *GetSymbolForFunctionParameter(int paramNum) const;
-
-    /** Returns the symbol associated with the declarator. */
-    Symbol *GetSymbol() const;
+    void InitFromType(const Type *base, DeclSpecs *ds);
 
     void Print(int indent) const;
 
@@ -180,18 +157,24 @@ public:
     /** Type qualifiers provided with the declarator. */
     int typeQualifiers;
 
+    StorageClass storageClass;
+
     /** For array declarators, this gives the declared size of the array.
         Unsized arrays have arraySize == 0. */ 
     int arraySize;
 
-    /** Symbol associated with the declarator. */
-    Symbol *sym;
+    /** Name associated with the declarator. */
+    std::string name;
 
     /** Initialization expression for the variable.  May be NULL. */
     Expr *initExpr;
 
+    /** Type of the declarator.  This is NULL until InitFromDeclSpecs() or
+        InitFromType() is called. */
+    const Type *type;
+
     /** For function declarations, this holds the Declaration *s for the
-        funciton's parameters. */
+        function's parameters. */
     std::vector<Declaration *> functionParams;
 };
 
diff --git a/docs/ReleaseNotes.txt b/docs/ReleaseNotes.txt
index 62f46289..f99066ac 100644
--- a/docs/ReleaseNotes.txt
+++ b/docs/ReleaseNotes.txt
@@ -1,3 +1,47 @@
+=== v1.2.1 === (6 April 2012)
+
+This release contains only minor new functionality and is mostly for many
+small bugfixes and improvements to error handling and error reporting.
+The new functionality that is present is:
+
+* Significantly more efficient versions of the float / half conversion
+  routines are now available in the standard library, thanks to Fabian
+  Giesen.
+
+* The last member of a struct can now be a zero-length array; this allows
+  the trick of dynamically allocating enough storage for the struct and
+  some number of array elements at the end of it.
+
+Significant bugs fixed include:
+
+* Issue #205: When a target ISA isn't specified, use the host system's
+  capabilities to choose a target for which it will be able to run the
+  generated code.
+
+* Issues #215 and #217: Don't allocate storage for global variables that
+  are declared "extern".
+
+* Issue #197: Allow NULL as a default argument value in a function
+  declaration.
+
+* Issue #223: Fix bugs where taking the address of a function wouldn't work
+  as expected.
+
+* Issue #224: When there are overloaded variants of a function that take
+  both reference and const reference parameters, give the non-const
+  reference preference when matching values of that underlying type.
+
+* Issue #225: An error is issued when a varying lvalue is assigned to a
+  reference type (rather than crashing).
+
+* Issue #193: Permit conversions from array types to void *, not just the
+  pointer type of the underlying array element.
+
+* Issue #199: Still evaluate expressions that are cast to (void).
+
+The documentation has also been improved, with FAQs added to clarify some
+aspects of the ispc pointer model.
+
 === v1.2.0 === (20 March 2012)
 
 This is a major new release of ispc, with a number of significant
diff --git a/docs/faq.rst b/docs/faq.rst
index 2cdca136..a3517bea 100644
--- a/docs/faq.rst
+++ b/docs/faq.rst
@@ -14,12 +14,19 @@ distribution.
   + `Why are there multiple versions of exported ispc functions in the assembly output?`_
   + `How can I more easily see gathers and scatters in generated assembly?`_
 
+* Language Details
+
+  + `What is the difference between "int *foo" and "int foo[]"?`_
+  + `Why are pointed-to types "uniform" by default?`_
+  + `What am I getting an error about assigning a varying lvalue to a reference type?`_ 
+  
 * Interoperability
 
   + `How can I supply an initial execution mask in the call from the application?`_
   + `How can I generate a single binary executable with support for multiple instruction sets?`_
   + `How can I determine at run-time which vector instruction set's instructions were selected to execute?`_
   + `Is it possible to inline ispc functions in C/C++ code?`_
+  + `Why is it illegal to pass "varying" values from C/C++ to ispc functions?`_ 
 
 * Programming Techniques
 
@@ -213,6 +220,125 @@ easier to understand:
             jmp        ___pseudo_scatter_base_offsets32_32 ## TAILCALL
 
 
+Language Details
+================
+
+What is the difference between "int \*foo" and "int foo[]"?
+-----------------------------------------------------------
+
+In C and C++, declaring a function to take a parameter ``int *foo`` and
+``int foo[]`` results in the same type for the parameter.  Both are
+pointers to integers.  In ``ispc``, these are different types.  The first
+one is a varying pointer to a uniform integer value in memory, while the
+second results in a uniform pointer to the start of an array of varying
+integer values in memory.
+
+To understand why the first is a varying pointer to a uniform integer,
+first recall that types without explicit rate qualifiers (``uniform``,
+``varying``, or ``soa<>``) are ``varying`` by default.  Second, recall from
+the `discussion of pointer types in the ispc User's Guide`_ that pointed-to
+types without rate qualifiers are ``uniform`` by default.  (This second
+rule is discussed further below, in `Why are pointed-to types "uniform" by
+default?`_.)  The type of ``int *foo`` follows from these.
+
+.. _discussion of pointer types in the ispc User's Guide: ispc.html#pointer-types 
+
+Conversely, in a function body, ``int foo[10]`` represents a declaration of
+a 10-element array of varying ``int`` values.  In that we'd certainly like
+to be able to pass such an array to a function that takes a ``int []``
+parameter, the natural type for an ``int []`` parameter is a uniform
+pointer to varying integer values.
+
+In terms of compatibility with C/C++, it's unfortunate that this
+distinction exists, though any other set of rules seems to introduce more
+awkwardness than this one.  (Though we're interested to hear ideas to
+improve these rules!).
+
+Why are pointed-to types "uniform" by default?
+----------------------------------------------
+
+In ``ispc``, types without rate qualifiers are "varying" by default, but
+types pointed to by pointers without rate qualifiers are "uniform" by
+default.  Why this difference?
+
+::
+
+    int foo;  // no rate qualifier, "varying int".
+    uniform int *foo;  // pointer type has no rate qualifier, pointed-to does.
+                       // "varying pointer to uniform int".
+    int *foo;  // neither pointer type nor pointed-to type ("int") have
+               // rate qualifiers. Pointer type is varying by default,
+               // pointed-to is uniform. "varying pointer to uniform int".
+    varying int *foo;   // varying pointer to varying int
+
+The first rule, having types without rate qualifiers be varying by default,
+is a default that keeps the number of "uniform" or "varying" qualifiers in
+``ispc`` programs low.  Most ``ispc`` programs use mostly "varying"
+variables, so this rule allows most variables to be declared without also
+requiring rate qualifiers.
+
+On a related note, this rule allows many C/C++ functions to be used to
+define equivalent functions in the SPMD execution model that ``ispc``
+provides with little or no modification:
+
+::
+
+    // scalar add in C/C++, SPMD/vector add in ispc
+    int add(int a, int b) { return a + b; }
+
+This motivation also explains why ``uniform int *foo`` represents a varying
+pointer; having pointers be varying by default if they don't have rate
+qualifiers similarly helps with porting code from C/C++ to ``ispc``.
+
+The trickier issue is why pointed-to types are "uniform" by default.  In our
+experience, data in memory that is accessed via pointers is most often
+uniform; this generally includes all data that has been allocated and
+initialized by the C/C++ application code. In practice, "varying" types are
+more generally (but not exclusively) used for local data in ``ispc``
+functions.  Thus, making the pointed-to type uniform by default leads to
+more concise code for the most common cases.
+
+
+What am I getting an error about assigning a varying lvalue to a reference type?
+--------------------------------------------------------------------------------
+
+Given code like the following:
+
+::
+
+    uniform float a[...];
+    int index = ...;
+    float &r = a[index];
+
+``ispc`` issues the error "Initializer for reference-type variable "r" must
+have a uniform lvalue type.".  The underlying issue stems from how
+references are represented in the code generated by ``ispc``.  Recall that
+``ispc`` supports both uniform and varying pointer types--a uniform pointer
+points to the same location in memory for all program instances in the
+gang, while a varying pointer allows each program instance to have its own
+pointer value.
+
+References are represented as pointers in the code generated by ``ispc``,
+though this is generally opaque to the user; in ``ispc``, they are
+specifically uniform pointers.  This design decision was made so that given
+code like this:
+
+::
+
+    extern void func(float &val);
+    float foo = ...;
+    func(foo);
+
+Then the reference would be handled efficiently as a single pointer, rather
+than unnecessarily being turned into a gang-size of pointers.
+
+However, an implication of this decision is that it's not possible for
+references to refer to completely different things for each of the program
+instances.  (And hence the error that is issued).  In cases where a unique
+per-program-instance pointer is needed, a varying pointer should be used
+instead of a reference.
+
+
 Interoperability
 ================
 
@@ -391,6 +517,48 @@ linking your applicaiton.
 ``-mattr=+avx`` flag to ``llc``.)
     
 
+Why is it illegal to pass "varying" values from C/C++ to ispc functions?
+------------------------------------------------------------------------
+
+If any of the types in the parameter list to an exported function is
+"varying" (including recursively, and members of structure types, etc.),
+then ``ispc`` will issue an error and refuse to compile the function:
+
+::
+
+    % echo "export int add(int x) { return ++x; }" | ispc
+    <stdin>:1:12: Error: Illegal to return a "varying" type from exported function "add" 
+    <stdin>:1:20: Error: Varying parameter "x" is illegal in an exported function. 
+
+While there's no fundamental reason why this isn't possible, recall the
+definition of "varying" variables: they have one value for each program
+instance in the gang.  As such, the number of values and amount of storage
+required to represent a varying variable depends on the gang size
+(i.e. ``programCount``), which can have different values depending on the
+compilation target.
+
+``ispc`` therefore prohibits passing "varying" values between the
+application and the ``ispc`` program in order to prevent the
+application-side code from depending on a particular gang size, in order to
+encourage portability to different gang sizes.  (A generally desirable
+programming practice.)
+
+For cases where the size of data is actually fixed from the application
+side, the value can be passed via a pointer to a short ``uniform`` array,
+as follows:
+
+::
+
+    export void add4(uniform int ptr[4]) {
+        foreach (i = 0 ... 4)
+            ptr[i]++;
+    }
+
+On the 4-wide SSE instruction set, this compiles to a single vector add
+instruction (and associated move instructions), while it still also
+efficiently computes the correct result on 8-wide AVX targets.
+
+
 Programming Techniques
 ======================
 
diff --git a/docs/ispc.rst b/docs/ispc.rst
index 4be80a18..9edd7325 100644
--- a/docs/ispc.rst
+++ b/docs/ispc.rst
@@ -121,10 +121,14 @@ Contents:
 
 * `The ISPC Standard Library`_
 
+  + `Basic Operations On Data`_
+
+    * `Logical and Selection Operations`_
+    * `Bit Operations`_
+
   + `Math Functions`_
 
     * `Basic Math Functions`_
-    * `Bit-Level Operations`_
     * `Transcendental Functions`_
     * `Pseudo-Random Numbers`_
 
@@ -538,7 +542,7 @@ preprocessor runs:
   * - ISPC
     - 1
     - Detecting that the ``ispc`` compiler is processing the file
-  * - ISPC_TARGET_{SSE2,SSE4,AVX}
+  * - ISPC_TARGET_{SSE2,SSE4,AVX,AVX2}
     - 1
     - One of these will be set, depending on the compilation target.
   * - ISPC_POINTER_SIZE
@@ -1390,8 +1394,8 @@ Types
 Basic Types and Type Qualifiers
 -------------------------------
 
-``ispc`` is a statically-typed language.  It supports a variety of basic
-types.
+``ispc`` is a statically-typed language.  It supports a variety of core
+basic types:
 
 * ``void``: "empty" type representing no value.
 * ``bool``: boolean value; may be assigned ``true``, ``false``, or the
@@ -1408,6 +1412,15 @@ types.
 * ``unsigned int64``: 64-bit unsigned integer.
 * ``double``: 64-bit double-precision floating point value.
 
+There are also a few built-in types related to pointers and memory:
+
+* ``size_t``: integer type able to hold the size of any object (structure or array)
+* ``ptrdiff_t``: an integer type large enough to represent the difference
+  between two pointers
+* ``intptr_t``: signed integer type that is large enough to represent
+  a pointer value
+* ``uintptr_t``: unsigned integer type large enough to represent a pointer
+
 Implicit type conversion between values of different types is done
 automatically by the ``ispc`` compiler.  Thus, a value of ``float`` type
 can be assigned to a variable of ``int`` type directly.  In binary
@@ -2150,6 +2163,12 @@ greater than or equal to ``NUM_ITEMS``.
         // ...
     }
 
+Short-circuiting may impose some overhead in the generated code; for cases
+where short-circuiting is undesirable due to performance impact, see
+the section `Logical and Selection Operations`_, which introduces helper
+functions in the standard library that provide these operations without
+short-circuiting.
+
 
 Dynamic Memory Allocation
 -------------------------
@@ -2827,6 +2846,123 @@ The ISPC Standard Library
 compiling ``ispc`` programs.  (To disable the standard library, pass the
 ``--nostdlib`` command-line flag to the compiler.)
 
+Basic Operations On Data
+------------------------
+
+Logical and Selection Operations
+--------------------------------
+
+Recall from `Expressions`_ that ``ispc`` short-circuits the evaluation of
+logical and selection operators: given an expression like ``(index < count
+&& array[index] == 0)``, then ``array[index] == 0`` is only evaluated if
+``index < count`` is true.  This property is useful for writing expressions
+like the preceding one, where the second expression may not be safe to
+evaluate in some cases.
+
+This short-circuiting can impose overhead in the generated code; additional
+operations are required to test the first value and to conditionally jump
+over the code that evaluates the second value.  The ``ispc`` compiler does
+try to mitigate this cost by detecting cases where it is both safe and
+inexpensive to evaluate both expressions, and skips short-circuiting in the
+generated code in this case (without there being any programmer-visible
+change in program behavior.)
+
+For cases where the compiler can't detect this case but the programmer
+wants to avoid short-circuiting behavior, the standard library provides a
+few helper functions.  First, ``and()`` and ``or()`` provide
+non-short-circuiting logical AND and OR operations.
+
+::
+
+    bool and(bool a, bool b)
+    bool or(bool a, bool b)
+    uniform bool and(uniform bool a, uniform bool b)
+    uniform bool or(uniform bool a, uniform bool b)
+
+And there are three variants of ``select()`` that select between two values
+based on a boolean condition.  These are the variants of ``select()`` for
+the ``int8`` type:
+
+::
+
+    int8 select(bool v, int8 a, int8 b)
+    int8 select(uniform bool v, int8 a, int8 b)
+    uniform int8 select(uniform bool v, uniform int8 a, uniform int8 b)
+
+There are also variants for ``int16``, ``int32``, ``int64``, ``float``, and
+``double`` types.
+
+Bit Operations
+--------------
+
+The various variants of ``popcnt()`` return the population count--the
+number of bits set in the given value.
+
+::
+
+    uniform int popcnt(uniform int v)
+    int popcnt(int v)
+    uniform int popcnt(bool v)
+
+
+A few functions determine how many leading bits in the given value are zero
+and how many of the trailing bits are zero; there are also ``unsigned``
+variants of these functions and variants that take ``int64`` and ``unsigned
+int64`` types.
+
+::
+
+    int32 count_leading_zeros(int32 v)
+    uniform int32 count_leading_zeros(uniform int32 v)
+    int32 count_trailing_zeros(int32 v)
+    uniform int32 count_trailing_zeros(uniform int32 v)
+
+Sometimes it's useful to convert a ``bool`` value to an integer using sign
+extension so that the integer's bits are all on if the ``bool`` has the
+value ``true`` (rather than just having the value one).  The
+``sign_extend()`` functions provide this functionality:
+
+::
+
+    int sign_extend(bool value) 
+    uniform int sign_extend(uniform bool value) 
+
+The ``intbits()`` and ``floatbits()`` functions can be used to implement
+low-level floating-point bit twiddling.  For example, ``intbits()`` returns
+an ``unsigned int`` that is a bit-for-bit copy of the given ``float``
+value.  (Note: it is **not** the same as ``(int)a``, but corresponds to
+something like ``*((int *)&a)`` in C.)
+
+::
+
+    float floatbits(unsigned int a);
+    uniform float floatbits(uniform unsigned int a);
+    unsigned int intbits(float a);
+    uniform unsigned int intbits(uniform float a);
+
+
+The ``intbits()`` and ``floatbits()`` functions have no cost at runtime;
+they just let the compiler know how to interpret the bits of the given
+value.  They make it possible to efficiently write functions that take
+advantage of the low-level bit representation of floating-point values.
+
+For example, the ``abs()`` function in the standard library is implemented
+as follows:
+
+::
+
+    float abs(float a) {
+        unsigned int i = intbits(a);
+        i &= 0x7fffffff;
+        return floatbits(i);
+    }
+
+This code directly clears the high order bit to ensure that the given
+floating-point value is positive.  This compiles down to a single ``andps``
+instruction when used with an Intel® SSE target, for example.
+
+
+
 Math Functions
 --------------
 
@@ -2919,77 +3055,6 @@ quite efficient.)
                                uniform unsigned int low,
                                uniform unsigned int high)
 
-Bit-Level Operations
---------------------
-
-
-The various variants of ``popcnt()`` return the population count--the
-number of bits set in the given value.
-
-::
-
-    uniform int popcnt(uniform int v)
-    int popcnt(int v)
-    uniform int popcnt(bool v)
-
-
-A few functions determine how many leading bits in the given value are zero
-and how many of the trailing bits are zero; there are also ``unsigned``
-variants of these functions and variants that take ``int64`` and ``unsigned
-int64`` types.
-
-::
-
-    int32 count_leading_zeros(int32 v)
-    uniform int32 count_leading_zeros(uniform int32 v)
-    int32 count_trailing_zeros(int32 v)
-    uniform int32 count_trailing_zeros(uniform int32 v)
-
-Sometimes it's useful to convert a ``bool`` value to an integer using sign
-extension so that the integer's bits are all on if the ``bool`` has the
-value ``true`` (rather than just having the value one).  The
-``sign_extend()`` functions provide this functionality:
-
-::
-
-    int sign_extend(bool value) 
-    uniform int sign_extend(uniform bool value) 
-
-The ``intbits()`` and ``floatbits()`` functions can be used to implement
-low-level floating-point bit twiddling.  For example, ``intbits()`` returns
-an ``unsigned int`` that is a bit-for-bit copy of the given ``float``
-value.  (Note: it is **not** the same as ``(int)a``, but corresponds to
-something like ``*((int *)&a)`` in C.
-
-::
-
-    float floatbits(unsigned int a);
-    uniform float floatbits(uniform unsigned int a);
-    unsigned int intbits(float a);
-    uniform unsigned int intbits(uniform float a);
-
-
-The ``intbits()`` and ``floatbits()`` functions have no cost at runtime;
-they just let the compiler know how to interpret the bits of the given
-value.  They make it possible to efficiently write functions that take
-advantage of the low-level bit representation of floating-point values.
-
-For example, the ``abs()`` function in the standard library is implemented
-as follows:
-
-::
-
-    float abs(float a) {
-        unsigned int i = intbits(a);
-        i &= 0x7fffffff;
-        return floatbits(i);
-    }
-
-This code directly clears the high order bit to ensure that the given
-floating-point value is positive.  This compiles down to a single ``andps``
-instruction when used with an Intel® SSE target, for example.
-
-
 Transcendental Functions
 ------------------------
 
@@ -3027,8 +3092,8 @@ The corresponding inverse functions are also available:
    uniform float acos(uniform float x)
    float atan(float x)
    uniform float atan(uniform float x)
-   float atan2(float x, float y)
-   uniform float atan2(uniform float x, uniform float y)
+   float atan2(float y, float x)
+   uniform float atan2(uniform float y, uniform float x)
 
 If both sine and cosine are needed, then the ``sincos()`` call computes
 both more efficiently than two calls to the respective individual
@@ -3732,6 +3797,13 @@ For global atomics, only atomic swap is available for these types:
   float atomic_swap_global(uniform float * uniform ptr, float value)
   double atomic_swap_global(uniform double * uniform ptr, double value)
 
+Finally, "swap" (but none of the other atomics) is available for pointer
+types:
+
+::
+
+  void *atomic_swap_{local,global}(void * * uniform ptr, void * value)
+
 There are also variants of the atomic that take ``uniform`` values for the
 operand and return a ``uniform`` result.  These correspond to a single
 atomic operation being performed for the entire gang of program instances,
@@ -3756,6 +3828,13 @@ rather than one per program instance.
   uniform int32 atomic_swap_{local,global}(uniform int32 * uniform ptr,
                                            uniform int32 newval)
 
+And similarly for pointers:
+
+::
+
+  uniform void *atomic_swap_{local,global}(void * * uniform ptr,
+                                           void *newval)
+
 Be careful that you use the atomic function that you mean to; consider the
 following code:
 
@@ -3797,12 +3876,18 @@ the same location in memory!)
   int32 atomic_xor_{local,global}(uniform int32 * varying ptr, int32 value)
   int32 atomic_swap_{local,global}(uniform int32 * varying ptr, int32 value)
 
+And similarly for pointers:
+
+::
+
+  void *atomic_swap_{local,global}(void * * ptr, void *value)
+
 There are also atomic "compare and exchange" functions.  Compare and
 exchange atomically compares the value in "val" to "compare"--if they
 match, it assigns "newval" to "val".  In either case, the old value of
 "val" is returned.  (As with the other atomic operations, there are also
 ``unsigned`` and 64-bit variants of this function.  Furthermore, there are
-``float`` and ``double`` variants as well.)
+``float``, ``double``, and ``void *`` variants as well.)
 
 ::
 
@@ -3824,6 +3909,11 @@ code.
 
     void memory_barrier();
 
+Note that this barrier is *not* needed for coordinating reads and writes
+among the program instances in a gang; it's only needed for coordinating
+between multiple hardware threads running on different cores.  See the
+section `Data Races Within a Gang`_ for the guarantees provided about
+memory read/write ordering across a gang.
 
 Prefetches
 ----------
diff --git a/docs/news.rst b/docs/news.rst
index e875b077..ad6c4bd5 100644
--- a/docs/news.rst
+++ b/docs/news.rst
@@ -2,6 +2,24 @@
 ispc News
 =========
 
+ispc 1.2.1 is Released
+----------------------
+
+This is a bugfix release, fixing approximately 20 bugs in the system and
+improving error handling and error reporting.  New functionality includes
+very efficient float/half conversion routines thanks to Fabian 
+Giesen.  See the `1.2.1 release notes`_ for details.
+
+.. _1.2.1 release notes: https://github.com/ispc/ispc/tree/master/docs/ReleaseNotes.txt
+
+ispc 1.2.0 is Released
+-----------------------
+
+A new major release was posted on March 20, 2012.  This release includes
+significant new functionality for cleanly handling "structure of arrays"
+(SoA) data layout and a new model for how uniform and varying are handled
+with structure types.  
+
 Paper on ispc To Appear in InPar 2012
 -------------------------------------
 
diff --git a/doxygen.cfg b/doxygen.cfg
index f8637ddf..1659fbdd 100644
--- a/doxygen.cfg
+++ b/doxygen.cfg
@@ -31,7 +31,7 @@ PROJECT_NAME           = "Intel SPMD Program Compiler"
 # This could be handy for archiving the generated documentation or
 # if some version control system is used.
 
-PROJECT_NUMBER         = 1.2.0
+PROJECT_NUMBER         = 1.2.1
 
 # The OUTPUT_DIRECTORY tag is used to specify the (relative or absolute)
 # base path where the generated documentation will be put.
diff --git a/expr.cpp b/expr.cpp
index 17541012..b43f9e54 100644
--- a/expr.cpp
+++ b/expr.cpp
@@ -212,11 +212,27 @@ lDoTypeConv(const Type *fromType, const Type *toType, Expr **expr,
     }
 
     if (dynamic_cast<const FunctionType *>(fromType)) {
-        if (!failureOk)
-            Error(pos, "Can't convert function type \"%s\" to \"%s\" for %s.",
-                  fromType->GetString().c_str(),
-                  toType->GetString().c_str(), errorMsgBase);
-        return false;
+        if (dynamic_cast<const PointerType *>(toType) != NULL) {
+            // Convert function type to pointer to function type
+            if (expr != NULL) {
+                Expr *aoe = new AddressOfExpr(*expr, (*expr)->pos);
+                if (lDoTypeConv(aoe->GetType(), toType, &aoe, failureOk,
+                                errorMsgBase, pos)) {
+                    *expr = aoe;
+                    return true;
+                }
+            }
+            else
+                return lDoTypeConv(PointerType::GetUniform(fromType), toType, NULL,
+                                   failureOk, errorMsgBase, pos);
+        }
+        else {
+            if (!failureOk)
+                Error(pos, "Can't convert function type \"%s\" to \"%s\" for %s.",
+                      fromType->GetString().c_str(),
+                      toType->GetString().c_str(), errorMsgBase);
+            return false;
+        }
     }
     if (dynamic_cast<const FunctionType *>(toType)) {
         if (!failureOk)
@@ -253,6 +269,11 @@ lDoTypeConv(const Type *fromType, const Type *toType, Expr **expr,
     // "float foo[10]" -> "float * uniform foo", we have what's seemingly
     // a varying to uniform conversion (but not really)
     if (fromArrayType != NULL && toPointerType != NULL) {
+        // can convert any array to a void pointer (both uniform and
+        // varying).
+        if (PointerType::IsVoidPointer(toPointerType))
+            goto typecast_ok;
+
         // array to pointer to array element type
         const Type *eltType = fromArrayType->GetElementType();
         if (toPointerType->GetBaseType()->IsConstType())
@@ -323,8 +344,8 @@ lDoTypeConv(const Type *fromType, const Type *toType, Expr **expr,
                  !Type::Equal(fromPointerType->GetBaseType()->GetAsConstType(), 
                               toPointerType->GetBaseType())) {
             if (!failureOk)
-                Error(pos, "Can't convert between incompatible pointer types "
-                      "\"%s\" and \"%s\" for %s.",
+                Error(pos, "Can't convert from pointer type \"%s\" to "
+                      "incompatible pointer type \"%s\" for %s.",
                       fromPointerType->GetString().c_str(),
                       toPointerType->GetString().c_str(), errorMsgBase);
             return false;
@@ -616,19 +637,23 @@ InitSymbol(llvm::Value *ptr, const Type *symType, Expr *initExpr,
         // instead we'll make a constant static global that holds the
         // constant value and emit a memcpy to put its value into the
         // pointer we have.
-        LLVM_TYPE_CONST llvm::Type *llvmType = symType->LLVMType(g->ctx);
+        llvm::Type *llvmType = symType->LLVMType(g->ctx);
         if (llvmType == NULL) {
             Assert(m->errorCount > 0);
             return;
         }
 
-        llvm::Value *constPtr = 
-            new llvm::GlobalVariable(*m->module, llvmType, true /* const */, 
-                                     llvm::GlobalValue::InternalLinkage,
-                                     constValue, "const_initializer");
-        llvm::Value *size = g->target.SizeOf(llvmType, 
-                                             ctx->GetCurrentBasicBlock());
-        ctx->MemcpyInst(ptr, constPtr, size);
+        if (Type::IsBasicType(symType))
+            ctx->StoreInst(constValue, ptr);
+        else {
+            llvm::Value *constPtr = 
+                new llvm::GlobalVariable(*m->module, llvmType, true /* const */, 
+                                         llvm::GlobalValue::InternalLinkage,
+                                         constValue, "const_initializer");
+            llvm::Value *size = g->target.SizeOf(llvmType, 
+                                                 ctx->GetCurrentBasicBlock());
+            ctx->MemcpyInst(ptr, constPtr, size);
+        }
 
         return;
     }
@@ -746,7 +771,7 @@ InitSymbol(llvm::Value *ptr, const Type *symType, Expr *initExpr,
                 else {
                     // If we don't have enough initializer values, initialize the
                     // rest as zero.
-                    LLVM_TYPE_CONST llvm::Type *llvmType = elementType->LLVMType(g->ctx);
+                    llvm::Type *llvmType = elementType->LLVMType(g->ctx);
                     if (llvmType == NULL) {
                         Assert(m->errorCount > 0);
                         return;
@@ -880,7 +905,7 @@ lLLVMConstantValue(const Type *type, llvm::LLVMContext *ctx, double value) {
         // a recursive call to lLLVMConstantValue().
         const Type *baseType = vectorType->GetBaseType();
         llvm::Constant *constElement = lLLVMConstantValue(baseType, ctx, value);
-        LLVM_TYPE_CONST llvm::Type *llvmVectorType = vectorType->LLVMType(ctx);
+        llvm::Type *llvmVectorType = vectorType->LLVMType(ctx);
 
         // Now create a constant version of the corresponding LLVM type that we
         // use to represent the VectorType.
@@ -889,8 +914,8 @@ lLLVMConstantValue(const Type *type, llvm::LLVMContext *ctx, double value) {
         // LLVM ArrayTypes leaks into the code here; it feels like this detail
         // should be better encapsulated?
         if (baseType->IsUniformType()) {
-            LLVM_TYPE_CONST llvm::VectorType *lvt = 
-                llvm::dyn_cast<LLVM_TYPE_CONST llvm::VectorType>(llvmVectorType);
+            llvm::VectorType *lvt = 
+                llvm::dyn_cast<llvm::VectorType>(llvmVectorType);
             Assert(lvt != NULL);
             std::vector<llvm::Constant *> vals;
             for (unsigned int i = 0; i < lvt->getNumElements(); ++i)
@@ -898,8 +923,8 @@ lLLVMConstantValue(const Type *type, llvm::LLVMContext *ctx, double value) {
             return llvm::ConstantVector::get(vals);
         }
         else {
-            LLVM_TYPE_CONST llvm::ArrayType *lat = 
-                llvm::dyn_cast<LLVM_TYPE_CONST llvm::ArrayType>(llvmVectorType);
+            llvm::ArrayType *lat = 
+                llvm::dyn_cast<llvm::ArrayType>(llvmVectorType);
             Assert(lat != NULL);
             std::vector<llvm::Constant *> vals;
             for (unsigned int i = 0; i < lat->getNumElements(); ++i)
@@ -1414,7 +1439,7 @@ lEmitBinaryPointerArith(BinaryExpr::Op op, llvm::Value *value0,
 
             // Now divide by the size of the type that the pointer
             // points to in order to return the difference in elements.
-            LLVM_TYPE_CONST llvm::Type *llvmElementType = 
+            llvm::Type *llvmElementType = 
                 ptrType->GetBaseType()->LLVMType(g->ctx);
             llvm::Value *size = g->target.SizeOf(llvmElementType, 
                                                  ctx->GetCurrentBasicBlock());
@@ -1623,7 +1648,7 @@ lEmitLogicalOp(BinaryExpr::Op op, Expr *arg0, Expr *arg1,
 
     // Allocate temporary storage for the return value
     const Type *retType = Type::MoreGeneralType(type0, type1, pos, lOpString(op));
-    LLVM_TYPE_CONST llvm::Type *llvmRetType = retType->LLVMType(g->ctx);
+    llvm::Type *llvmRetType = retType->LLVMType(g->ctx);
     llvm::Value *retPtr = ctx->AllocaInst(llvmRetType, "logical_op_mem");
 
     llvm::BasicBlock *bbSkipEvalValue1 = ctx->CreateBasicBlock("skip_eval_1");
@@ -2314,6 +2339,7 @@ BinaryExpr::TypeCheck() {
         if (type1->IsVaryingType()) {
             arg0 = TypeConvertExpr(arg0, type0->GetAsVaryingType(), 
                                    "pointer addition");
+            offsetType = offsetType->GetAsVaryingType();
             Assert(arg0 != NULL);
         }
 
@@ -2984,7 +3010,7 @@ SelectExpr::GetValue(FunctionEmitContext *ctx) const {
         // Temporary storage to store the values computed for each
         // expression, if any.  (These stay as uninitialized memory if we
         // short circuit around the corresponding expression.)
-        LLVM_TYPE_CONST llvm::Type *exprType = 
+        llvm::Type *exprType = 
             expr1->GetType()->LLVMType(g->ctx);
         llvm::Value *expr1Ptr = ctx->AllocaInst(exprType);
         llvm::Value *expr2Ptr = ctx->AllocaInst(exprType);
@@ -3400,32 +3426,43 @@ FunctionCallExpr::TypeCheck() {
         return NULL;
 
     std::vector<const Type *> argTypes;
-    std::vector<bool> argCouldBeNULL;
+    std::vector<bool> argCouldBeNULL, argIsConstant;
     for (unsigned int i = 0; i < args->exprs.size(); ++i) {
-        if (args->exprs[i] == NULL)
+        Expr *expr = args->exprs[i];
+
+        if (expr == NULL)
             return NULL;
-        const Type *t = args->exprs[i]->GetType();
+        const Type *t = expr->GetType();
         if (t == NULL)
             return NULL;
+
         argTypes.push_back(t);
-        argCouldBeNULL.push_back(lIsAllIntZeros(args->exprs[i]));
+        argCouldBeNULL.push_back(lIsAllIntZeros(expr) ||
+                                 dynamic_cast<NullPointerExpr *>(expr));
+        argIsConstant.push_back(dynamic_cast<ConstExpr *>(expr) ||
+                                dynamic_cast<NullPointerExpr *>(expr));
     }
 
     FunctionSymbolExpr *fse = dynamic_cast<FunctionSymbolExpr *>(func);
     if (fse != NULL) {
         // Regular function call
-
-        if (fse->ResolveOverloads(args->pos, argTypes, &argCouldBeNULL) == false)
+        if (fse->ResolveOverloads(args->pos, argTypes, &argCouldBeNULL,
+                                  &argIsConstant) == false)
             return NULL;
 
         func = ::TypeCheck(fse);
         if (func == NULL)
             return NULL;
 
-        const PointerType *pt = 
-            dynamic_cast<const PointerType *>(func->GetType());
-        const FunctionType *ft = (pt == NULL) ? NULL : 
-            dynamic_cast<const FunctionType *>(pt->GetBaseType());
+        const FunctionType *ft = 
+            dynamic_cast<const FunctionType *>(func->GetType());
+        if (ft == NULL) {
+            const PointerType *pt = 
+                dynamic_cast<const PointerType *>(func->GetType());
+            ft = (pt == NULL) ? NULL : 
+                dynamic_cast<const FunctionType *>(pt->GetBaseType());
+        }
+
         if (ft == NULL) {
             Error(pos, "Valid function name must be used for function call.");
             return NULL;
@@ -3625,7 +3662,19 @@ ExprList::GetConstant(const Type *type) const {
         if (exprs[i] == NULL)
             return NULL;
         const Type *elementType = collectionType->GetElementType(i);
-        llvm::Constant *c = exprs[i]->GetConstant(elementType);
+
+        Expr *expr = exprs[i];
+        if (dynamic_cast<ExprList *>(expr) == NULL) {
+            // If there's a simple type conversion from the type of this
+            // expression to the type we need, then let the regular type
+            // conversion machinery handle it.
+            expr = TypeConvertExpr(exprs[i], elementType, "initializer list");
+            Assert(expr != NULL);
+            // Re-establish const-ness if possible
+            expr = ::Optimize(expr);
+        }
+
+        llvm::Constant *c = expr->GetConstant(elementType);
         if (c == NULL)
             // If this list element couldn't convert to the right constant
             // type for the corresponding collection member, then give up.
@@ -3641,7 +3690,7 @@ ExprList::GetConstant(const Type *type) const {
             return NULL;
         }
 
-        LLVM_TYPE_CONST llvm::Type *llvmType = elementType->LLVMType(g->ctx);
+        llvm::Type *llvmType = elementType->LLVMType(g->ctx);
         if (llvmType == NULL) {
             Assert(m->errorCount > 0);
             return NULL;
@@ -3652,27 +3701,23 @@ ExprList::GetConstant(const Type *type) const {
     }
 
     if (dynamic_cast<const StructType *>(type) != NULL) {
-#if defined(LLVM_2_9)
-        return llvm::ConstantStruct::get(*g->ctx, cv, false);
-#else
-        LLVM_TYPE_CONST llvm::StructType *llvmStructType =
-            llvm::dyn_cast<LLVM_TYPE_CONST llvm::StructType>(collectionType->LLVMType(g->ctx));
+        llvm::StructType *llvmStructType =
+            llvm::dyn_cast<llvm::StructType>(collectionType->LLVMType(g->ctx));
         Assert(llvmStructType != NULL);
         return llvm::ConstantStruct::get(llvmStructType, cv);
-#endif
     }
     else {
-        LLVM_TYPE_CONST llvm::Type *lt = type->LLVMType(g->ctx);
-        LLVM_TYPE_CONST llvm::ArrayType *lat = 
-            llvm::dyn_cast<LLVM_TYPE_CONST llvm::ArrayType>(lt);
+        llvm::Type *lt = type->LLVMType(g->ctx);
+        llvm::ArrayType *lat = 
+            llvm::dyn_cast<llvm::ArrayType>(lt);
         if (lat != NULL)
             return llvm::ConstantArray::get(lat, cv);
         else {
             // uniform short vector type
             Assert(type->IsUniformType() &&
                    dynamic_cast<const VectorType *>(type) != NULL);
-            LLVM_TYPE_CONST llvm::VectorType *lvt = 
-                llvm::dyn_cast<LLVM_TYPE_CONST llvm::VectorType>(lt);
+            llvm::VectorType *lvt = 
+                llvm::dyn_cast<llvm::VectorType>(lt);
             Assert(lvt != NULL);
 
             // Uniform short vectors are stored as vectors of length
@@ -3949,10 +3994,10 @@ IndexExpr::GetBaseSymbol() const {
 static llvm::Value *
 lConvertToSlicePointer(FunctionEmitContext *ctx, llvm::Value *ptr,
                        const PointerType *slicePtrType) {
-    LLVM_TYPE_CONST llvm::Type *llvmSlicePtrType = 
+    llvm::Type *llvmSlicePtrType = 
         slicePtrType->LLVMType(g->ctx);
-    LLVM_TYPE_CONST llvm::StructType *sliceStructType =
-        llvm::dyn_cast<LLVM_TYPE_CONST llvm::StructType>(llvmSlicePtrType);
+    llvm::StructType *sliceStructType =
+        llvm::dyn_cast<llvm::StructType>(llvmSlicePtrType);
     Assert(sliceStructType != NULL &&
            sliceStructType->getElementType(0) == ptr->getType());
 
@@ -4647,12 +4692,13 @@ MemberExpr::create(Expr *e, const char *id, SourcePos p, SourcePos idpos,
         exprType = pointerType->GetBaseType();
 
     if (derefLValue == true && pointerType == NULL) {
-        if (dynamic_cast<const StructType *>(exprType->GetReferenceTarget()) != NULL)
-            Error(p, "Dereference operator \"->\" can't be applied to non-pointer "
+        const Type *targetType = exprType->GetReferenceTarget();
+        if (dynamic_cast<const StructType *>(targetType) != NULL)
+            Error(p, "Member operator \"->\" can't be applied to non-pointer "
                   "type \"%s\".  Did you mean to use \".\"?", 
                   exprType->GetString().c_str());
         else
-            Error(p, "Dereference operator \"->\" can't be applied to non-struct "
+            Error(p, "Member operator \"->\" can't be applied to non-struct "
                   "pointer type \"%s\".", exprType->GetString().c_str());
         return NULL;
     }
@@ -4668,6 +4714,12 @@ MemberExpr::create(Expr *e, const char *id, SourcePos p, SourcePos idpos,
         return new StructMemberExpr(e, id, p, idpos, derefLValue);
     else if (dynamic_cast<const VectorType *>(exprType) != NULL)
         return new VectorMemberExpr(e, id, p, idpos, derefLValue);
+    else if (dynamic_cast<const UndefinedStructType *>(exprType)) {
+        Error(p, "Member operator \"%s\" can't be applied to declared "
+              "but not defined struct type \"%s\".", derefLValue ? "->" : ".",
+              exprType->GetString().c_str());
+        return NULL;
+    }
     else {
         Error(p, "Member operator \"%s\" can't be used with expression of "
               "\"%s\" type.", derefLValue ? "->" : ".", 
@@ -5630,7 +5682,7 @@ ConstExpr::GetConstant(const Type *type) const {
         // The only time we should get here is if we have an integer '0'
         // constant that should be turned into a NULL pointer of the
         // appropriate type.
-        LLVM_TYPE_CONST llvm::Type *llvmType = type->LLVMType(g->ctx);
+        llvm::Type *llvmType = type->LLVMType(g->ctx);
         if (llvmType == NULL) {
             Assert(m->errorCount > 0);
             return NULL;
@@ -5743,7 +5795,7 @@ lTypeConvAtomic(FunctionEmitContext *ctx, llvm::Value *exprVal,
 
     switch (toType->basicType) {
     case AtomicType::TYPE_FLOAT: {
-        LLVM_TYPE_CONST llvm::Type *targetType = 
+        llvm::Type *targetType = 
             fromType->IsUniformType() ? LLVMTypes::FloatType : 
                                         LLVMTypes::FloatVectorType;
         switch (fromType->basicType) {
@@ -5787,7 +5839,7 @@ lTypeConvAtomic(FunctionEmitContext *ctx, llvm::Value *exprVal,
         break;
     }
     case AtomicType::TYPE_DOUBLE: {
-        LLVM_TYPE_CONST llvm::Type *targetType = 
+        llvm::Type *targetType = 
             fromType->IsUniformType() ? LLVMTypes::DoubleType :
                                         LLVMTypes::DoubleVectorType;
         switch (fromType->basicType) {
@@ -5825,7 +5877,7 @@ lTypeConvAtomic(FunctionEmitContext *ctx, llvm::Value *exprVal,
         break;
     }
     case AtomicType::TYPE_INT8: {
-        LLVM_TYPE_CONST llvm::Type *targetType = 
+        llvm::Type *targetType = 
             fromType->IsUniformType() ? LLVMTypes::Int8Type :
                                         LLVMTypes::Int8VectorType;
         switch (fromType->basicType) {
@@ -5861,7 +5913,7 @@ lTypeConvAtomic(FunctionEmitContext *ctx, llvm::Value *exprVal,
         break;
     }
     case AtomicType::TYPE_UINT8: {
-        LLVM_TYPE_CONST llvm::Type *targetType = 
+        llvm::Type *targetType = 
             fromType->IsUniformType() ? LLVMTypes::Int8Type :
                                         LLVMTypes::Int8VectorType;
         switch (fromType->basicType) {
@@ -5903,7 +5955,7 @@ lTypeConvAtomic(FunctionEmitContext *ctx, llvm::Value *exprVal,
         break;
     }
     case AtomicType::TYPE_INT16: {
-        LLVM_TYPE_CONST llvm::Type *targetType = 
+        llvm::Type *targetType = 
             fromType->IsUniformType() ? LLVMTypes::Int16Type :
                                         LLVMTypes::Int16VectorType;
         switch (fromType->basicType) {
@@ -5943,7 +5995,7 @@ lTypeConvAtomic(FunctionEmitContext *ctx, llvm::Value *exprVal,
         break;
     }
     case AtomicType::TYPE_UINT16: {
-        LLVM_TYPE_CONST llvm::Type *targetType = 
+        llvm::Type *targetType = 
             fromType->IsUniformType() ? LLVMTypes::Int16Type :
                                         LLVMTypes::Int16VectorType;
         switch (fromType->basicType) {
@@ -5989,7 +6041,7 @@ lTypeConvAtomic(FunctionEmitContext *ctx, llvm::Value *exprVal,
         break;
     }
     case AtomicType::TYPE_INT32: {
-        LLVM_TYPE_CONST llvm::Type *targetType = 
+        llvm::Type *targetType = 
             fromType->IsUniformType() ? LLVMTypes::Int32Type :
                                         LLVMTypes::Int32VectorType;
         switch (fromType->basicType) {
@@ -6029,7 +6081,7 @@ lTypeConvAtomic(FunctionEmitContext *ctx, llvm::Value *exprVal,
         break;
     }
     case AtomicType::TYPE_UINT32: {
-        LLVM_TYPE_CONST llvm::Type *targetType = 
+        llvm::Type *targetType = 
             fromType->IsUniformType() ? LLVMTypes::Int32Type :
                                         LLVMTypes::Int32VectorType;
         switch (fromType->basicType) {
@@ -6075,7 +6127,7 @@ lTypeConvAtomic(FunctionEmitContext *ctx, llvm::Value *exprVal,
         break;
     }
     case AtomicType::TYPE_INT64: {
-        LLVM_TYPE_CONST llvm::Type *targetType = 
+        llvm::Type *targetType = 
             fromType->IsUniformType() ? LLVMTypes::Int64Type : 
                                         LLVMTypes::Int64VectorType;
         switch (fromType->basicType) {
@@ -6113,7 +6165,7 @@ lTypeConvAtomic(FunctionEmitContext *ctx, llvm::Value *exprVal,
         break;
     }
     case AtomicType::TYPE_UINT64: {
-        LLVM_TYPE_CONST llvm::Type *targetType = 
+        llvm::Type *targetType = 
             fromType->IsUniformType() ? LLVMTypes::Int64Type : 
                                         LLVMTypes::Int64VectorType;
         switch (fromType->basicType) {
@@ -6257,7 +6309,7 @@ lUniformValueToVarying(FunctionEmitContext *ctx, llvm::Value *value,
     const CollectionType *collectionType = 
         dynamic_cast<const CollectionType *>(type);
     if (collectionType != NULL) {
-        LLVM_TYPE_CONST llvm::Type *llvmType = 
+        llvm::Type *llvmType = 
             type->GetAsVaryingType()->LLVMType(g->ctx);
         llvm::Value *retValue = llvm::UndefValue::get(llvmType);
 
@@ -6283,10 +6335,17 @@ TypeCastExpr::GetValue(FunctionEmitContext *ctx) const {
 
     ctx->SetDebugPos(pos);
     const Type *toType = GetType(), *fromType = expr->GetType();
-    if (!toType || !fromType || Type::Equal(toType, AtomicType::Void) || 
-        Type::Equal(fromType, AtomicType::Void))
-        // an error should have been issued elsewhere in this case
+    if (toType == NULL || fromType == NULL) {
+        Assert(m->errorCount > 0);
         return NULL;
+    }
+
+    if (Type::Equal(toType, AtomicType::Void)) {
+        // emit the code for the expression in case it has side-effects but
+        // then we're done.
+        (void)expr->GetValue(ctx);
+        return NULL;
+    }
 
     const PointerType *fromPointerType = dynamic_cast<const PointerType *>(fromType);
     const PointerType *toPointerType = dynamic_cast<const PointerType *>(toType);
@@ -6352,10 +6411,10 @@ TypeCastExpr::GetValue(FunctionEmitContext *ctx) const {
             Assert(dynamic_cast<const AtomicType *>(toType) != NULL);
             if (toType->IsBoolType()) {
                 // convert pointer to bool
-                LLVM_TYPE_CONST llvm::Type *lfu = 
+                llvm::Type *lfu = 
                     fromType->GetAsUniformType()->LLVMType(g->ctx);
-                LLVM_TYPE_CONST llvm::PointerType *llvmFromUnifType = 
-                    llvm::dyn_cast<LLVM_TYPE_CONST llvm::PointerType>(lfu);
+                llvm::PointerType *llvmFromUnifType = 
+                    llvm::dyn_cast<llvm::PointerType>(lfu);
 
                 llvm::Value *nullPtrValue = 
                     llvm::ConstantPointerNull::get(llvmFromUnifType);
@@ -6384,7 +6443,7 @@ TypeCastExpr::GetValue(FunctionEmitContext *ctx) const {
                 if (toType->IsVaryingType() && fromType->IsUniformType())
                     value = ctx->SmearUniform(value);
 
-                LLVM_TYPE_CONST llvm::Type *llvmToType = toType->LLVMType(g->ctx);
+                llvm::Type *llvmToType = toType->LLVMType(g->ctx);
                 if (llvmToType == NULL)
                     return NULL;
                 return ctx->PtrToIntInst(value, llvmToType, "ptr_typecast");
@@ -6401,7 +6460,8 @@ TypeCastExpr::GetValue(FunctionEmitContext *ctx) const {
         // implicit array to pointer to first element
         Expr *arrayAsPtr = lArrayToPointer(expr);
         if (Type::EqualIgnoringConst(arrayAsPtr->GetType(), toPointerType) == false) {
-            Assert(Type::EqualIgnoringConst(arrayAsPtr->GetType()->GetAsVaryingType(),
+            Assert(PointerType::IsVoidPointer(toPointerType) ||
+                   Type::EqualIgnoringConst(arrayAsPtr->GetType()->GetAsVaryingType(),
                                             toPointerType) == true);
             arrayAsPtr = new TypeCastExpr(toPointerType, arrayAsPtr, pos);
             arrayAsPtr = ::TypeCheck(arrayAsPtr);
@@ -6426,7 +6486,7 @@ TypeCastExpr::GetValue(FunctionEmitContext *ctx) const {
         Assert(Type::EqualIgnoringConst(toArrayType->GetBaseType(),
                                         fromArrayType->GetBaseType()));
         llvm::Value *v = expr->GetValue(ctx);
-        LLVM_TYPE_CONST llvm::Type *ptype = toType->LLVMType(g->ctx);
+        llvm::Type *ptype = toType->LLVMType(g->ctx);
         return ctx->BitCastInst(v, ptype); //, "array_cast_0size");
     }
 
@@ -6448,7 +6508,7 @@ TypeCastExpr::GetValue(FunctionEmitContext *ctx) const {
             Assert(Type::EqualIgnoringConst(toArray->GetBaseType(),
                                             fromArray->GetBaseType()));
             llvm::Value *v = expr->GetValue(ctx);
-            LLVM_TYPE_CONST llvm::Type *ptype = toType->LLVMType(g->ctx);
+            llvm::Type *ptype = toType->LLVMType(g->ctx);
             return ctx->BitCastInst(v, ptype); //, "array_cast_0size");
         }
 
@@ -6536,7 +6596,7 @@ TypeCastExpr::GetValue(FunctionEmitContext *ctx) const {
         if (toType->IsVaryingType() && fromType->IsUniformType())
             exprVal = ctx->SmearUniform(exprVal);
 
-        LLVM_TYPE_CONST llvm::Type *llvmToType = toType->LLVMType(g->ctx);
+        llvm::Type *llvmToType = toType->LLVMType(g->ctx);
         if (llvmToType == NULL)
             return NULL;
 
@@ -6589,7 +6649,12 @@ TypeCastExpr::TypeCheck() {
     fromType = lDeconstifyType(fromType);
     toType = lDeconstifyType(toType);
 
-    if (fromType->IsVaryingType() && toType->IsUniformType()) {
+    // Anything can be cast to void...
+    if (Type::Equal(toType, AtomicType::Void))
+        return this;
+
+    if (Type::Equal(fromType, AtomicType::Void) ||
+        (fromType->IsVaryingType() && toType->IsUniformType())) {
         Error(pos, "Can't type cast from type \"%s\" to type \"%s\"",
               fromType->GetString().c_str(), toType->GetString().c_str());
         return NULL;
@@ -6749,6 +6814,34 @@ TypeCastExpr::GetBaseSymbol() const {
 }
 
 
+static
+llvm::Constant *
+lConvertPointerConstant(llvm::Constant *c, const Type *constType) {
+    if (c == NULL || constType->IsUniformType())
+        return c;
+
+    // Handle conversion to int and then to vector of int or array of int
+    // (for varying and soa types, respectively)
+    llvm::Constant *intPtr = 
+        llvm::ConstantExpr::getPtrToInt(c, LLVMTypes::PointerIntType);
+    Assert(constType->IsVaryingType() || constType->IsSOAType());
+    int count = constType->IsVaryingType() ? g->target.vectorWidth :
+        constType->GetSOAWidth();
+
+    std::vector<llvm::Constant *> smear;
+    for (int i = 0; i < count; ++i)
+        smear.push_back(intPtr);
+
+    if (constType->IsVaryingType())
+        return llvm::ConstantVector::get(smear);
+    else {
+        llvm::ArrayType *at =
+            llvm::ArrayType::get(LLVMTypes::PointerIntType, count);
+        return llvm::ConstantArray::get(at, smear);
+    }
+}
+
+
 llvm::Constant *
 TypeCastExpr::GetConstant(const Type *constType) const {
     // We don't need to worry about most the basic cases where the type
@@ -6756,11 +6849,18 @@ TypeCastExpr::GetConstant(const Type *constType) const {
     // TypeCastExpr::Optimize() method generally ends up doing the type
     // conversion and returning a ConstExpr, which in turn will have its
     // GetConstant() method called.  However, because ConstExpr currently
-    // can't represent pointer values, we have to handle two cases here:
-    // 1. Null pointers (NULL, 0) valued initializers, and
-    // 2. Converting a uniform function pointer to a varying function
-    //    pointer of the same type.
-    return expr->GetConstant(constType);
+    // can't represent pointer values, we have to handle a few cases
+    // related to pointers here:
+    //
+    // 1. Null pointer (NULL, 0) valued initializers
+    // 2. Converting function types to pointer-to-function types
+    // 3. And converting these from uniform to the varying/soa equivalents.
+    //
+    if (dynamic_cast<const PointerType *>(constType) == NULL)
+        return NULL;
+
+    llvm::Constant *c = expr->GetConstant(constType->GetAsUniformType());
+    return lConvertPointerConstant(c, constType);
 }
 
 
@@ -6776,7 +6876,34 @@ ReferenceExpr::ReferenceExpr(Expr *e, SourcePos p)
 llvm::Value *
 ReferenceExpr::GetValue(FunctionEmitContext *ctx) const {
     ctx->SetDebugPos(pos);
-    return expr ? expr->GetLValue(ctx) : NULL;
+    if (expr == NULL) {
+        Assert(m->errorCount > 0);
+        return NULL;
+    }
+    
+    llvm::Value *value = expr->GetLValue(ctx);
+    if (value != NULL)
+        return value;
+
+    // value is NULL if the expression is a temporary; in this case, we'll
+    // allocate storage for it so that we can return the pointer to that...
+    const Type *type;
+    llvm::Type *llvmType;
+    if ((type = expr->GetType()) == NULL ||
+        (llvmType = type->LLVMType(g->ctx)) == NULL) {
+        Assert(m->errorCount > 0);
+        return NULL;
+    }
+
+    value = expr->GetValue(ctx);
+    if (value == NULL) {
+        Assert(m->errorCount > 0);
+        return NULL;
+    }
+
+    llvm::Value *ptr = ctx->AllocaInst(llvmType);
+    ctx->StoreInst(value, ptr);
+    return ptr;
 }
 
 
@@ -7053,7 +7180,8 @@ AddressOfExpr::GetValue(FunctionEmitContext *ctx) const {
         return NULL;
 
     const Type *exprType = expr->GetType();
-    if (dynamic_cast<const ReferenceType *>(exprType) != NULL)
+    if (dynamic_cast<const ReferenceType *>(exprType) != NULL ||
+        dynamic_cast<const FunctionType *>(exprType) != NULL)
         return expr->GetValue(ctx);
     else
         return expr->GetLValue(ctx);
@@ -7068,8 +7196,18 @@ AddressOfExpr::GetType() const {
     const Type *exprType = expr->GetType();
     if (dynamic_cast<const ReferenceType *>(exprType) != NULL)
         return PointerType::GetUniform(exprType->GetReferenceTarget());
-    else
-        return expr->GetLValueType();
+
+    const Type *t = expr->GetLValueType();
+    if (t != NULL)
+        return t;
+    else {
+        t = expr->GetType();
+        if (t == NULL) {
+            Assert(m->errorCount > 0);
+            return NULL;
+        }
+        return PointerType::GetUniform(t);
+    }
 }
 
 
@@ -7093,7 +7231,22 @@ AddressOfExpr::Print() const {
 
 Expr *
 AddressOfExpr::TypeCheck() {
-    return this;
+    const Type *exprType;
+    if (expr == NULL || (exprType = expr->GetType()) == NULL) {
+        Assert(m->errorCount > 0);
+        return NULL;
+    }
+
+    if (dynamic_cast<const ReferenceType *>(exprType) != NULL||
+        dynamic_cast<const FunctionType *>(exprType) != NULL) {
+        return this;
+    }
+
+    if (expr->GetLValueType() != NULL)
+        return this;
+
+    Error(expr->pos, "Illegal to take address of non-lvalue or function.");
+    return NULL;
 }
 
 
@@ -7109,6 +7262,29 @@ AddressOfExpr::EstimateCost() const {
 }
 
 
+llvm::Constant *
+AddressOfExpr::GetConstant(const Type *type) const {
+    const Type *exprType;
+    if (expr == NULL || (exprType = expr->GetType()) == NULL) {
+        Assert(m->errorCount > 0);
+        return NULL;
+    }
+
+    const PointerType *pt = dynamic_cast<const PointerType *>(type);
+    if (pt == NULL)
+        return NULL;
+
+    const FunctionType *ft = 
+        dynamic_cast<const FunctionType *>(pt->GetBaseType());
+    if (ft != NULL) {
+        llvm::Constant *c = expr->GetConstant(ft);
+        return lConvertPointerConstant(c, type);
+    }
+    else
+        return NULL;
+}
+
+
 ///////////////////////////////////////////////////////////////////////////
 // SizeOfExpr
 
@@ -7119,8 +7295,7 @@ SizeOfExpr::SizeOfExpr(Expr *e, SourcePos p)
 
 SizeOfExpr::SizeOfExpr(const Type *t, SourcePos p)
     : Expr(p), expr(NULL), type(t) {
-    if (type->HasUnboundVariability())
-        type = type->ResolveUnboundVariability(Variability::Varying);
+    type = type->ResolveUnboundVariability(Variability::Varying);
 }
 
 
@@ -7131,7 +7306,7 @@ SizeOfExpr::GetValue(FunctionEmitContext *ctx) const {
     if (t == NULL)
         return NULL;
 
-    LLVM_TYPE_CONST llvm::Type *llvmType = t->LLVMType(g->ctx);
+    llvm::Type *llvmType = t->LLVMType(g->ctx);
     if (llvmType == NULL)
         return NULL;
 
@@ -7209,7 +7384,10 @@ SymbolExpr::GetLValueType() const {
     if (symbol == NULL)
         return NULL;
 
-    return PointerType::GetUniform(symbol->type);
+    if (dynamic_cast<const ReferenceType *>(symbol->type) != NULL)
+        return PointerType::GetUniform(symbol->type->GetReferenceTarget());
+    else
+        return PointerType::GetUniform(symbol->type);
 }
 
 
@@ -7285,8 +7463,7 @@ FunctionSymbolExpr::GetType() const {
         return NULL;
     }
 
-    return matchingFunc ? 
-        new PointerType(matchingFunc->type, Variability::Uniform, true) : NULL;
+    return matchingFunc ? matchingFunc->type : NULL;
 }
 
 
@@ -7336,27 +7513,18 @@ FunctionSymbolExpr::GetConstant(const Type *type) const {
     if (matchingFunc == NULL || matchingFunc->function == NULL)
         return NULL;
 
-    const FunctionType *ft;
-    if (dynamic_cast<const PointerType *>(type) == NULL ||
-        (ft = dynamic_cast<const FunctionType *>(type->GetBaseType())) == NULL)
+    const FunctionType *ft = dynamic_cast<const FunctionType *>(type);
+    if (ft == NULL)
         return NULL;
 
-    LLVM_TYPE_CONST llvm::Type *llvmUnifType = 
-        type->GetAsUniformType()->LLVMType(g->ctx);
-    if (llvmUnifType != matchingFunc->function->getType())
+    if (Type::Equal(type, matchingFunc->type) == false) {
+        Error(pos, "Type of function symbol \"%s\" doesn't match expected type "
+              "\"%s\".", matchingFunc->type->GetString().c_str(),
+              type->GetString().c_str());
         return NULL;
-
-    if (type->IsUniformType())
-        return matchingFunc->function;
-    else {
-        llvm::Constant *intPtr = 
-            llvm::ConstantExpr::getPtrToInt(matchingFunc->function, 
-                                            LLVMTypes::PointerIntType);
-        std::vector<llvm::Constant *> smear;
-        for (int i = 0; i < g->target.vectorWidth; ++i)
-            smear.push_back(intPtr);
-        return llvm::ConstantVector::get(smear);
     }
+
+    return matchingFunc->function;
 }
 
 
@@ -7364,8 +7532,12 @@ static void
 lPrintOverloadCandidates(SourcePos pos, const std::vector<Symbol *> &funcs, 
                          const std::vector<const Type *> &argTypes, 
                          const std::vector<bool> *argCouldBeNULL) {
-    for (unsigned int i = 0; i < funcs.size(); ++i)
-        Error(funcs[i]->pos, "Candidate function:");
+    for (unsigned int i = 0; i < funcs.size(); ++i) {
+        const FunctionType *ft = 
+            dynamic_cast<const FunctionType *>(funcs[i]->type);
+        Assert(ft != NULL);
+        Error(funcs[i]->pos, "Candidate function: %s.", ft->GetString().c_str());
+    }
 
     std::string passedTypes = "Passed types: (";
     for (unsigned int i = 0; i < argTypes.size(); ++i) {
@@ -7378,283 +7550,221 @@ lPrintOverloadCandidates(SourcePos pos, const std::vector<Symbol *> &funcs,
     Error(pos, "%s", passedTypes.c_str());
 }
 
-             
-/** Helper function used for function overload resolution: returns zero
-    cost if the call argument's type exactly matches the function argument
-    type (modulo a conversion to a const type if needed), otherwise reports
-    failure.
- */ 
-static int
-lExactMatch(const Type *callType, const Type *funcArgType) {
-    if (dynamic_cast<const ReferenceType *>(callType) == NULL)
-        callType = callType->GetAsNonConstType();
-    if (dynamic_cast<const ReferenceType *>(funcArgType) != NULL && 
-        dynamic_cast<const ReferenceType *>(callType) == NULL)
-        callType = new ReferenceType(callType);
 
-    return Type::Equal(callType, funcArgType) ? 0 : -1;
+static bool
+lIsMatchToNonConstReference(const Type *callType, const Type *funcArgType) {
+    return (dynamic_cast<const ReferenceType *>(funcArgType) &&
+            (funcArgType->IsConstType() == false) &&
+            Type::Equal(callType, funcArgType->GetReferenceTarget()));
 }
 
 
-/** Helper function used for function overload resolution: returns a cost
-    of 1 if the call argument type and the function argument type match,
-    modulo conversion to a reference type if needed.
- */
-static int
-lMatchIgnoringReferences(const Type *callType, const Type *funcArgType) {
-    int prev = lExactMatch(callType, funcArgType);
-    if (prev != -1)
-        return prev;
-
-    callType = callType->GetReferenceTarget();
-    if (funcArgType->IsConstType())
-        callType = callType->GetAsConstType();
-
-    return Type::Equal(callType,
-                       funcArgType->GetReferenceTarget()) ? 1 : -1;
+static bool
+lIsMatchToNonConstReferenceUnifToVarying(const Type *callType,
+                                         const Type *funcArgType) {
+    return (dynamic_cast<const ReferenceType *>(funcArgType) &&
+            (funcArgType->IsConstType() == false) &&
+            Type::Equal(callType->GetAsVaryingType(),
+                        funcArgType->GetReferenceTarget()));
 }
 
-/** Helper function used for function overload resolution: returns a cost
-    of 1 if converting the argument to the call type only requires a type
-    conversion that won't lose information.  Otherwise reports failure.
-*/
-static int
-lMatchWithTypeWidening(const Type *callType, const Type *funcArgType) {
-    int prev = lMatchIgnoringReferences(callType, funcArgType);
-    if (prev != -1)
-        return prev;
-
+/** Helper function used for function overload resolution: returns true if
+    converting the call argument to the function argument type only requires
+    a type conversion that won't lose information.  Otherwise returns false.
+  */
+static bool
+lIsMatchWithTypeWidening(const Type *callType, const Type *funcArgType) {
     const AtomicType *callAt = dynamic_cast<const AtomicType *>(callType);
     const AtomicType *funcAt = dynamic_cast<const AtomicType *>(funcArgType);
     if (callAt == NULL || funcAt == NULL)
-        return -1;
+        return false;
 
     if (callAt->IsUniformType() != funcAt->IsUniformType())
-        return -1;
+        return false;
 
     switch (callAt->basicType) {
     case AtomicType::TYPE_BOOL:
-        return 1;
+        return true;
     case AtomicType::TYPE_INT8:
     case AtomicType::TYPE_UINT8:
-        return (funcAt->basicType != AtomicType::TYPE_BOOL) ? 1 : -1;
+        return (funcAt->basicType != AtomicType::TYPE_BOOL);
     case AtomicType::TYPE_INT16:
     case AtomicType::TYPE_UINT16:
         return (funcAt->basicType != AtomicType::TYPE_BOOL &&
                 funcAt->basicType != AtomicType::TYPE_INT8 &&
-                funcAt->basicType != AtomicType::TYPE_UINT8) ? 1 : -1;
+                funcAt->basicType != AtomicType::TYPE_UINT8);
     case AtomicType::TYPE_INT32:
     case AtomicType::TYPE_UINT32:
         return (funcAt->basicType == AtomicType::TYPE_INT32 ||
                 funcAt->basicType == AtomicType::TYPE_UINT32 ||
                 funcAt->basicType == AtomicType::TYPE_INT64 ||
-                funcAt->basicType == AtomicType::TYPE_UINT64) ? 1 : -1;
+                funcAt->basicType == AtomicType::TYPE_UINT64);
     case AtomicType::TYPE_FLOAT:
-        return (funcAt->basicType == AtomicType::TYPE_DOUBLE) ? 1 : -1;
+        return (funcAt->basicType == AtomicType::TYPE_DOUBLE);
     case AtomicType::TYPE_INT64:
     case AtomicType::TYPE_UINT64:
         return (funcAt->basicType == AtomicType::TYPE_INT64 ||
-                funcAt->basicType == AtomicType::TYPE_UINT64) ? 1 : -1;
+                funcAt->basicType == AtomicType::TYPE_UINT64);
     case AtomicType::TYPE_DOUBLE:
-        return -1;
+        return false;
     default:
         FATAL("Unhandled atomic type");
-        return -1;
+        return false;
     }
 }
 
 
-/** Helper function used for function overload resolution: returns a cost
-    of 1 if the call argument type and the function argument type match if
-    we only do a uniform -> varying type conversion but otherwise have
-    exactly the same type.
+/** Helper function used for function overload resolution: returns true if
+    the call argument type and the function argument type match if we only
+    do a uniform -> varying type conversion but otherwise have exactly the
+    same type.
  */
-static int
-lMatchIgnoringUniform(const Type *callType, const Type *funcArgType) {
-    int prev = lMatchWithTypeWidening(callType, funcArgType);
-    if (prev != -1)
-        return prev;
-
-    if (dynamic_cast<const ReferenceType *>(callType) == NULL)
-        callType = callType->GetAsNonConstType();
-
+static bool
+lIsMatchWithUniformToVarying(const Type *callType, const Type *funcArgType) {
     return (callType->IsUniformType() && 
             funcArgType->IsVaryingType() &&
-            Type::Equal(callType->GetAsVaryingType(), funcArgType)) ? 1 : -1;
+            Type::EqualIgnoringConst(callType->GetAsVaryingType(), funcArgType));
 }
 
 
-/** Helper function used for function overload resolution: returns a cost
-    of 1 if we can type convert from the call argument type to the function
+/** Helper function used for function overload resolution: returns true if
+    we can type convert from the call argument type to the function
     argument type, but without doing a uniform -> varying conversion.
  */
-static int
-lMatchWithTypeConvSameVariability(const Type *callType,
-                                  const Type *funcArgType) {
-    int prev = lMatchIgnoringUniform(callType, funcArgType);
-    if (prev != -1)
-        return prev;
-
-    if (CanConvertTypes(callType, funcArgType) &&
-        (callType->IsUniformType() == funcArgType->IsUniformType()))
-        return 1;
-    else
-        return -1;
+static bool
+lIsMatchWithTypeConvSameVariability(const Type *callType,
+                                    const Type *funcArgType) {
+    return (CanConvertTypes(callType, funcArgType) &&
+            (callType->GetVariability() == funcArgType->GetVariability()));
 }
 
 
-/** Helper function used for function overload resolution: returns a cost
-    of 1 if there is any type conversion that gets us from the caller
-    argument type to the function argument type.
+/* Returns the set of function overloads that are potential matches, given
+   argCount values being passed as arguments to the function call.
  */
-static int
-lMatchWithTypeConv(const Type *callType, const Type *funcArgType) {
-    int prev = lMatchWithTypeConvSameVariability(callType, funcArgType);
-    if (prev != -1)
-        return prev;
-        
-    return CanConvertTypes(callType, funcArgType) ? 0 : -1;
-}
-
-
-/** Given a set of potential matching functions and their associated cost,
-    return the one with the lowest cost, if unique.  Otherwise, if multiple
-    functions match with the same cost, return NULL.
- */
-static Symbol *
-lGetBestMatch(std::vector<std::pair<int, Symbol *> > &matches) {
-    Assert(matches.size() > 0);
-    int minCost = matches[0].first;
-
-    for (unsigned int i = 1; i < matches.size(); ++i)
-        minCost = std::min(minCost, matches[i].first);
-
-    Symbol *match = NULL;
-    for (unsigned int i = 0; i < matches.size(); ++i) {
-        if (matches[i].first == minCost) {
-            if (match != NULL)
-                // multiple things had the same cost
-                return NULL;
-            else
-                match = matches[i].second;
-        }
-    }
-    return match;
-}
-
-
-/** See if we can find a single function from the set of overload options
-    based on the predicate function passed in.  Returns true if no more
-    tries should be made to find a match, either due to success from
-    finding a single overloaded function that matches or failure due to
-    finding multiple ambiguous matches.
- */
-bool
-FunctionSymbolExpr::tryResolve(int (*matchFunc)(const Type *, const Type *),
-                               SourcePos argPos,
-                               const std::vector<const Type *> &callTypes,
-                               const std::vector<bool> *argCouldBeNULL) {
-    const char *funName = candidateFunctions.front()->name.c_str();
-
-    std::vector<std::pair<int, Symbol *> > matches;
-    std::vector<Symbol *>::iterator iter;
-    for (iter = candidateFunctions.begin(); 
-         iter != candidateFunctions.end(); ++iter) {
-        // Loop over the set of candidate functions and try each one
-        Symbol *candidateFunction = *iter;
+std::vector<Symbol *>
+FunctionSymbolExpr::getCandidateFunctions(int argCount) const {
+    std::vector<Symbol *> ret;
+    for (int i = 0; i < (int)candidateFunctions.size(); ++i) {
         const FunctionType *ft = 
-            dynamic_cast<const FunctionType *>(candidateFunction->type);
+            dynamic_cast<const FunctionType *>(candidateFunctions[i]->type);
         Assert(ft != NULL);
 
         // There's no way to match if the caller is passing more arguments
         // than this function instance takes.
-        if ((int)callTypes.size() > ft->GetNumParameters())
+        if (argCount > ft->GetNumParameters())
             continue;
 
-        int i;
-        // Note that we're looping over the caller arguments, not the
-        // function arguments; it may be ok to have more arguments to the
-        // function than are passed, if the function has default argument
-        // values.  This case is handled below.
-        int cost = 0;
-        for (i = 0; i < (int)callTypes.size(); ++i) {
-            // This may happen if there's an error earlier in compilation.
-            // It's kind of a silly to redundantly discover this for each
-            // potential match versus detecting this earlier in the
-            // matching process and just giving up.
-            const Type *paramType = ft->GetParameterType(i);
+        // Not enough arguments, and no default argument value to save us
+        if (argCount < ft->GetNumParameters() &&
+            ft->GetParameterDefault(argCount) == NULL)
+            continue;
 
-            if (callTypes[i] == NULL || paramType == NULL ||
-                dynamic_cast<const FunctionType *>(callTypes[i]) != NULL)
-                return false;
-
-            int argCost = matchFunc(callTypes[i], paramType);
-            if (argCost == -1) {
-                if (argCouldBeNULL != NULL && (*argCouldBeNULL)[i] == true &&
-                    dynamic_cast<const PointerType *>(paramType) != NULL)
-                    // If the passed argument value is zero and this is a
-                    // pointer type, then it can convert to a NULL value of
-                    // that pointer type.
-                    argCost = 0;
-                else
-                    // If the predicate function returns -1, we have failed no
-                    // matter what else happens, so we stop trying
-                    break;
-            }
-            cost += argCost;
-        }
-        if (i == (int)callTypes.size()) {
-            // All of the arguments matched!
-            if (i == ft->GetNumParameters())
-                // And we have exactly as many arguments as the function
-                // wants, so we're done.
-                matches.push_back(std::make_pair(cost, candidateFunction));
-            else if (i < ft->GetNumParameters() && 
-                     ft->GetParameterDefault(i) != NULL)
-                // Otherwise we can still make it if there are default
-                // arguments for the rest of the arguments!  Because in
-                // Module::AddFunction() we have verified that once the
-                // default arguments start, then all of the following ones
-                // have them as well.  Therefore, we just need to check if
-                // the arg we stopped at has a default value and we're
-                // done.
-                matches.push_back(std::make_pair(cost, candidateFunction));
-            // otherwise, we don't have a match
-        }
+        // Success
+        ret.push_back(candidateFunctions[i]);
     }
+    return ret;
+}
 
-    if (matches.size() == 0)
+
+static bool
+lArgIsPointerType(const Type *type) {
+    if (dynamic_cast<const PointerType *>(type) != NULL)
+        return true;
+
+    const ReferenceType *rt = dynamic_cast<const ReferenceType *>(type);
+    if (rt == NULL)
         return false;
-    else if ((matchingFunc = lGetBestMatch(matches)) != NULL)
-        // We have a match!
-        return true;
-    else {
-        Error(pos, "Multiple overloaded instances of function \"%s\" matched.",
-              funName);
 
-        // select the matches that have the lowest cost
-        std::vector<Symbol *> bestMatches;
-        int minCost = matches[0].first;
-        for (unsigned int i = 1; i < matches.size(); ++i)
-            minCost = std::min(minCost, matches[i].first);
-        for (unsigned int i = 0; i < matches.size(); ++i)
-            if (matches[i].first == minCost)
-                bestMatches.push_back(matches[i].second);
+    const Type *t = rt->GetReferenceTarget();
+    return (dynamic_cast<const PointerType *>(t) != NULL);
+}
 
-        // And print a useful error message
-        lPrintOverloadCandidates(argPos, bestMatches, callTypes, argCouldBeNULL);
 
-        // Stop trying to find more matches after an ambigious set of
-        // matches.
-        return true;
+/** This function computes the value of a cost function that represents the
+    cost of calling a function of the given type with arguments of the
+    given types.  If it's not possible to call the function, regardless of
+    any type conversions applied, a cost of -1 is returned.
+ */
+int
+FunctionSymbolExpr::computeOverloadCost(const FunctionType *ftype,
+                                        const std::vector<const Type *> &argTypes,
+                                        const std::vector<bool> *argCouldBeNULL,
+                                        const std::vector<bool> *argIsConstant) {
+    int costSum = 0;
+
+    // In computing the cost function, we only worry about the actual
+    // argument types--using function default parameter values is free for
+    // the purposes here...
+    for (int i = 0; i < (int)argTypes.size(); ++i) {
+        // The cost imposed by this argument will be a multiple of
+        // costScale.  The intent is that several lower-cost conversions
+        // sum to less than one instance of the next higher-cost conversion.
+        // NOTE(review): with a scale of argCount+1 and bucket weights that
+        // only double, this looks true only for one argument--confirm.
+        int costScale = argTypes.size() + 1;
+
+        const Type *fargType = ftype->GetParameterType(i);
+        const Type *callType = argTypes[i];
+
+        if (Type::Equal(callType, fargType))
+            // Perfect match: no cost
+            costSum += 0;
+        else if (argCouldBeNULL && (*argCouldBeNULL)[i] &&
+                 lArgIsPointerType(fargType))
+            // Passing NULL to a pointer-typed parameter is also a no-cost
+            // operation
+            costSum += 0;
+        else {
+            // If the argument is a compile-time constant, we'd like to
+            // count the cost of various conversions as much lower than the
+            // cost if it wasn't--so scale up the cost when this isn't the
+            // case.
+            if (argIsConstant == NULL || (*argIsConstant)[i] == false)
+                costScale *= 128;
+
+            // For convenience, normalize to non-const types (except for
+            // references, where const-ness matters).  For all other types,
+            // we're passing by value anyway, so const doesn't matter.
+            const Type *callTypeNC = callType, *fargTypeNC = fargType;
+            if (dynamic_cast<const ReferenceType *>(callType) == NULL)
+                callTypeNC = callType->GetAsNonConstType();
+            if (dynamic_cast<const ReferenceType *>(fargType) == NULL)
+                fargTypeNC = fargType->GetAsNonConstType();
+                
+            if (Type::Equal(callTypeNC, fargTypeNC))
+                // Exact match (after dealing with references, above)
+                costSum += 1 * costScale;
+            // note: orig fargType for the next two...
+            else if (lIsMatchToNonConstReference(callTypeNC, fargType))
+                costSum += 2 * costScale;
+            else if (lIsMatchToNonConstReferenceUnifToVarying(callTypeNC, fargType))
+                costSum += 4 * costScale;
+            else if (lIsMatchWithTypeWidening(callTypeNC, fargTypeNC))
+                costSum += 8 * costScale;
+            else if (lIsMatchWithUniformToVarying(callTypeNC, fargTypeNC))
+                costSum += 16 * costScale;
+            else if (lIsMatchWithTypeConvSameVariability(callTypeNC, fargTypeNC))
+                costSum += 32 * costScale;
+            else if (CanConvertTypes(callTypeNC, fargTypeNC))
+                costSum += 64 * costScale;
+            else
+                // Failure--no type conversion possible...
+                return -1;
+        }
     }
+
+    return costSum;
 }
 
 
 bool
 FunctionSymbolExpr::ResolveOverloads(SourcePos argPos,
                                      const std::vector<const Type *> &argTypes,
-                                     const std::vector<bool> *argCouldBeNULL) {
+                                     const std::vector<bool> *argCouldBeNULL,
+                                     const std::vector<bool> *argIsConstant) {
+    const char *funName = candidateFunctions.front()->name.c_str();
+
     triedToResolve = true;
 
     // Functions with names that start with "__" should only be various
@@ -7665,45 +7775,67 @@ FunctionSymbolExpr::ResolveOverloads(SourcePos argPos,
     // called.
     bool exactMatchOnly = (name.substr(0,2) == "__");
 
-    // Is there an exact match that doesn't require any argument type
-    // conversion (other than converting type -> reference type)?
-    if (tryResolve(lExactMatch, argPos, argTypes, argCouldBeNULL))
-        return true;
+    // First, find the subset of overload candidates that take the same
+    // number of parameters as we have arguments (including functions that
+    // take more parameters but have defaults starting no later than just
+    // after our last argument).
+    std::vector<Symbol *> actualCandidates = 
+        getCandidateFunctions(argTypes.size());
 
-    if (exactMatchOnly == false) {
-        // Try to find a single match ignoring references
-        if (tryResolve(lMatchIgnoringReferences, argPos, argTypes, 
-                       argCouldBeNULL))
-            return true;
+    int bestMatchCost = 1<<30;
+    std::vector<Symbol *> matches;
+    std::vector<int> candidateCosts;
 
-        // Try to find an exact match via type widening--i.e. int8 ->
-        // int16, etc.--things that don't lose data.
-        if (tryResolve(lMatchWithTypeWidening, argPos, argTypes, argCouldBeNULL))
-            return true;
+    if (actualCandidates.size() == 0)
+        goto failure;
 
-        // Next try to see if there's a match via just uniform -> varying
-        // promotions.
-        if (tryResolve(lMatchIgnoringUniform, argPos, argTypes, argCouldBeNULL))
-            return true;
-
-        // Try to find a match via type conversion, but don't change
-        // unif->varying
-        if (tryResolve(lMatchWithTypeConvSameVariability, argPos, argTypes,
-                       argCouldBeNULL))
-            return true;
-    
-        // Last chance: try to find a match via arbitrary type conversion.
-        if (tryResolve(lMatchWithTypeConv, argPos, argTypes, argCouldBeNULL))
-            return true;
+    // Compute the cost for calling each of the candidate functions
+    for (int i = 0; i < (int)actualCandidates.size(); ++i) {
+        const FunctionType *ft = 
+            dynamic_cast<const FunctionType *>(actualCandidates[i]->type);
+        Assert(ft != NULL);
+        candidateCosts.push_back(computeOverloadCost(ft, argTypes,
+                                                     argCouldBeNULL,
+                                                     argIsConstant));
     }
 
-    // failure :-(
-    const char *funName = candidateFunctions.front()->name.c_str();
-    Error(pos, "Unable to find matching overload for call to function \"%s\"%s.",
-          funName, exactMatchOnly ? " only considering exact matches" : "");
-    lPrintOverloadCandidates(argPos, candidateFunctions, argTypes, 
-                             argCouldBeNULL);
-    return false;
+    // Find the best cost, and then the candidate or candidates that have
+    // that cost.
+    for (int i = 0; i < (int)candidateCosts.size(); ++i) {
+        if (candidateCosts[i] != -1 && candidateCosts[i] < bestMatchCost)
+            bestMatchCost = candidateCosts[i];
+    }
+    // None of the candidates matched
+    if (bestMatchCost == (1<<30))
+        goto failure;
+    for (int i = 0; i < (int)candidateCosts.size(); ++i) {
+        if (candidateCosts[i] == bestMatchCost)
+            matches.push_back(actualCandidates[i]);
+    }
+
+    if (matches.size() == 1) {
+        // Only one match: success
+        matchingFunc = matches[0];
+        return true;
+    }
+    else if (matches.size() > 1) {
+        // Multiple matches: ambiguous
+        Error(pos, "Multiple overloaded functions matched call to function "
+              "\"%s\"%s.", funName, 
+              exactMatchOnly ? " only considering exact matches" : "");
+        lPrintOverloadCandidates(argPos, matches, argTypes, argCouldBeNULL);
+        return false;
+    }
+    else {
+        // No matches at all
+ failure:
+        Error(pos, "Unable to find any matching overload for call to function "
+              "\"%s\"%s.", funName, 
+              exactMatchOnly ? " only considering exact matches" : "");
+        lPrintOverloadCandidates(argPos, candidateFunctions, argTypes, 
+                                 argCouldBeNULL);
+        return false;
+    }
 }
 
 
@@ -7788,7 +7920,7 @@ NullPointerExpr::GetConstant(const Type *type) const {
     if (pt == NULL)
         return NULL;
 
-    LLVM_TYPE_CONST llvm::Type *llvmType = type->LLVMType(g->ctx);
+    llvm::Type *llvmType = type->LLVMType(g->ctx);
     if (llvmType == NULL) {
         Assert(m->errorCount > 0);
         return NULL;
@@ -7840,7 +7972,7 @@ NewExpr::NewExpr(int typeQual, const Type *t, Expr *init, Expr *count,
         // varying new.
         isVarying = (typeQual == 0) || (typeQual & TYPEQUAL_VARYING);
 
-    if (allocType != NULL && allocType->HasUnboundVariability())
+    if (allocType != NULL)
         allocType = allocType->ResolveUnboundVariability(Variability::Uniform);
 }
 
@@ -7934,7 +8066,7 @@ NewExpr::GetValue(FunctionEmitContext *ctx) const {
                 // Initialize the memory pointed to by the pointer for the
                 // current lane.
                 ctx->SetCurrentBasicBlock(bbInit);
-                LLVM_TYPE_CONST llvm::Type *ptrType = 
+                llvm::Type *ptrType = 
                     retType->GetAsUniformType()->LLVMType(g->ctx);
                 llvm::Value *ptr = ctx->IntToPtrInst(p, ptrType);
                 InitSymbol(ptr, allocType, initExpr, ctx, pos);
@@ -7950,7 +8082,7 @@ NewExpr::GetValue(FunctionEmitContext *ctx) const {
         // For uniform news, we just need to cast the void * to be a
         // pointer of the return type and to run the code for initializers,
         // if present.
-        LLVM_TYPE_CONST llvm::Type *ptrType = retType->LLVMType(g->ctx);
+        llvm::Type *ptrType = retType->LLVMType(g->ctx);
         ptrValue = ctx->BitCastInst(ptrValue, ptrType, "cast_new_ptr");
 
         if (initExpr != NULL)
diff --git a/expr.h b/expr.h
index 5c59ae83..f7d112b9 100644
--- a/expr.h
+++ b/expr.h
@@ -584,6 +584,7 @@ public:
     Expr *TypeCheck();
     Expr *Optimize();
     int EstimateCost() const;
+    llvm::Constant *GetConstant(const Type *type) const;
 
     Expr *expr;
 };
@@ -651,20 +652,26 @@ public:
         function overloading, this method resolves which actual function
         the arguments match best.  If the argCouldBeNULL parameter is
         non-NULL, each element indicates whether the corresponding argument
-        is the number zero, indicating that it could be a NULL pointer.
-        This parameter may be NULL (for cases where overload resolution is
-        being done just given type information without the parameter
-        argument expressions being available.  It returns true on success.
+        is the number zero, indicating that it could be a NULL pointer, and
+        if argIsConstant is non-NULL, each element indicates whether the
+        corresponding argument is a compile-time constant value.  Both of
+        these parameters may be NULL (for cases where overload resolution
+        is being done just given type information without the parameter
+        argument expressions being available).  This function returns true
+        on success.
      */
     bool ResolveOverloads(SourcePos argPos,
                           const std::vector<const Type *> &argTypes,
-                          const std::vector<bool> *argCouldBeNULL = NULL);
+                          const std::vector<bool> *argCouldBeNULL = NULL,
+                          const std::vector<bool> *argIsConstant = NULL);
     Symbol *GetMatchingFunction();
 
 private:
-    bool tryResolve(int (*matchFunc)(const Type *, const Type *),
-                    SourcePos argPos, const std::vector<const Type *> &argTypes,
-                    const std::vector<bool> *argCouldBeNULL);
+    std::vector<Symbol *> getCandidateFunctions(int argCount) const;
+    static int computeOverloadCost(const FunctionType *ftype,
+                                   const std::vector<const Type *> &argTypes,
+                                   const std::vector<bool> *argCouldBeNULL,
+                            const std::vector<bool> *argIsConstant);
 
     /** Name of the function that is being called. */
     std::string name;
diff --git a/func.cpp b/func.cpp
index c1ca7ee6..29dd9ecf 100644
--- a/func.cpp
+++ b/func.cpp
@@ -1,5 +1,5 @@
 /*
-  Copyright (c) 2011, Intel Corporation
+  Copyright (c) 2011-2012, Intel Corporation
   All rights reserved.
 
   Redistribution and use in source and binary forms, with or without
@@ -66,9 +66,8 @@
 #include <llvm/Support/ToolOutputFile.h>
 #include <llvm/Assembly/PrintModulePass.h>
 
-Function::Function(Symbol *s, const std::vector<Symbol *> &a, Stmt *c) {
+Function::Function(Symbol *s, Stmt *c) {
     sym = s;
-    args = a;
     code = c;
 
     maskSymbol = m->symbolTable->LookupVariable("__mask");
@@ -104,9 +103,17 @@ Function::Function(Symbol *s, const std::vector<Symbol *> &a, Stmt *c) {
     const FunctionType *type = dynamic_cast<const FunctionType *>(sym->type);
     Assert(type != NULL);
 
-    for (unsigned int i = 0; i < args.size(); ++i)
-        if (dynamic_cast<const ReferenceType *>(args[i]->type) == NULL)
-            args[i]->parentFunction = this;
+    for (int i = 0; i < type->GetNumParameters(); ++i) {
+        const char *paramName = type->GetParameterName(i).c_str();
+        Symbol *sym = m->symbolTable->LookupVariable(paramName);
+        if (sym == NULL)
+            Assert(strncmp(paramName, "__anon_parameter_", 17) == 0);
+        args.push_back(sym);
+
+        const Type *t = type->GetParameterType(i);
+        if (sym != NULL && dynamic_cast<const ReferenceType *>(t) == NULL)
+            sym->parentFunction = this;
+    }
 
     if (type->isTask) {
         threadIndexSym = m->symbolTable->LookupVariable("threadIndex");
@@ -145,7 +152,8 @@ Function::GetType() const {
     'mem2reg' pass will in turn promote to SSA registers..
  */
 static void
-lCopyInTaskParameter(int i, llvm::Value *structArgPtr, const std::vector<Symbol *> &args,
+lCopyInTaskParameter(int i, llvm::Value *structArgPtr, const 
+                     std::vector<Symbol *> &args,
                      FunctionEmitContext *ctx) {
     // We expect the argument structure to come in as a poitner to a
     // structure.  Confirm and figure out its type here.
@@ -157,9 +165,13 @@ lCopyInTaskParameter(int i, llvm::Value *structArgPtr, const std::vector<Symbol
         llvm::dyn_cast<const llvm::StructType>(pt->getElementType());
 
     // Get the type of the argument we're copying in and its Symbol pointer
-    LLVM_TYPE_CONST llvm::Type *argType = argStructType->getElementType(i);
+    llvm::Type *argType = argStructType->getElementType(i);
     Symbol *sym = args[i];
 
+    if (sym == NULL)
+        // anonymous parameter, so don't worry about it
+        return;
+
     // allocate space to copy the parameter in to
     sym->storagePtr = ctx->AllocaInst(argType, sym->name.c_str());
 
@@ -240,6 +252,10 @@ Function::emitCode(FunctionEmitContext *ctx, llvm::Function *function,
         llvm::Function::arg_iterator argIter = function->arg_begin(); 
         for (unsigned int i = 0; i < args.size(); ++i, ++argIter) {
             Symbol *sym = args[i];
+            if (sym == NULL)
+                // anonymous function parameter
+                continue;
+
             argIter->setName(sym->name.c_str());
 
             // Allocate stack storage for the parameter and emit code
@@ -419,7 +435,7 @@ Function::GenerateIR() {
         Assert(type != NULL);
         if (type->isExported) {
             if (!type->isTask) {
-                LLVM_TYPE_CONST llvm::FunctionType *ftype = 
+                llvm::FunctionType *ftype = 
                     type->LLVMFunctionType(g->ctx);
                 llvm::GlobalValue::LinkageTypes linkage = llvm::GlobalValue::ExternalLinkage;
                 std::string functionName = sym->name;
diff --git a/func.h b/func.h
index d0bf0731..6d0527fc 100644
--- a/func.h
+++ b/func.h
@@ -1,5 +1,5 @@
 /*
-  Copyright (c) 2011, Intel Corporation
+  Copyright (c) 2011-2012, Intel Corporation
   All rights reserved.
 
   Redistribution and use in source and binary forms, with or without
@@ -43,7 +43,7 @@
 
 class Function {
 public:
-    Function(Symbol *sym, const std::vector<Symbol *> &args, Stmt *code);
+    Function(Symbol *sym, Stmt *code);
 
     const Type *GetReturnType() const;
     const FunctionType *GetType() const;
diff --git a/ispc.cpp b/ispc.cpp
index 49623be4..dce3ed77 100644
--- a/ispc.cpp
+++ b/ispc.cpp
@@ -1,5 +1,5 @@
 /*
-  Copyright (c) 2010-2011, Intel Corporation
+  Copyright (c) 2010-2012, Intel Corporation
   All rights reserved.
 
   Redistribution and use in source and binary forms, with or without
@@ -70,9 +70,88 @@ Module *m;
 ///////////////////////////////////////////////////////////////////////////
 // Target
 
+#ifndef ISPC_IS_WINDOWS
+static void __cpuid(int info[4], int infoType) {  // Runs CPUID for leaf infoType; info[0..3] receive EAX, EBX, ECX, EDX.
+    __asm__ __volatile__ ("cpuid"
+                          : "=a" (info[0]), "=b" (info[1]), "=c" (info[2]), "=d" (info[3])
+                          : "0" (infoType));  // matching constraint: infoType is passed in the same register as output 0 (EAX)
+}
+
+/* Run CPUID with EAX=level and ECX=count.  Save %ebx in case it's the PIC register */
+static void __cpuidex(int info[4], int level, int count) {
+  __asm__ __volatile__ ("xchg{l}\t{%%}ebx, %1\n\t"  // park ebx in the scratch register chosen for operand 1
+                        "cpuid\n\t"
+                        "xchg{l}\t{%%}ebx, %1\n\t"  // swap back: operand 1 now holds CPUID's EBX, ebx has its old value
+                        : "=a" (info[0]), "=r" (info[1]), "=c" (info[2]), "=d" (info[3])
+                        : "0" (level), "2" (count));  // level goes in with output 0 (EAX), count with output 2 (ECX)
+}  // NOTE(review): the swap is 32 bits wide; on x86-64 this would seem to keep only the low half of rbx -- confirm
+#endif // ISPC_IS_WINDOWS
+
+
+static const char *
+lGetSystemISA() {
+    // Slots of the array filled in by the cpuid helpers: EAX, EBX, ECX, EDX.
+    const int ebx = 1, ecx = 2, edx = 3;
+    int regs[4];
+    __cpuid(regs, 1);
+
+    const bool hasAVX = (regs[ecx] & (1 << 28)) != 0;
+    const bool hasSSE4 = (regs[ecx] & (1 << 19)) != 0;
+    const bool hasSSE2 = (regs[edx] & (1 << 26)) != 0;
+
+    if (hasAVX) {
+        // AVX is there for sure; cpuid with eax=7, ecx=0 tells us
+        // whether AVX2 is available as well.
+        __cpuidex(regs, 7, 0);
+        return ((regs[ebx] & (1 << 5)) != 0) ? "avx2" : "avx";
+    }
+    if (hasSSE4) return "sse4";
+    if (hasSSE2) return "sse2";
+
+    fprintf(stderr, "Unable to detect supported SSE/AVX ISA.  Exiting.\n");
+    exit(1);
+}
+
+
+static const char *supportedCPUs[] = {   // CPU names that Target::GetTarget() accepts and SupportedTargetCPUs() lists
+    "atom", "penryn", "core2", "corei7",
+#if defined(LLVM_3_0) || defined(LLVM_3_0svn) || defined(LLVM_3_1svn)
+    "corei7-avx"   // only present when one of the LLVM 3.x macros tested above is defined
+#endif
+};
+
+
 bool
 Target::GetTarget(const char *arch, const char *cpu, const char *isa,
                   bool pic, Target *t) {
+    if (isa == NULL) {
+        if (cpu != NULL) {
+            // If a CPU was specified explicitly, try to pick the best
+            // possible ISA based on that.
+#if defined(LLVM_3_0) || defined(LLVM_3_0svn) || defined(LLVM_3_1svn)
+            if (!strcmp(cpu, "sandybridge") ||
+                !strcmp(cpu, "corei7-avx"))
+                isa = "avx";
+            else
+#endif
+                  if (!strcmp(cpu, "corei7") ||
+                      !strcmp(cpu, "penryn"))
+                isa = "sse4";
+            else
+                isa = "sse2";
+            fprintf(stderr, "Notice: no --target specified on command-line.  "
+                    "Using ISA \"%s\" based on specified CPU \"%s\".\n", isa,
+                    cpu);
+        }
+        else {
+            // No CPU and no ISA, so use CPUID to figure out what this CPU
+            // supports.
+            isa = lGetSystemISA();
+            fprintf(stderr, "Notice: no --target specified on command-line.  "
+                    "Using system ISA \"%s\".\n", isa);
+        }
+    }
+
     if (cpu == NULL) {
         std::string hostCPU = llvm::sys::getHostCPUName();
         if (hostCPU.size() > 0)
@@ -82,19 +161,24 @@ Target::GetTarget(const char *arch, const char *cpu, const char *isa,
             cpu = "generic";
         }
     }
+    else {
+        bool foundCPU = false;
+        for (int i = 0; i < int(sizeof(supportedCPUs) / sizeof(supportedCPUs[0])); 
+             ++i) {
+            if (!strcmp(cpu, supportedCPUs[i])) {
+                foundCPU = true;
+                break;
+            }
+        }
+        if (foundCPU == false) {
+            fprintf(stderr, "Error: CPU type \"%s\" unknown. Supported CPUs: "
+                    "%s.\n", cpu, SupportedTargetCPUs().c_str());
+            return false;
+        }
+    }
+
     t->cpu = cpu;
 
-    if (isa == NULL) {
-        if (!strcasecmp(cpu, "atom"))
-            isa = "sse2";
-#if defined(LLVM_3_0) || defined(LLVM_3_0svn) || defined(LLVM_3_1svn)
-        else if (!strcasecmp(cpu, "sandybridge") ||
-                 !strcasecmp(cpu, "corei7-avx"))
-            isa = "avx";
-#endif // LLVM_3_0
-        else
-            isa = "sse4";
-    }
     if (arch == NULL)
         arch = "x86-64";
 
@@ -249,17 +333,16 @@ Target::GetTarget(const char *arch, const char *cpu, const char *isa,
 }
 
 
-const char *
+std::string
 Target::SupportedTargetCPUs() {
-    return "atom, barcelona, core2, corei7, "
-#if defined(LLVM_3_0) || defined(LLVM_3_0svn) || defined(LLVM_3_1svn)
-        "corei7-avx, "
-#endif
-        "istanbul, nocona, penryn, "
-#ifdef LLVM_2_9
-        "sandybridge, "
-#endif
-        "westmere";
+    const int nCPUs = sizeof(supportedCPUs) / sizeof(supportedCPUs[0]);
+    std::string joined;  // entries of supportedCPUs, comma-and-space separated
+    for (int idx = 0; idx < nCPUs; ++idx) {
+        if (idx > 0)
+            joined += ", ";
+        joined += supportedCPUs[idx];
+    }
+    return joined;
 }
 
 
@@ -318,8 +401,15 @@ Target::GetTargetMachine() const {
 #if defined(LLVM_3_1svn)
     std::string featuresString = attributes;
     llvm::TargetOptions options;
+#if 0
+    // This was breaking e.g. round() on SSE2, where the code we want to
+    // run wants to do:
+    // x += 0x1.0p23f;
+    // x -= 0x1.0p23f;
+    // But then LLVM was optimizing this away...
     if (g->opt.fastMath == true)
         options.UnsafeFPMath = 1;
+#endif
     llvm::TargetMachine *targetMachine = 
         target->createTargetMachine(triple, cpu, featuresString, options,
                                     relocModel);
@@ -367,7 +457,7 @@ Target::GetISAString() const {
 
 
 static bool
-lGenericTypeLayoutIndeterminate(LLVM_TYPE_CONST llvm::Type *type) {
+lGenericTypeLayoutIndeterminate(llvm::Type *type) {
     if (type->isPrimitiveType() || type->isIntegerTy())
         return false;
 
@@ -376,18 +466,18 @@ lGenericTypeLayoutIndeterminate(LLVM_TYPE_CONST llvm::Type *type) {
         type == LLVMTypes::Int1VectorType)
         return true;
 
-    LLVM_TYPE_CONST llvm::ArrayType *at = 
-        llvm::dyn_cast<LLVM_TYPE_CONST llvm::ArrayType>(type);
+    llvm::ArrayType *at = 
+        llvm::dyn_cast<llvm::ArrayType>(type);
     if (at != NULL)
         return lGenericTypeLayoutIndeterminate(at->getElementType());
 
-    LLVM_TYPE_CONST llvm::PointerType *pt = 
-        llvm::dyn_cast<LLVM_TYPE_CONST llvm::PointerType>(type);
+    llvm::PointerType *pt = 
+        llvm::dyn_cast<llvm::PointerType>(type);
     if (pt != NULL)
         return false;
 
-    LLVM_TYPE_CONST llvm::StructType *st =
-        llvm::dyn_cast<LLVM_TYPE_CONST llvm::StructType>(type);
+    llvm::StructType *st =
+        llvm::dyn_cast<llvm::StructType>(type);
     if (st != NULL) {
         for (int i = 0; i < (int)st->getNumElements(); ++i)
             if (lGenericTypeLayoutIndeterminate(st->getElementType(i)))
@@ -395,18 +485,18 @@ lGenericTypeLayoutIndeterminate(LLVM_TYPE_CONST llvm::Type *type) {
         return false;
     }
 
-    Assert(llvm::isa<LLVM_TYPE_CONST llvm::VectorType>(type));
+    Assert(llvm::isa<llvm::VectorType>(type));
     return true;
 }
 
 
 llvm::Value *
-Target::SizeOf(LLVM_TYPE_CONST llvm::Type *type, 
+Target::SizeOf(llvm::Type *type, 
                llvm::BasicBlock *insertAtEnd) {
     if (isa == Target::GENERIC &&
         lGenericTypeLayoutIndeterminate(type)) {
         llvm::Value *index[1] = { LLVMInt32(1) };
-        LLVM_TYPE_CONST llvm::PointerType *ptrType = llvm::PointerType::get(type, 0);
+        llvm::PointerType *ptrType = llvm::PointerType::get(type, 0);
         llvm::Value *voidPtr = llvm::ConstantPointerNull::get(ptrType);
 #if defined(LLVM_3_0) || defined(LLVM_3_0svn) || defined(LLVM_3_1svn)
         llvm::ArrayRef<llvm::Value *> arrayRef(&index[0], &index[1]);
@@ -428,7 +518,9 @@ Target::SizeOf(LLVM_TYPE_CONST llvm::Type *type,
 
     const llvm::TargetData *td = GetTargetMachine()->getTargetData();
     Assert(td != NULL);
-    uint64_t byteSize = td->getTypeSizeInBits(type) / 8;
+    uint64_t bitSize = td->getTypeSizeInBits(type);
+    Assert((bitSize % 8) == 0);
+    uint64_t byteSize = bitSize / 8;
     if (is32Bit || g->opt.force32BitAddressing)
         return LLVMInt32((int32_t)byteSize);
     else
@@ -437,12 +529,12 @@ Target::SizeOf(LLVM_TYPE_CONST llvm::Type *type,
 
 
 llvm::Value *
-Target::StructOffset(LLVM_TYPE_CONST llvm::Type *type, int element,
+Target::StructOffset(llvm::Type *type, int element,
                      llvm::BasicBlock *insertAtEnd) {
     if (isa == Target::GENERIC && 
         lGenericTypeLayoutIndeterminate(type) == true) {
         llvm::Value *indices[2] = { LLVMInt32(0), LLVMInt32(element) };
-        LLVM_TYPE_CONST llvm::PointerType *ptrType = llvm::PointerType::get(type, 0);
+        llvm::PointerType *ptrType = llvm::PointerType::get(type, 0);
         llvm::Value *voidPtr = llvm::ConstantPointerNull::get(ptrType);
 #if defined(LLVM_3_0) || defined(LLVM_3_0svn) || defined(LLVM_3_1svn)
         llvm::ArrayRef<llvm::Value *> arrayRef(&indices[0], &indices[2]);
@@ -464,8 +556,8 @@ Target::StructOffset(LLVM_TYPE_CONST llvm::Type *type, int element,
 
     const llvm::TargetData *td = GetTargetMachine()->getTargetData();
     Assert(td != NULL);
-    LLVM_TYPE_CONST llvm::StructType *structType = 
-        llvm::dyn_cast<LLVM_TYPE_CONST llvm::StructType>(type);
+    llvm::StructType *structType = 
+        llvm::dyn_cast<llvm::StructType>(type);
     Assert(structType != NULL);
     const llvm::StructLayout *sl = td->getStructLayout(structType);
     Assert(sl != NULL);
diff --git a/ispc.h b/ispc.h
index 360b7d99..d93788ec 100644
--- a/ispc.h
+++ b/ispc.h
@@ -38,10 +38,10 @@
 #ifndef ISPC_H
 #define ISPC_H
 
-#define ISPC_VERSION "1.2.1dev"
+#define ISPC_VERSION "1.2.2dev"
 
-#if !defined(LLVM_2_9) && !defined(LLVM_3_0) && !defined(LLVM_3_0svn) && !defined(LLVM_3_1svn)
-#error "Only LLVM 2.9, 3.0, and the 3.1 development branch are supported"
+#if !defined(LLVM_3_0) && !defined(LLVM_3_0svn) && !defined(LLVM_3_1svn)
+#error "Only LLVM 3.0, and the 3.1 development branch are supported"
 #endif
 
 #if defined(_WIN32) || defined(_WIN64)
@@ -92,12 +92,6 @@ namespace llvm {
     class Value;
 }
 
-// llvm::Type *s are no longer const in llvm 3.0
-#if defined(LLVM_3_0) || defined(LLVM_3_0svn) || defined(LLVM_3_1svn)
-#define LLVM_TYPE_CONST
-#else
-#define LLVM_TYPE_CONST const
-#endif
 
 class ArrayType;
 class AST;
@@ -116,6 +110,15 @@ class SymbolTable;
 class Type;
 struct VariableDeclaration;
 
+enum StorageClass {   // NOTE(review): looks like the storage-class specifier of a declaration -- inferred from the names, confirm
+    SC_NONE,       // presumably: no storage-class specifier was given
+    SC_EXTERN,
+    SC_STATIC,
+    SC_TYPEDEF,
+    SC_EXTERN_C    // presumably the extern-C linkage form; verify against the parser
+};
+
+
 /** @brief Representation of a range of positions in a source file.
 
     This class represents a range of characters in a source file
@@ -164,7 +167,7 @@ struct Target {
 
     /** Returns a comma-delimited string giving the names of the currently
         supported target CPUs. */
-    static const char *SupportedTargetCPUs();
+    static std::string SupportedTargetCPUs();
 
     /** Returns a comma-delimited string giving the names of the currently
         supported target architectures. */
@@ -182,13 +185,13 @@ struct Target {
     const char *GetISAString() const;
 
     /** Returns the size of the given type */
-    llvm::Value *SizeOf(LLVM_TYPE_CONST llvm::Type *type,
+    llvm::Value *SizeOf(llvm::Type *type,
                         llvm::BasicBlock *insertAtEnd);
 
     /** Given a structure type and an element number in the structure,
         returns a value corresponding to the number of bytes from the start
         of the structure where the element is located. */
-    llvm::Value *StructOffset(LLVM_TYPE_CONST llvm::Type *type,
+    llvm::Value *StructOffset(llvm::Type *type,
                               int element, llvm::BasicBlock *insertAtEnd);
 
     /** llvm Target object representing this target. */
diff --git a/lex.ll b/lex.ll
index 4130372f..96c19d1d 100644
--- a/lex.ll
+++ b/lex.ll
@@ -43,6 +43,7 @@
 #include <stdint.h>
 
 static uint64_t lParseBinary(const char *ptr, SourcePos pos, char **endPtr);
+static int lParseInteger(bool dotdotdot);
 static void lCComment(SourcePos *);
 static void lCppComment(SourcePos *);
 static void lHandleCppHash(SourcePos *);
@@ -322,7 +323,8 @@ inline int ispcRand() {
 %option nounistd
 
 WHITESPACE [ \t\r]+
-INT_NUMBER (([0-9]+)|(0x[0-9a-fA-F]+)|(0b[01]+))[kMG]?
+INT_NUMBER (([0-9]+)|(0x[0-9a-fA-F]+)|(0b[01]+))[uUlL]*[kMG]?[uUlL]*
+INT_NUMBER_DOTDOTDOT (([0-9]+)|(0x[0-9a-fA-F]+)|(0b[01]+))[uUlL]*[kMG]?[uUlL]*\.\.\.
 FLOAT_NUMBER (([0-9]+|(([0-9]+\.[0-9]*[fF]?)|(\.[0-9]+)))([eE][-+]?[0-9]+)?[fF]?)
 HEX_FLOAT_NUMBER (0x[01](\.[0-9a-fA-F]*)?p[-+]?[0-9]+[fF]?)
 
@@ -406,53 +408,14 @@ L?\"(\\.|[^\\"])*\" { lStringConst(&yylval, &yylloc); return TOKEN_STRING_LITERA
         return TOKEN_IDENTIFIER; 
 }
 
-{INT_NUMBER}+(u|U|l|L)*? { 
+{INT_NUMBER} { 
     RT;
-    int ls = 0, us = 0;
+    return lParseInteger(false);
+}
 
-    char *endPtr = NULL;
-    if (yytext[0] == '0' && yytext[1] == 'b')
-        yylval.intVal = lParseBinary(yytext+2, yylloc, &endPtr);
-    else {
-#if defined(ISPC_IS_WINDOWS) && !defined(__MINGW32__)
-        yylval.intVal = _strtoui64(yytext, &endPtr, 0);
-#else
-        // FIXME: should use strtouq and then issue an error if we can't
-        // fit into 64 bits...
-        yylval.intVal = strtoull(yytext, &endPtr, 0);
-#endif
-    }
-
-    bool kilo = false, mega = false, giga = false;
-    for (; *endPtr; endPtr++) {
-        if (*endPtr == 'k')
-            kilo = true;
-        else if (*endPtr == 'M')
-            mega = true;
-        else if (*endPtr == 'G')
-            giga = true;        
-        else if (*endPtr == 'l' || *endPtr == 'L')
-            ls++;
-        else if (*endPtr == 'u' || *endPtr == 'U')
-            us++;
-    }
-    if (kilo)
-        yylval.intVal *= 1024;
-    if (mega)
-        yylval.intVal *= 1024*1024;
-    if (giga)
-        yylval.intVal *= 1024*1024*1024;
-
-    if (ls >= 2)
-        return us ? TOKEN_UINT64_CONSTANT : TOKEN_INT64_CONSTANT;
-    else if (ls == 1)
-        return us ? TOKEN_UINT32_CONSTANT : TOKEN_INT32_CONSTANT;
-
-    // See if we can fit this into a 32-bit integer...
-    if ((yylval.intVal & 0xffffffff) == yylval.intVal)
-        return us ? TOKEN_UINT32_CONSTANT : TOKEN_INT32_CONSTANT;
-    else
-        return us ? TOKEN_UINT64_CONSTANT : TOKEN_INT64_CONSTANT;
+{INT_NUMBER_DOTDOTDOT} {
+    RT;
+    return lParseInteger(true);
 }
 
 
@@ -562,6 +525,72 @@ lParseBinary(const char *ptr, SourcePos pos, char **endPtr) {
 }
 
 
+static int
+lParseInteger(bool dotdotdot) {
+    // Convert the numeric part of yytext into yylval.intVal; the parser
+    // call leaves tail at the first character it did not consume.
+    char *tail = NULL;
+    const bool isBinary = (yytext[0] == '0' && yytext[1] == 'b');
+    if (isBinary)
+        yylval.intVal = lParseBinary(yytext+2, yylloc, &tail);
+    else {
+#if defined(ISPC_IS_WINDOWS) && !defined(__MINGW32__)
+        yylval.intVal = _strtoui64(yytext, &tail, 0);
+#else
+        // FIXME: should use strtouq and then issue an error if we can't
+        // fit into 64 bits...
+        yylval.intVal = strtoull(yytext, &tail, 0);
+#endif
+    }
+
+    // Walk the suffix: record any k/M/G scale factor and count the l and u
+    // qualifiers.  The only other characters tolerated are the dots of a
+    // trailing ellipsis, and only when the caller told us to expect one.
+    int numLs = 0, numUs = 0;
+    bool kilo = false, mega = false, giga = false;
+    for (const char *c = tail; *c != '\0'; ++c) {
+        switch (*c) {
+        case 'k': kilo = true; break;
+        case 'M': mega = true; break;
+        case 'G': giga = true; break;
+        case 'l': case 'L': ++numLs; break;
+        case 'u': case 'U': ++numUs; break;
+        default:
+            Assert(dotdotdot && *c == '.');
+            break;
+        }
+    }
+
+    if (kilo)
+        yylval.intVal *= 1024;
+    if (mega)
+        yylval.intVal *= 1024*1024;
+    if (giga)
+        yylval.intVal *= 1024*1024*1024;
+
+    // Pick the width: two or more l qualifiers select 64 bits, exactly one
+    // selects 32 bits; with none, use 32 bits only if the value fits.
+    bool is64Bit;
+    if (numLs >= 2)
+        is64Bit = true;
+    else if (numLs == 1)
+        is64Bit = false;
+    else
+        is64Bit = ((yylval.intVal & 0xffffffff) != yylval.intVal);
+    const bool isUnsigned = (numUs != 0);
+
+    if (dotdotdot) {
+        if (is64Bit)
+            return isUnsigned ? TOKEN_UINT64DOTDOTDOT_CONSTANT : TOKEN_INT64DOTDOTDOT_CONSTANT;
+        return isUnsigned ? TOKEN_UINT32DOTDOTDOT_CONSTANT : TOKEN_INT32DOTDOTDOT_CONSTANT;
+    }
+
+    if (is64Bit)
+        return isUnsigned ? TOKEN_UINT64_CONSTANT : TOKEN_INT64_CONSTANT;
+    return isUnsigned ? TOKEN_UINT32_CONSTANT : TOKEN_INT32_CONSTANT;
+}
+
+
 /** Handle a C-style comment in the source. 
  */
 static void
diff --git a/llvmutil.cpp b/llvmutil.cpp
index 2ba60dbb..5febaadf 100644
--- a/llvmutil.cpp
+++ b/llvmutil.cpp
@@ -43,44 +43,44 @@
 #include <set>
 #include <map>
 
-LLVM_TYPE_CONST llvm::Type *LLVMTypes::VoidType = NULL;
-LLVM_TYPE_CONST llvm::PointerType *LLVMTypes::VoidPointerType = NULL;
-LLVM_TYPE_CONST llvm::Type *LLVMTypes::PointerIntType = NULL;
-LLVM_TYPE_CONST llvm::Type *LLVMTypes::BoolType = NULL;
+llvm::Type *LLVMTypes::VoidType = NULL;
+llvm::PointerType *LLVMTypes::VoidPointerType = NULL;
+llvm::Type *LLVMTypes::PointerIntType = NULL;
+llvm::Type *LLVMTypes::BoolType = NULL;
 
-LLVM_TYPE_CONST llvm::Type *LLVMTypes::Int8Type = NULL;
-LLVM_TYPE_CONST llvm::Type *LLVMTypes::Int16Type = NULL;
-LLVM_TYPE_CONST llvm::Type *LLVMTypes::Int32Type = NULL;
-LLVM_TYPE_CONST llvm::Type *LLVMTypes::Int64Type = NULL;
-LLVM_TYPE_CONST llvm::Type *LLVMTypes::FloatType = NULL;
-LLVM_TYPE_CONST llvm::Type *LLVMTypes::DoubleType = NULL;
+llvm::Type *LLVMTypes::Int8Type = NULL;
+llvm::Type *LLVMTypes::Int16Type = NULL;
+llvm::Type *LLVMTypes::Int32Type = NULL;
+llvm::Type *LLVMTypes::Int64Type = NULL;
+llvm::Type *LLVMTypes::FloatType = NULL;
+llvm::Type *LLVMTypes::DoubleType = NULL;
 
-LLVM_TYPE_CONST llvm::Type *LLVMTypes::Int8PointerType = NULL;
-LLVM_TYPE_CONST llvm::Type *LLVMTypes::Int16PointerType = NULL;
-LLVM_TYPE_CONST llvm::Type *LLVMTypes::Int32PointerType = NULL;
-LLVM_TYPE_CONST llvm::Type *LLVMTypes::Int64PointerType = NULL;
-LLVM_TYPE_CONST llvm::Type *LLVMTypes::FloatPointerType = NULL;
-LLVM_TYPE_CONST llvm::Type *LLVMTypes::DoublePointerType = NULL;
+llvm::Type *LLVMTypes::Int8PointerType = NULL;
+llvm::Type *LLVMTypes::Int16PointerType = NULL;
+llvm::Type *LLVMTypes::Int32PointerType = NULL;
+llvm::Type *LLVMTypes::Int64PointerType = NULL;
+llvm::Type *LLVMTypes::FloatPointerType = NULL;
+llvm::Type *LLVMTypes::DoublePointerType = NULL;
 
-LLVM_TYPE_CONST llvm::VectorType *LLVMTypes::MaskType = NULL;
-LLVM_TYPE_CONST llvm::VectorType *LLVMTypes::BoolVectorType = NULL;
+llvm::VectorType *LLVMTypes::MaskType = NULL;
+llvm::VectorType *LLVMTypes::BoolVectorType = NULL;
 
-LLVM_TYPE_CONST llvm::VectorType *LLVMTypes::Int1VectorType = NULL;
-LLVM_TYPE_CONST llvm::VectorType *LLVMTypes::Int8VectorType = NULL;
-LLVM_TYPE_CONST llvm::VectorType *LLVMTypes::Int16VectorType = NULL;
-LLVM_TYPE_CONST llvm::VectorType *LLVMTypes::Int32VectorType = NULL;
-LLVM_TYPE_CONST llvm::VectorType *LLVMTypes::Int64VectorType = NULL;
-LLVM_TYPE_CONST llvm::VectorType *LLVMTypes::FloatVectorType = NULL;
-LLVM_TYPE_CONST llvm::VectorType *LLVMTypes::DoubleVectorType = NULL;
+llvm::VectorType *LLVMTypes::Int1VectorType = NULL;
+llvm::VectorType *LLVMTypes::Int8VectorType = NULL;
+llvm::VectorType *LLVMTypes::Int16VectorType = NULL;
+llvm::VectorType *LLVMTypes::Int32VectorType = NULL;
+llvm::VectorType *LLVMTypes::Int64VectorType = NULL;
+llvm::VectorType *LLVMTypes::FloatVectorType = NULL;
+llvm::VectorType *LLVMTypes::DoubleVectorType = NULL;
 
-LLVM_TYPE_CONST llvm::Type *LLVMTypes::Int8VectorPointerType = NULL;
-LLVM_TYPE_CONST llvm::Type *LLVMTypes::Int16VectorPointerType = NULL;
-LLVM_TYPE_CONST llvm::Type *LLVMTypes::Int32VectorPointerType = NULL;
-LLVM_TYPE_CONST llvm::Type *LLVMTypes::Int64VectorPointerType = NULL;
-LLVM_TYPE_CONST llvm::Type *LLVMTypes::FloatVectorPointerType = NULL;
-LLVM_TYPE_CONST llvm::Type *LLVMTypes::DoubleVectorPointerType = NULL;
+llvm::Type *LLVMTypes::Int8VectorPointerType = NULL;
+llvm::Type *LLVMTypes::Int16VectorPointerType = NULL;
+llvm::Type *LLVMTypes::Int32VectorPointerType = NULL;
+llvm::Type *LLVMTypes::Int64VectorPointerType = NULL;
+llvm::Type *LLVMTypes::FloatVectorPointerType = NULL;
+llvm::Type *LLVMTypes::DoubleVectorPointerType = NULL;
 
-LLVM_TYPE_CONST llvm::VectorType *LLVMTypes::VoidPointerVectorType = NULL;
+llvm::VectorType *LLVMTypes::VoidPointerVectorType = NULL;
 
 llvm::Constant *LLVMTrue = NULL;
 llvm::Constant *LLVMFalse = NULL;
@@ -473,9 +473,9 @@ LLVMBoolVector(const bool *bvec) {
 
 
 llvm::Constant *
-LLVMIntAsType(int64_t val, LLVM_TYPE_CONST llvm::Type *type) {
-    LLVM_TYPE_CONST llvm::VectorType *vecType =
-        llvm::dyn_cast<LLVM_TYPE_CONST llvm::VectorType>(type);
+LLVMIntAsType(int64_t val, llvm::Type *type) {
+    llvm::VectorType *vecType =
+        llvm::dyn_cast<llvm::VectorType>(type);
 
     if (vecType != NULL) {
         llvm::Constant *v = llvm::ConstantInt::get(vecType->getElementType(),
@@ -491,9 +491,9 @@ LLVMIntAsType(int64_t val, LLVM_TYPE_CONST llvm::Type *type) {
 
 
 llvm::Constant *
-LLVMUIntAsType(uint64_t val, LLVM_TYPE_CONST llvm::Type *type) {
-    LLVM_TYPE_CONST llvm::VectorType *vecType =
-        llvm::dyn_cast<LLVM_TYPE_CONST llvm::VectorType>(type);
+LLVMUIntAsType(uint64_t val, llvm::Type *type) {
+    llvm::VectorType *vecType =
+        llvm::dyn_cast<llvm::VectorType>(type);
 
     if (vecType != NULL) {
         llvm::Constant *v = llvm::ConstantInt::get(vecType->getElementType(),
@@ -642,8 +642,8 @@ LLVMFlattenInsertChain(llvm::InsertElementInst *ie, int vectorWidth,
 bool
 LLVMExtractVectorInts(llvm::Value *v, int64_t ret[], int *nElts) {
     // Make sure we do in fact have a vector of integer values here
-    LLVM_TYPE_CONST llvm::VectorType *vt =
-        llvm::dyn_cast<LLVM_TYPE_CONST llvm::VectorType>(v->getType());
+    llvm::VectorType *vt =
+        llvm::dyn_cast<llvm::VectorType>(v->getType());
     Assert(vt != NULL);
     Assert(llvm::isa<llvm::IntegerType>(vt->getElementType()));
 
@@ -696,7 +696,7 @@ lVectorValuesAllEqual(llvm::Value *v, int vectorLength,
 static bool
 lIsExactMultiple(llvm::Value *val, int baseValue, int vectorLength,
                  std::vector<llvm::PHINode *> &seenPhis) {
-    if (llvm::isa<LLVM_TYPE_CONST llvm::VectorType>(val->getType()) == false) {
+    if (llvm::isa<llvm::VectorType>(val->getType()) == false) {
         // If we've worked down to a constant int, then the moment of truth
         // has arrived...
         llvm::ConstantInt *ci = llvm::dyn_cast<llvm::ConstantInt>(val);
@@ -780,7 +780,7 @@ static bool
 lAllDivBaseEqual(llvm::Value *val, int64_t baseValue, int vectorLength,
                  std::vector<llvm::PHINode *> &seenPhis,
                  bool &canAdd) {
-    Assert(llvm::isa<LLVM_TYPE_CONST llvm::VectorType>(val->getType()));
+    Assert(llvm::isa<llvm::VectorType>(val->getType()));
     // Make sure the base value is a positive power of 2
     Assert(baseValue > 0 && (baseValue & (baseValue-1)) == 0);
 
@@ -790,7 +790,7 @@ lAllDivBaseEqual(llvm::Value *val, int64_t baseValue, int vectorLength,
 
     int64_t vecVals[ISPC_MAX_NVEC];
     int nElts;
-    if (llvm::isa<LLVM_TYPE_CONST llvm::VectorType>(val->getType()) &&
+    if (llvm::isa<llvm::VectorType>(val->getType()) &&
         LLVMExtractVectorInts(val, vecVals, &nElts)) {
         // If we have a vector of compile-time constant integer values,
         // then go ahead and check them directly..
@@ -1074,8 +1074,8 @@ lVectorValuesAllEqual(llvm::Value *v, int vectorLength,
 */
 bool
 LLVMVectorValuesAllEqual(llvm::Value *v) {
-    LLVM_TYPE_CONST llvm::VectorType *vt =
-        llvm::dyn_cast<LLVM_TYPE_CONST llvm::VectorType>(v->getType());
+    llvm::VectorType *vt =
+        llvm::dyn_cast<llvm::VectorType>(v->getType());
     Assert(vt != NULL);
     int vectorLength = vt->getNumElements();
 
@@ -1344,8 +1344,8 @@ lVectorIsLinear(llvm::Value *v, int vectorLength, int stride,
 */
 bool
 LLVMVectorIsLinear(llvm::Value *v, int stride) {
-    LLVM_TYPE_CONST llvm::VectorType *vt =
-        llvm::dyn_cast<LLVM_TYPE_CONST llvm::VectorType>(v->getType());
+    llvm::VectorType *vt =
+        llvm::dyn_cast<llvm::VectorType>(v->getType());
     Assert(vt != NULL);
     int vectorLength = vt->getNumElements();
 
@@ -1399,8 +1399,8 @@ lExtractFirstVectorElement(llvm::Value *v, llvm::Instruction *insertBefore,
         return llvm::ExtractElementInst::Create(v, LLVMInt32(0), "first_elt",
                                                 insertBefore);
 
-    LLVM_TYPE_CONST llvm::VectorType *vt =
-        llvm::dyn_cast<LLVM_TYPE_CONST llvm::VectorType>(v->getType());
+    llvm::VectorType *vt =
+        llvm::dyn_cast<llvm::VectorType>(v->getType());
     Assert(vt != NULL);
 
     std::string newName = v->getName().str() + std::string(".elt0");
@@ -1443,8 +1443,8 @@ lExtractFirstVectorElement(llvm::Value *v, llvm::Instruction *insertBefore,
         llvm::Instruction *phiInsertPos = phi->getParent()->begin();
         llvm::PHINode *scalarPhi = 
             llvm::PHINode::Create(vt->getElementType(), 
-                                  phi->getNumIncomingValues(), newName,
-                                  phiInsertPos);
+                                  phi->getNumIncomingValues(), 
+                                  newName, phiInsertPos);
         phiMap[phi] = scalarPhi;
 
         for (unsigned i = 0; i < phi->getNumIncomingValues(); ++i) {
@@ -1489,8 +1489,8 @@ LLVMConcatVectors(llvm::Value *v1, llvm::Value *v2,
                   llvm::Instruction *insertBefore) {
     Assert(v1->getType() == v2->getType());
 
-    LLVM_TYPE_CONST llvm::VectorType *vt =
-        llvm::dyn_cast<LLVM_TYPE_CONST llvm::VectorType>(v1->getType());
+    llvm::VectorType *vt =
+        llvm::dyn_cast<llvm::VectorType>(v1->getType());
     Assert(vt != NULL);
 
     int32_t identity[ISPC_MAX_NVEC];
@@ -1518,12 +1518,8 @@ LLVMShuffleVectors(llvm::Value *v1, llvm::Value *v2, int32_t shuf[],
             shufVec.push_back(LLVMInt32(shuf[i]));
     }
 
-#ifndef LLVM_2_9
     llvm::ArrayRef<llvm::Constant *> aref(&shufVec[0], &shufVec[shufSize]);
     llvm::Value *vec = llvm::ConstantVector::get(aref);
-#else // LLVM_2_9
-    llvm::Value *vec = llvm::ConstantVector::get(shufVec);
-#endif
 
     return new llvm::ShuffleVectorInst(v1, v2, vec, "shuffle", insertBefore);
 }
diff --git a/llvmutil.h b/llvmutil.h
index 96cdf079..de50ae70 100644
--- a/llvmutil.h
+++ b/llvmutil.h
@@ -48,57 +48,50 @@ namespace llvm {
     class InsertElementInst;
 }
 
-// llvm::Type *s are no longer const in llvm 3.0
-#if defined(LLVM_3_0) || defined(LLVM_3_0svn) || defined(LLVM_3_1svn)
-#define LLVM_TYPE_CONST
-#else
-#define LLVM_TYPE_CONST const
-#endif
-
 
 /** This structure holds pointers to a variety of LLVM types; code
     elsewhere can use them from here, ratherthan needing to make more
     verbose LLVM API calls.
  */ 
 struct LLVMTypes {
-    static LLVM_TYPE_CONST llvm::Type *VoidType;
-    static LLVM_TYPE_CONST llvm::PointerType *VoidPointerType;
-    static LLVM_TYPE_CONST llvm::Type *PointerIntType;
-    static LLVM_TYPE_CONST llvm::Type *BoolType;
+    static llvm::Type *VoidType;
+    static llvm::PointerType *VoidPointerType;
+    static llvm::Type *PointerIntType;
+    static llvm::Type *BoolType;
 
-    static LLVM_TYPE_CONST llvm::Type *Int8Type;
-    static LLVM_TYPE_CONST llvm::Type *Int16Type;
-    static LLVM_TYPE_CONST llvm::Type *Int32Type;
-    static LLVM_TYPE_CONST llvm::Type *Int64Type;
-    static LLVM_TYPE_CONST llvm::Type *FloatType;
-    static LLVM_TYPE_CONST llvm::Type *DoubleType;
+    static llvm::Type *Int8Type;
+    static llvm::Type *Int16Type;
+    static llvm::Type *Int32Type;
+    static llvm::Type *Int64Type;
+    static llvm::Type *FloatType;
+    static llvm::Type *DoubleType;
 
-    static LLVM_TYPE_CONST llvm::Type *Int8PointerType;
-    static LLVM_TYPE_CONST llvm::Type *Int16PointerType;
-    static LLVM_TYPE_CONST llvm::Type *Int32PointerType;
-    static LLVM_TYPE_CONST llvm::Type *Int64PointerType;
-    static LLVM_TYPE_CONST llvm::Type *FloatPointerType;
-    static LLVM_TYPE_CONST llvm::Type *DoublePointerType;
+    static llvm::Type *Int8PointerType;
+    static llvm::Type *Int16PointerType;
+    static llvm::Type *Int32PointerType;
+    static llvm::Type *Int64PointerType;
+    static llvm::Type *FloatPointerType;
+    static llvm::Type *DoublePointerType;
 
-    static LLVM_TYPE_CONST llvm::VectorType *MaskType;
+    static llvm::VectorType *MaskType;
 
-    static LLVM_TYPE_CONST llvm::VectorType *BoolVectorType;
-    static LLVM_TYPE_CONST llvm::VectorType *Int1VectorType;
-    static LLVM_TYPE_CONST llvm::VectorType *Int8VectorType;
-    static LLVM_TYPE_CONST llvm::VectorType *Int16VectorType;
-    static LLVM_TYPE_CONST llvm::VectorType *Int32VectorType;
-    static LLVM_TYPE_CONST llvm::VectorType *Int64VectorType;
-    static LLVM_TYPE_CONST llvm::VectorType *FloatVectorType;
-    static LLVM_TYPE_CONST llvm::VectorType *DoubleVectorType;
+    static llvm::VectorType *BoolVectorType;
+    static llvm::VectorType *Int1VectorType;
+    static llvm::VectorType *Int8VectorType;
+    static llvm::VectorType *Int16VectorType;
+    static llvm::VectorType *Int32VectorType;
+    static llvm::VectorType *Int64VectorType;
+    static llvm::VectorType *FloatVectorType;
+    static llvm::VectorType *DoubleVectorType;
 
-    static LLVM_TYPE_CONST llvm::Type *Int8VectorPointerType;
-    static LLVM_TYPE_CONST llvm::Type *Int16VectorPointerType;
-    static LLVM_TYPE_CONST llvm::Type *Int32VectorPointerType;
-    static LLVM_TYPE_CONST llvm::Type *Int64VectorPointerType;
-    static LLVM_TYPE_CONST llvm::Type *FloatVectorPointerType;
-    static LLVM_TYPE_CONST llvm::Type *DoubleVectorPointerType;
+    static llvm::Type *Int8VectorPointerType;
+    static llvm::Type *Int16VectorPointerType;
+    static llvm::Type *Int32VectorPointerType;
+    static llvm::Type *Int64VectorPointerType;
+    static llvm::Type *FloatVectorPointerType;
+    static llvm::Type *DoubleVectorPointerType;
 
-    static LLVM_TYPE_CONST llvm::VectorType *VoidPointerVectorType;
+    static llvm::VectorType *VoidPointerVectorType;
 };
 
 /** These variables hold the corresponding LLVM constant values as a
@@ -175,11 +168,11 @@ extern llvm::Constant *LLVMDoubleVector(double f);
 
 /** Returns a constant integer or vector (according to the given type) of
     the given signed integer value. */
-extern llvm::Constant *LLVMIntAsType(int64_t, LLVM_TYPE_CONST llvm::Type *t);
+extern llvm::Constant *LLVMIntAsType(int64_t, llvm::Type *t);
 
 /** Returns a constant integer or vector (according to the given type) of
     the given unsigned integer value. */
-extern llvm::Constant *LLVMUIntAsType(uint64_t, LLVM_TYPE_CONST llvm::Type *t);
+extern llvm::Constant *LLVMUIntAsType(uint64_t, llvm::Type *t);
 
 /** Returns an LLVM boolean vector based on the given array of values.
     The array should have g->target.vectorWidth elements. */
diff --git a/main.cpp b/main.cpp
index b29a9f0f..a98a35bb 100644
--- a/main.cpp
+++ b/main.cpp
@@ -44,16 +44,9 @@
 #ifdef ISPC_IS_WINDOWS
   #include <time.h>
 #endif // ISPC_IS_WINDOWS
-#include <llvm/Support/PrettyStackTrace.h>
 #include <llvm/Support/Signals.h>
-#if defined(LLVM_3_0) || defined(LLVM_3_0svn) || defined(LLVM_3_1svn)
-  #include <llvm/Support/TargetRegistry.h>
-  #include <llvm/Support/TargetSelect.h>
-#else
-  #include <llvm/Target/TargetRegistry.h>
-  #include <llvm/Target/TargetSelect.h>
-  #include <llvm/Target/SubtargetFeature.h>
-#endif
+#include <llvm/Support/TargetRegistry.h>
+#include <llvm/Support/TargetSelect.h>
 
 #ifdef ISPC_IS_WINDOWS
 #define strcasecmp stricmp
@@ -67,9 +60,7 @@ static void
 lPrintVersion() {
     printf("Intel(r) SPMD Program Compiler (ispc), %s (build %s @ %s, LLVM %s)\n", 
            ISPC_VERSION, BUILD_VERSION, BUILD_DATE, 
-#ifdef LLVM_2_9
-           "2.9"
-#elif defined(LLVM_3_0) || defined(LLVM_3_0svn)
+#if defined(LLVM_3_0)
            "3.0"
 #elif defined(LLVM_3_1) || defined(LLVM_3_1svn)
            "3.1"
@@ -91,12 +82,10 @@ usage(int ret) {
            Target::SupportedTargetArchs());
     printf("    [--c++-include-file=<name>]\t\tSpecify name of file to emit in #include statement in generated C++ code.\n");
     printf("    [--cpu=<cpu>]\t\t\tSelect target CPU type\n");
-    printf("         <cpu>={%s}\n", Target::SupportedTargetCPUs());
+    printf("         <cpu>={%s}\n", Target::SupportedTargetCPUs().c_str());
     printf("    [-D<foo>]\t\t\t\t#define given value when running preprocessor\n");
     printf("    [--emit-asm]\t\t\tGenerate assembly language file as output\n");
-#ifndef LLVM_2_9
     printf("    [--emit-c++]\t\t\tEmit a C++ source file as output\n");
-#endif // !LLVM_2_9
     printf("    [--emit-llvm]\t\t\tEmit LLVM bitode file as output\n");
     printf("    [--emit-obj]\t\t\tGenerate object file file as output (default)\n");
     printf("    [-g]\t\t\t\tGenerate debugging information\n");
@@ -202,17 +191,18 @@ static void lGetAllArgs(int Argc, char *Argv[], int &argc, char *argv[128]) {
 }
 
 
+static void  // handler that main() registers with llvm::sys::AddSignalHandler()
+lSignal(void *) {  // the void* argument is unnamed and unused
+    FATAL("Unhandled signal sent to process; terminating.");
+}
+
+
 int main(int Argc, char *Argv[]) {
     int argc;
     char *argv[128];
     lGetAllArgs(Argc, Argv, argc, argv);
 
-#if 0
-    // Use LLVM's little utility function to print out nice stack traces if
-    // we crash
-    llvm::sys::PrintStackTraceOnErrorSignal();
-    llvm::PrettyStackTraceProgram X(argc, argv);
-#endif
+    llvm::sys::AddSignalHandler(lSignal, NULL);
 
     // initialize available LLVM targets
     LLVMInitializeX86TargetInfo();
@@ -220,9 +210,7 @@ int main(int Argc, char *Argv[]) {
     LLVMInitializeX86AsmPrinter();
     LLVMInitializeX86AsmParser();
     LLVMInitializeX86Disassembler();
-#if defined(LLVM_3_0) || defined(LLVM_3_0svn) || defined(LLVM_3_1svn)
     LLVMInitializeX86TargetMC();
-#endif
 
     char *file = NULL;
     const char *headerFileName = NULL;
@@ -279,10 +267,8 @@ int main(int Argc, char *Argv[]) {
         }
         else if (!strcmp(argv[i], "--emit-asm"))
             ot = Module::Asm;
-#ifndef LLVM_2_9
         else if (!strcmp(argv[i], "--emit-c++"))
             ot = Module::CXX;
-#endif // !LLVM_2_9
         else if (!strcmp(argv[i], "--emit-llvm"))
             ot = Module::Bitcode;
         else if (!strcmp(argv[i], "--emit-obj"))
diff --git a/module.cpp b/module.cpp
index 99da37ab..d082255f 100644
--- a/module.cpp
+++ b/module.cpp
@@ -1,5 +1,5 @@
 /*
-  Copyright (c) 2010-2011, Intel Corporation
+  Copyright (c) 2010-2012, Intel Corporation
   All rights reserved.
 
   Redistribution and use in source and binary forms, with or without
@@ -88,6 +88,24 @@
 #include <llvm/Support/raw_ostream.h>
 #include <llvm/Bitcode/ReaderWriter.h>
 
+/** Declares intptr_t, uintptr_t, ptrdiff_t and size_t in the symbol table. */
+static void
+lDeclareSizeAndPtrIntTypes(SymbolTable *symbolTable) {
+    const Type *ptrIntType = (g->target.is32Bit) ? AtomicType::VaryingInt32 :
+        AtomicType::VaryingInt64;
+    ptrIntType = ptrIntType->GetAsUnboundVariabilityType();
+    symbolTable->AddType("intptr_t", ptrIntType, SourcePos());
+    symbolTable->AddType("uintptr_t", ptrIntType->GetAsUnsignedType(),
+                         SourcePos());
+    symbolTable->AddType("ptrdiff_t", ptrIntType, SourcePos());
+    // size_t also narrows to 32 bits under force32BitAddressing; it is unsigned.
+    const Type *sizeType = (g->target.is32Bit || g->opt.force32BitAddressing) ?
+        AtomicType::VaryingInt32 : AtomicType::VaryingInt64;
+    sizeType = sizeType->GetAsUnboundVariabilityType();
+    symbolTable->AddType("size_t", sizeType->GetAsUnsignedType(), SourcePos());
+}
+
+
 ///////////////////////////////////////////////////////////////////////////
 // Module
 
@@ -103,6 +121,8 @@ Module::Module(const char *fn) {
     symbolTable = new SymbolTable;
     ast = new AST;
 
+    lDeclareSizeAndPtrIntTypes(symbolTable);
+
     module = new llvm::Module(filename ? filename : "<stdin>", *g->ctx);
     module->setTargetTriple(g->target.GetTripleString());
 
@@ -211,116 +231,170 @@ Module::CompileFile() {
 
 
 void
-Module::AddTypeDef(Symbol *sym) {
+Module::AddTypeDef(const std::string &name, const Type *type,
+                   SourcePos pos) {
     // Typedefs are easy; just add the mapping between the given name and
     // the given type.
-    symbolTable->AddType(sym->name.c_str(), sym->type, sym->pos);
+    symbolTable->AddType(name.c_str(), type, pos);
 }
 
 
 void
-Module::AddGlobalVariable(Symbol *sym, Expr *initExpr, bool isConst) {
+Module::AddGlobalVariable(const std::string &name, const Type *type, Expr *initExpr, 
+                          bool isConst, StorageClass storageClass, SourcePos pos) {
     // These may be NULL due to errors in parsing; just gracefully return
     // here if so.
-    if (sym == NULL || sym->type == NULL) {
-        // But if these are NULL and there haven't been any previous
-        // errors, something surprising is going on
+    if (name == "" || type == NULL) {
         Assert(errorCount > 0);
         return;
     }
 
-    if (symbolTable->LookupFunction(sym->name.c_str())) {
-        Error(sym->pos, "Global variable \"%s\" shadows previously-declared "
-              "function.", sym->name.c_str());
+    if (symbolTable->LookupFunction(name.c_str())) {
+        Error(pos, "Global variable \"%s\" shadows previously-declared "
+              "function.", name.c_str());
         return;
     }
 
-    if (sym->storageClass == SC_EXTERN_C) {
-        Error(sym->pos, "extern \"C\" qualifier can only be used for "
+    if (storageClass == SC_EXTERN_C) {
+        Error(pos, "extern \"C\" qualifier can only be used for "
               "functions.");
         return;
     }
 
-    if (Type::Equal(sym->type, AtomicType::Void)) {
-        Error(sym->pos, "\"void\" type global variable is illegal.");
+    if (Type::Equal(type, AtomicType::Void)) {
+        Error(pos, "\"void\" type global variable is illegal.");
         return;
     }
 
-    sym->type = ArrayType::SizeUnsizedArrays(sym->type, initExpr);
-    if (sym->type == NULL)
+    type = ArrayType::SizeUnsizedArrays(type, initExpr);
+    if (type == NULL)
         return;
 
-    const ArrayType *at = dynamic_cast<const ArrayType *>(sym->type);
+    const ArrayType *at = dynamic_cast<const ArrayType *>(type);
     if (at != NULL && at->TotalElementCount() == 0) {
-        Error(sym->pos, "Illegal to declare a global variable with unsized "
+        Error(pos, "Illegal to declare a global variable with unsized "
               "array dimensions that aren't set with an initializer "
               "expression.");
         return;
     }
         
-    LLVM_TYPE_CONST llvm::Type *llvmType = sym->type->LLVMType(g->ctx);
+    llvm::Type *llvmType = type->LLVMType(g->ctx);
     if (llvmType == NULL)
         return;
 
     // See if we have an initializer expression for the global.  If so,
     // make sure it's a compile-time constant!
     llvm::Constant *llvmInitializer = NULL;
-    if (sym->storageClass == SC_EXTERN || sym->storageClass == SC_EXTERN_C) {
+    ConstExpr *constValue = NULL;
+    if (storageClass == SC_EXTERN || storageClass == SC_EXTERN_C) {
         if (initExpr != NULL)
-            Error(sym->pos, "Initializer can't be provided with \"extern\" "
-                  "global variable \"%s\".", sym->name.c_str());
+            Error(pos, "Initializer can't be provided with \"extern\" "
+                  "global variable \"%s\".", name.c_str());
     }
-    else if (initExpr != NULL) {
-        initExpr = TypeCheck(initExpr);
+    else {
         if (initExpr != NULL) {
-            // We need to make sure the initializer expression is
-            // the same type as the global.  (But not if it's an
-            // ExprList; they don't have types per se / can't type
-            // convert themselves anyway.)
-            if (dynamic_cast<ExprList *>(initExpr) == NULL)
-                initExpr = TypeConvertExpr(initExpr, sym->type, "initializer");
-            
+            initExpr = TypeCheck(initExpr);
             if (initExpr != NULL) {
-                initExpr = Optimize(initExpr);
-                // Fingers crossed, now let's see if we've got a
-                // constant value..
-                llvmInitializer = initExpr->GetConstant(sym->type);
+                // We need to make sure the initializer expression is
+                // the same type as the global.  (But not if it's an
+                // ExprList; they don't have types per se / can't type
+                // convert themselves anyway.)
+                if (dynamic_cast<ExprList *>(initExpr) == NULL)
+                    initExpr = TypeConvertExpr(initExpr, type, "initializer");
+            
+                if (initExpr != NULL) {
+                    initExpr = Optimize(initExpr);
+                    // Fingers crossed, now let's see if we've got a
+                    // constant value..
+                    llvmInitializer = initExpr->GetConstant(type);
 
-                if (llvmInitializer != NULL) {
-                    if (sym->type->IsConstType())
-                        // Try to get a ConstExpr associated with
-                        // the symbol.  This dynamic_cast can
-                        // validly fail, for example for types like
-                        // StructTypes where a ConstExpr can't
-                        // represent their values.
-                        sym->constValue = 
-                            dynamic_cast<ConstExpr *>(initExpr);
+                    if (llvmInitializer != NULL) {
+                        if (type->IsConstType())
+                            // Try to get a ConstExpr associated with
+                            // the symbol.  This dynamic_cast can
+                            // validly fail, for example for types like
+                            // StructTypes where a ConstExpr can't
+                            // represent their values.
+                            constValue = dynamic_cast<ConstExpr *>(initExpr);
+                    }
+                    else
+                        Error(initExpr->pos, "Initializer for global variable \"%s\" "
+                              "must be a constant.", name.c_str());
                 }
-                else
-                    Error(initExpr->pos, "Initializer for global variable \"%s\" "
-                          "must be a constant.", sym->name.c_str());
             }
         }
+
+        // If no initializer was provided or if we couldn't get a value
+        // above, initialize it with zeros..
+        if (llvmInitializer == NULL)
+            llvmInitializer = llvm::Constant::getNullValue(llvmType);
     }
 
-    // If no initializer was provided or if we couldn't get a value
-    // above, initialize it with zeros..
-    if (llvmInitializer == NULL)
-        llvmInitializer = llvm::Constant::getNullValue(llvmType);
+    Symbol *sym = symbolTable->LookupVariable(name.c_str());
+    llvm::GlobalVariable *oldGV = NULL;
+    if (sym != NULL) {
+        // We've already seen either a declaration or a definition of this
+        // global.
+
+        // If the type doesn't match with the previous one, issue an error.
+        if (!Type::Equal(sym->type, type) ||
+            (sym->storageClass != SC_EXTERN && 
+             sym->storageClass != SC_EXTERN_C &&
+             sym->storageClass != storageClass)) {
+            Error(pos, "Definition of variable \"%s\" conflicts with "
+                  "definition at %s:%d.", name.c_str(), 
+                  sym->pos.name, sym->pos.first_line);
+            return;
+        }
+
+        llvm::GlobalVariable *gv = 
+            llvm::dyn_cast<llvm::GlobalVariable>(sym->storagePtr);
+        Assert(gv != NULL);
+
+        // And issue an error if this is a redefinition of a variable
+        if (gv->hasInitializer() && 
+            sym->storageClass != SC_EXTERN && sym->storageClass != SC_EXTERN_C) {
+            Error(pos, "Redefinition of variable \"%s\" is illegal. "
+                  "(Previous definition at %s:%d.)", sym->name.c_str(),
+                  sym->pos.name, sym->pos.first_line);
+            return;
+        }
+
+        // Now, we either have a redeclaration of a global, or a definition
+        // of a previously-declared global.  First, save the pointer to the
+        // previous llvm::GlobalVariable
+        oldGV = gv;
+    }
+    else {
+        sym = new Symbol(name, pos, type, storageClass);
+        symbolTable->AddVariable(sym);
+    }
+    sym->constValue = constValue;
 
     llvm::GlobalValue::LinkageTypes linkage =
         (sym->storageClass == SC_STATIC) ? llvm::GlobalValue::InternalLinkage :
         llvm::GlobalValue::ExternalLinkage;
+
+    // Note that the NULL llvmInitializer is what leads to "extern"
+    // declarations coming up extern and not defining storage (a bit
+    // subtle)...
     sym->storagePtr = new llvm::GlobalVariable(*module, llvmType, isConst, 
                                                linkage, llvmInitializer, 
                                                sym->name.c_str());
-    symbolTable->AddVariable(sym);
 
-    if (diBuilder && (sym->storageClass != SC_EXTERN)) {
-        llvm::DIFile file = sym->pos.GetDIFile();
-        diBuilder->createGlobalVariable(sym->name, 
+    // Patch up any references to the previous GlobalVariable (e.g. from a
+    // declaration of a global that was later defined.)
+    if (oldGV != NULL) {
+        oldGV->replaceAllUsesWith(sym->storagePtr);
+        oldGV->removeFromParent();
+        sym->storagePtr->setName(sym->name.c_str());
+    }
+    
+    if (diBuilder) {
+        llvm::DIFile file = pos.GetDIFile();
+        diBuilder->createGlobalVariable(name, 
                                         file,
-                                        sym->pos.first_line,
+                                        pos.first_line,
                                         sym->type->GetDIType(file),
                                         (sym->storageClass == SC_STATIC),
                                         sym->storagePtr);
@@ -411,22 +485,23 @@ lCheckForStructParameters(const FunctionType *ftype, SourcePos pos) {
     false if any errors were encountered.
  */
 void
-Module::AddFunctionDeclaration(Symbol *funSym, bool isInline) {
-    const FunctionType *functionType = 
-        dynamic_cast<const FunctionType *>(funSym->type);
+Module::AddFunctionDeclaration(const std::string &name, 
+                               const FunctionType *functionType, 
+                               StorageClass storageClass, bool isInline,
+                               SourcePos pos) {
     Assert(functionType != NULL);
 
     // If a global variable with the same name has already been declared
     // issue an error.
-    if (symbolTable->LookupVariable(funSym->name.c_str()) != NULL) {
-        Error(funSym->pos, "Function \"%s\" shadows previously-declared global variable. "
+    if (symbolTable->LookupVariable(name.c_str()) != NULL) {
+        Error(pos, "Function \"%s\" shadows previously-declared global variable. "
               "Ignoring this definition.",
-              funSym->name.c_str());
+              name.c_str());
         return;
     }
 
     std::vector<Symbol *> overloadFuncs;
-    symbolTable->LookupFunction(funSym->name.c_str(), &overloadFuncs);
+    symbolTable->LookupFunction(name.c_str(), &overloadFuncs);
     if (overloadFuncs.size() > 0) {
         for (unsigned int i = 0; i < overloadFuncs.size(); ++i) {
             Symbol *overloadFunc = overloadFuncs[i];
@@ -450,65 +525,67 @@ Module::AddFunctionDeclaration(Symbol *funSym, bool isInline) {
                         break;
                 }
                 if (i == functionType->GetNumParameters()) {
-                    Error(funSym->pos, "Illegal to overload function by return "
-                          "type only (previous declaration was at line %d of "
-                          "file %s).", overloadFunc->pos.first_line,
-                          overloadFunc->pos.name);
+                    std::string thisRetType = functionType->GetReturnTypeString();
+                    std::string otherRetType = ofType->GetReturnTypeString();
+                    Error(pos, "Illegal to overload function by return "
+                          "type only.  This function returns \"%s\" while "
+                          "previous declaration at %s:%d returns \"%s\".",
+                          thisRetType.c_str(), overloadFunc->pos.name,
+                          overloadFunc->pos.first_line, otherRetType.c_str());
                     return;
                 }
             }
         }
     }
 
-    if (funSym->storageClass == SC_EXTERN_C) {
+    if (storageClass == SC_EXTERN_C) {
         // Make sure the user hasn't supplied both an 'extern "C"' and a
         // 'task' qualifier with the function
         if (functionType->isTask) {
-            Error(funSym->pos, "\"task\" qualifier is illegal with C-linkage extern "
-                  "function \"%s\".  Ignoring this function.", funSym->name.c_str());
+            Error(pos, "\"task\" qualifier is illegal with C-linkage extern "
+                  "function \"%s\".  Ignoring this function.", name.c_str());
             return;
         }
 
         std::vector<Symbol *> funcs;
-        symbolTable->LookupFunction(funSym->name.c_str(), &funcs);
+        symbolTable->LookupFunction(name.c_str(), &funcs);
         if (funcs.size() > 0) {
             if (funcs.size() > 1) {
                 // Multiple functions with this name have already been declared; 
                 // can't overload here
-                Error(funSym->pos, "Can't overload extern \"C\" function \"%s\"; "
+                Error(pos, "Can't overload extern \"C\" function \"%s\"; "
                       "%d functions with the same name have already been declared.",
-                      funSym->name.c_str(), (int)funcs.size());
+                      name.c_str(), (int)funcs.size());
                 return;
             }
 
             // One function with the same name has been declared; see if it
             // has the same type as this one, in which case it's ok.
-            if (Type::Equal(funcs[0]->type, funSym->type))
+            if (Type::Equal(funcs[0]->type, functionType))
                 return;
             else {
-                Error(funSym->pos, "Can't overload extern \"C\" function \"%s\".",
-                      funSym->name.c_str());
+                Error(pos, "Can't overload extern \"C\" function \"%s\".",
+                      name.c_str());
                 return;
             }
         }
     }
 
     // Get the LLVM FunctionType
-    bool includeMask = (funSym->storageClass != SC_EXTERN_C);
-    LLVM_TYPE_CONST llvm::FunctionType *llvmFunctionType = 
+    bool includeMask = (storageClass != SC_EXTERN_C);
+    llvm::FunctionType *llvmFunctionType = 
         functionType->LLVMFunctionType(g->ctx, includeMask);
     if (llvmFunctionType == NULL)
         return;
 
     // And create the llvm::Function
-    llvm::GlobalValue::LinkageTypes linkage = (funSym->storageClass == SC_STATIC ||
+    llvm::GlobalValue::LinkageTypes linkage = (storageClass == SC_STATIC ||
                                                isInline) ?
         llvm::GlobalValue::InternalLinkage : llvm::GlobalValue::ExternalLinkage;
-    std::string functionName;
-    if (funSym->storageClass == SC_EXTERN_C)
-        functionName = funSym->name;
-    else {
-        functionName = funSym->MangledName();
+
+    std::string functionName = name;
+    if (storageClass != SC_EXTERN_C) {
+        functionName += functionType->Mangle();
         if (g->mangleFunctionsWithTarget)
             functionName += g->target.GetISAString();
     }
@@ -518,7 +595,7 @@ Module::AddFunctionDeclaration(Symbol *funSym, bool isInline) {
 
     // Set function attributes: we never throw exceptions
     function->setDoesNotThrow(true);
-    if (!(funSym->storageClass == SC_EXTERN_C) && 
+    if (storageClass != SC_EXTERN_C && 
         !g->generateDebuggingSymbols &&
         isInline)
         function->addFnAttr(llvm::Attribute::AlwaysInline);
@@ -528,17 +605,17 @@ Module::AddFunctionDeclaration(Symbol *funSym, bool isInline) {
 
     // Make sure that the return type isn't 'varying' if the function is
     // 'export'ed.
-    if (funSym->storageClass == SC_EXPORT && 
+    if (functionType->isExported && 
         lRecursiveCheckValidParamType(functionType->GetReturnType()))
-        Error(funSym->pos, "Illegal to return a \"varying\" type from exported "
-              "function \"%s\"", funSym->name.c_str());
+        Error(pos, "Illegal to return a \"varying\" type from exported "
+              "function \"%s\"", name.c_str());
 
     if (functionType->isTask && 
         Type::Equal(functionType->GetReturnType(), AtomicType::Void) == false)
-        Error(funSym->pos, "Task-qualified functions must have void return type.");
+        Error(pos, "Task-qualified functions must have void return type.");
 
     if (functionType->isExported || functionType->isExternC)
-        lCheckForStructParameters(functionType, funSym->pos);
+        lCheckForStructParameters(functionType, pos);
 
     // Loop over all of the arguments; process default values if present
     // and do other checks and parameter attribute setting.
@@ -547,12 +624,12 @@ Module::AddFunctionDeclaration(Symbol *funSym, bool isInline) {
     for (int i = 0; i < nArgs; ++i) {
         const Type *argType = functionType->GetParameterType(i);
         const std::string &argName = functionType->GetParameterName(i);
-        ConstExpr *defaultValue = functionType->GetParameterDefault(i);
+        Expr *defaultValue = functionType->GetParameterDefault(i);
         const SourcePos &argPos = functionType->GetParameterSourcePos(i);
 
         // If the function is exported, make sure that the parameter
         // doesn't have any varying stuff going on in it.
-        if (funSym->storageClass == SC_EXPORT)
+        if (functionType->isExported)
             lCheckForVaryingParameter(argType, argName, argPos);
 
         // ISPC assumes that no pointers alias.  (It should be possible to
@@ -596,29 +673,41 @@ Module::AddFunctionDeclaration(Symbol *funSym, bool isInline) {
         function->eraseFromParent();
         function = module->getFunction(functionName);
     }
-    funSym->function = function;
 
     // Finally, we know all is good and we can add the function to the
     // symbol table
+    Symbol *funSym = new Symbol(name, pos, functionType, storageClass);
+    funSym->function = function;
     bool ok = symbolTable->AddFunction(funSym);
     Assert(ok);
 }
 
 
 void
-Module::AddFunctionDefinition(Symbol *sym, const std::vector<Symbol *> &args,
+Module::AddFunctionDefinition(const std::string &name, const FunctionType *type,
                               Stmt *code) {
-    ast->AddFunction(sym, args, code);
+    Symbol *sym = symbolTable->LookupFunction(name.c_str(), type);
+    if (sym == NULL) {
+        Assert(m->errorCount > 0);
+        return;
+    }
+
+    // FIXME: because we encode the parameter names in the function type,
+    // we need to override the function type here in case the function had
+    // earlier been declared with anonymous parameter names but is now
+    // defined with actual names.  This is yet another reason we shouldn't
+    // include the names in FunctionType...  
+    sym->type = type;
+
+    ast->AddFunction(sym, code);
 }
 
 
 bool
 Module::writeOutput(OutputType outputType, const char *outFileName,
                     const char *includeFileName) {
-#if defined(LLVM_3_0) || defined(LLVM_3_0svn) || defined(LLVM_3_1svn)
     if (diBuilder != NULL && outputType != Header)
         diBuilder->finalize();
-#endif // LLVM_3_0
 
     // First, issue a warning if the output file suffix and the type of
     // file being created seem to mismatch.  This can help catch missing
@@ -640,14 +729,12 @@ Module::writeOutput(OutputType outputType, const char *outFileName,
             if (strcasecmp(suffix, "o") && strcasecmp(suffix, "obj"))
                 fileType = "object";
             break;
-#ifndef LLVM_2_9
         case CXX:
             if (strcasecmp(suffix, "c") && strcasecmp(suffix, "cc") &&
                 strcasecmp(suffix, "c++") && strcasecmp(suffix, "cxx") &&
                 strcasecmp(suffix, "cpp"))
                 fileType = "c++";
             break;
-#endif // !LLVM_2_9
         case Header:
             if (strcasecmp(suffix, "h") && strcasecmp(suffix, "hh") &&
                 strcasecmp(suffix, "hpp"))
@@ -663,14 +750,12 @@ Module::writeOutput(OutputType outputType, const char *outFileName,
         return writeHeader(outFileName);
     else if (outputType == Bitcode)
         return writeBitcode(module, outFileName);
-#ifndef LLVM_2_9
     else if (outputType == CXX) {
         extern bool WriteCXXFile(llvm::Module *module, const char *fn, 
                                  int vectorWidth, const char *includeName);
         return WriteCXXFile(module, outFileName, g->target.vectorWidth,
                             includeFileName);
     }
-#endif // !LLVM_2_9
     else
         return writeObjectFileOrAssembly(outputType, outFileName);
 }
@@ -755,109 +840,56 @@ Module::writeObjectFileOrAssembly(llvm::TargetMachine *targetMachine,
 }
 
 
-/** Small structure used in representing dependency graphs of structures
-    (i.e. given a StructType, which other structure types does it have as
-    elements).
- */ 
-struct StructDAGNode {
-    StructDAGNode()
-        : visited(false) { }
-
-    bool visited;
-    std::vector<const StructType *> dependents;
-};
-
-
-/** Visit a node for the topological sort.
+/** Writes the C declaration of struct type "st" to "file".  Elements whose
+    type is itself a StructType are (recursively) declared ahead of it, and
+    types already listed in "emittedStructs" are not declared again.
  */
 static void
-lVisitNode(const StructType *structType, 
-           std::map<const StructType *, StructDAGNode *> &structToNode,
-           std::vector<const StructType *> &sortedTypes) {
-    Assert(structToNode.find(structType) != structToNode.end());
-    // Get the node that encodes the structs that this one is immediately
-    // dependent on.
-    StructDAGNode *node = structToNode[structType];
-    if (node->visited)
-        return;
+lEmitStructDecl(const StructType *st, std::vector<const StructType *> *emittedStructs,
+                FILE *file) {
+    // Nothing to do if this struct type has already been declared, which
+    // happens when it is a member of another struct whose declaration
+    // was emitted previously.
+    for (int i = 0; i < (int)emittedStructs->size(); ++i)
+        if (Type::EqualIgnoringConst(st, (*emittedStructs)[i]))
+            return;
 
-    node->visited = true;
-    // Depth-first traversal: visit all of the dependent nodes...
-    for (unsigned int i = 0; i < node->dependents.size(); ++i)
-        lVisitNode(node->dependents[i], structToNode, sortedTypes);
-    // ...and then add this one to the sorted list
-    sortedTypes.push_back(structType);
+    // Otherwise, first declare every element type that is itself a struct.
+    for (int i = 0; i < st->GetElementCount(); ++i) {
+        const StructType *elementStructType = 
+            dynamic_cast<const StructType *>(st->GetElementType(i));
+        if (elementStructType != NULL)
+            lEmitStructDecl(elementStructType, emittedStructs, file);
+    }
+
+    // Record this struct as emitted, then write out its own declaration.
+    emittedStructs->push_back(st);
+
+    fprintf(file, "struct %s", st->GetStructName().c_str());
+    if (st->GetSOAWidth() > 0)
+        // This has to match the naming scheme in
+        // StructType::GetCDeclaration().
+        fprintf(file, "_SOA%d", st->GetSOAWidth());
+    fprintf(file, " {\n");
+
+    for (int i = 0; i < st->GetElementCount(); ++i) {
+        const Type *type = st->GetElementType(i)->GetAsNonConstType();
+        std::string d = type->GetCDeclaration(st->GetElementName(i));
+        fprintf(file, "    %s;\n", d.c_str());
+    }
+    fprintf(file, "};\n\n");
 }
-           
+
 
 /** Given a set of structures that we want to print C declarations of in a
-    header file, order them so that any struct that is used as a member
-    variable in another struct is printed before the struct that uses it
-    and then print them to the given file.
+    header file, emit each one (after its struct-typed members) to the file.
  */
 static void
 lEmitStructDecls(std::vector<const StructType *> &structTypes, FILE *file) {
-    // First, build a DAG among the struct types where there is an edge
-    // from node A to node B if struct type A depends on struct type B
-
-    // Records the struct types that have incoming edges in the
-    // DAG--i.e. the ones that one or more other struct types depend on
-    std::set<const StructType *> hasIncomingEdges;
-    // Records the mapping between struct type pointers and the
-    // StructDagNode structures
-    std::map<const StructType *, StructDAGNode *> structToNode;
-    for (unsigned int i = 0; i < structTypes.size(); ++i) {
-        // For each struct type, create its DAG node and record the
-        // relationship between it and its node
-        const StructType *st = structTypes[i];
-        StructDAGNode *node = new StructDAGNode;
-        structToNode[st] = node;
-
-        for (int j = 0; j < st->GetElementCount(); ++j) {
-            const StructType *elementStructType = 
-                dynamic_cast<const StructType *>(st->GetElementType(j));
-            // If this element is a struct type and we haven't already
-            // processed it for the current struct type, then upate th
-            // dependencies and record that this element type has other
-            // struct types that depend on it.
-            if (elementStructType != NULL &&
-                (std::find(node->dependents.begin(), node->dependents.end(), 
-                           elementStructType) == node->dependents.end())) {
-                node->dependents.push_back(elementStructType);
-                hasIncomingEdges.insert(elementStructType);
-            }
-        }
-    }
-
-    // Perform a topological sort of the struct types.  Kick it off by
-    // visiting nodes with no incoming edges; i.e. the struct types that no
-    // other struct types depend on.
-    std::vector<const StructType *> sortedTypes;
-    for (unsigned int i = 0; i < structTypes.size(); ++i) {
-        const StructType *structType = structTypes[i];
-        if (hasIncomingEdges.find(structType) == hasIncomingEdges.end())
-            lVisitNode(structType, structToNode, sortedTypes);
-    }
-    Assert(sortedTypes.size() == structTypes.size());
-
-    // And finally we can emit the struct declarations by going through the
-    // sorted ones in order.
-    for (unsigned int i = 0; i < sortedTypes.size(); ++i) {
-        const StructType *st = sortedTypes[i];
-        fprintf(file, "struct %s", st->GetStructName().c_str());
-        if (st->GetSOAWidth() > 0)
-            // This has to match the naming scheme in
-            // StructType::GetCDeclaration().
-            fprintf(file, "_SOA%d", st->GetSOAWidth());
-        fprintf(file, " {\n");
-
-        for (int j = 0; j < st->GetElementCount(); ++j) {
-            const Type *type = st->GetElementType(j)->GetAsNonConstType();
-            std::string d = type->GetCDeclaration(st->GetElementName(j));
-            fprintf(file, "    %s;\n", d.c_str());
-        }
-        fprintf(file, "};\n\n");
-    }
+    std::vector<const StructType *> emittedStructs;
+    for (unsigned int i = 0; i < structTypes.size(); ++i)
+        lEmitStructDecl(structTypes[i], &emittedStructs, file);
+    Assert(emittedStructs.size() == structTypes.size()); // no duplicates; nested structs all listed
 }
 
 
@@ -1024,21 +1056,6 @@ lPrintFunctionDeclarations(FILE *file, const std::vector<Symbol *> &funcs) {
 }
 
 
-static void
-lPrintExternGlobals(FILE *file, const std::vector<Symbol *> &externGlobals) {
-    for (unsigned int i = 0; i < externGlobals.size(); ++i) {
-        Symbol *sym = externGlobals[i];
-        if (lRecursiveCheckValidParamType(sym->type))
-            Warning(sym->pos, "Not emitting declaration for symbol \"%s\" into "
-                    "generated header file since it (or some of its members) "
-                    "has types that are illegal in exported symbols.",
-                    sym->name.c_str());
-        else
-            fprintf(file, "extern %s;\n", sym->type->GetCDeclaration(sym->name).c_str());
-    }
-}
-
-
 static bool
 lIsExported(const Symbol *sym) {
     const FunctionType *ft = dynamic_cast<const FunctionType *>(sym->type);
@@ -1055,12 +1072,6 @@ lIsExternC(const Symbol *sym) {
 }
 
 
-static bool
-lIsExternGlobal(const Symbol *sym) {
-    return sym->storageClass == SC_EXTERN || sym->storageClass == SC_EXTERN_C;
-}
-
-
 bool
 Module::writeHeader(const char *fn) {
     FILE *f = fopen(fn, "w");
@@ -1113,13 +1124,6 @@ Module::writeHeader(const char *fn) {
     lGetExportedParamTypes(externCFuncs, &exportedStructTypes,
                            &exportedEnumTypes, &exportedVectorTypes);
 
-    // And do the same for the 'extern' globals
-    std::vector<Symbol *> externGlobals;
-    symbolTable->GetMatchingVariables(lIsExternGlobal, &externGlobals);
-    for (unsigned int i = 0; i < externGlobals.size(); ++i)
-        lGetExportedTypes(externGlobals[i]->type, &exportedStructTypes,
-                          &exportedEnumTypes, &exportedVectorTypes);
-
     // And print them
     lEmitVectorTypedefs(exportedVectorTypes, f);
     lEmitEnumDecls(exportedEnumTypes, f);
@@ -1146,15 +1150,6 @@ Module::writeHeader(const char *fn) {
     // end namespace
     fprintf(f, "\n#ifdef __cplusplus\n}\n#endif // __cplusplus\n");
 
-    // and only now emit externs for globals, outside of the ispc namespace
-    if (externGlobals.size() > 0) {
-        fprintf(f, "\n");
-        fprintf(f, "///////////////////////////////////////////////////////////////////////////\n");
-        fprintf(f, "// Globals declared \"extern\" from ispc code\n");
-        fprintf(f, "///////////////////////////////////////////////////////////////////////////\n");
-        lPrintExternGlobals(f, externGlobals);
-    }
-
     // end guard
     fprintf(f, "\n#endif // %s\n", guard.c_str());
 
@@ -1171,18 +1166,12 @@ Module::execPreprocessor(const char* infilename, llvm::raw_string_ostream* ostre
 
     llvm::raw_fd_ostream stderrRaw(2, false);
 
-#if defined(LLVM_3_0) || defined(LLVM_3_0svn) || defined(LLVM_3_1svn)
     clang::TextDiagnosticPrinter *diagPrinter =
         new clang::TextDiagnosticPrinter(stderrRaw, clang::DiagnosticOptions());
     llvm::IntrusiveRefCntPtr<clang::DiagnosticIDs> diagIDs(new clang::DiagnosticIDs);
     clang::DiagnosticsEngine *diagEngine = 
         new clang::DiagnosticsEngine(diagIDs, diagPrinter);
     inst.setDiagnostics(diagEngine);
-#else
-    clang::TextDiagnosticPrinter *diagPrinter = 
-        new clang::TextDiagnosticPrinter(stderrRaw, clang::DiagnosticOptions());
-    inst.createDiagnostics(0, NULL, diagPrinter);
-#endif
 
     clang::TargetOptions &options = inst.getTargetOpts();
     llvm::Triple triple(module->getTargetTriple());
@@ -1208,9 +1197,7 @@ Module::execPreprocessor(const char* infilename, llvm::raw_string_ostream* ostre
 
     clang::HeaderSearchOptions &headerOpts = inst.getHeaderSearchOpts();
     headerOpts.UseBuiltinIncludes = 0;
-#ifndef LLVM_2_9
     headerOpts.UseStandardSystemIncludes = 0;
-#endif // !LLVM_2_9
     headerOpts.UseStandardCXXIncludes = 0;
     if (g->debugPrint)
         headerOpts.Verbose = 1;
@@ -1418,7 +1405,7 @@ lAddExtractedGlobals(llvm::Module *module,
     for (unsigned int i = 0; i < globals[firstActive].size(); ++i) {
         RewriteGlobalInfo &rgi = globals[firstActive][i];
         llvm::GlobalVariable *gv = rgi.gv;
-        LLVM_TYPE_CONST llvm::Type *type = gv->getType()->getElementType();
+        llvm::Type *type = gv->getType()->getElementType();
         llvm::Constant *initializer = rgi.init;
 
         // Create a new global in the given model that matches the original
@@ -1482,7 +1469,7 @@ lCreateDispatchFunction(llvm::Module *module, llvm::Function *setISAFunc,
     // we'll start by generating an 'extern' declaration of each one that
     // we have in the current module so that we can then call out to that.
     llvm::Function *targetFuncs[Target::NUM_ISAS];
-    LLVM_TYPE_CONST llvm::FunctionType *ftype = NULL;
+    llvm::FunctionType *ftype = NULL;
 
     for (int i = 0; i < Target::NUM_ISAS; ++i) {
         if (funcs.func[i] == NULL) {
@@ -1490,10 +1477,14 @@ lCreateDispatchFunction(llvm::Module *module, llvm::Function *setISAFunc,
             continue;
         }
 
-        // Grab the type of the function as well.
-        if (ftype != NULL)
-            Assert(ftype == funcs.func[i]->getFunctionType());
-        else
+        // Grab the type of the function as well.  Note that the various
+        // functions will have different types if they have arguments that
+        // are pointers to structs, due to the fact that we mangle LLVM
+        // struct type names with the target vector width.  However,
+        // because we only allow uniform stuff to pass through the
+        // export'ed function layer, they should all have the same memory
+        // layout, so this is benign.
+        if (ftype == NULL)
             ftype = funcs.func[i]->getFunctionType();
 
         targetFuncs[i] = 
@@ -1548,24 +1539,13 @@ lCreateDispatchFunction(llvm::Module *module, llvm::Function *setISAFunc,
         for (; argIter != dispatchFunc->arg_end(); ++argIter)
             args.push_back(argIter);
         if (voidReturn) {
-#if defined(LLVM_3_0) || defined(LLVM_3_0svn) || defined(LLVM_3_1svn)
             llvm::CallInst::Create(targetFuncs[i], args, "", callBBlock);
-#else
-            llvm::CallInst::Create(targetFuncs[i], args.begin(), args.end(),
-                                   "", callBBlock);
-#endif
             llvm::ReturnInst::Create(*g->ctx, callBBlock);
         }
         else {
-#if defined(LLVM_3_0) || defined(LLVM_3_0svn) || defined(LLVM_3_1svn)
             llvm::Value *retValue = 
                 llvm::CallInst::Create(targetFuncs[i], args, "ret_value", 
                                        callBBlock);
-#else
-            llvm::Value *retValue = 
-                llvm::CallInst::Create(targetFuncs[i], args.begin(), args.end(),
-                                       "ret_value", callBBlock);
-#endif
             llvm::ReturnInst::Create(*g->ctx, retValue, callBBlock);
         }
 
@@ -1663,13 +1643,11 @@ Module::CompileAndOutput(const char *srcFile, const char *arch, const char *cpu,
         return errorCount > 0;
     }
     else {
-#ifndef LLVM_2_9
         if (outputType == CXX) {
             Error(SourcePos(), "Illegal to specify more then one target when "
                   "compiling C++ output.");
             return 1;
         }
-#endif // !LLVM_2_9
 
         // The user supplied multiple targets
         std::vector<std::string> targets = lExtractTargets(target);
diff --git a/module.h b/module.h
index 9032548f..d62728c8 100644
--- a/module.h
+++ b/module.h
@@ -1,5 +1,5 @@
 /*
-  Copyright (c) 2010-2011, Intel Corporation
+  Copyright (c) 2010-2012, Intel Corporation
   All rights reserved.
 
   Redistribution and use in source and binary forms, with or without
@@ -59,30 +59,33 @@ public:
     int CompileFile();
 
     /** Add a named type definition to the module. */
-    void AddTypeDef(Symbol *sym);
+    void AddTypeDef(const std::string &name, const Type *type,
+                    SourcePos pos);
 
     /** Add a new global variable corresponding to the given Symbol to the
         module.  If non-NULL, initExpr gives the initiailizer expression
         for the global's inital value. */ 
-    void AddGlobalVariable(Symbol *sym, Expr *initExpr, bool isConst);
+    void AddGlobalVariable(const std::string &name, const Type *type,
+                           Expr *initExpr, bool isConst,
+                           StorageClass storageClass, SourcePos pos);
 
     /** Add a declaration of the function defined by the given function
         symbol to the module. */
-    void AddFunctionDeclaration(Symbol *funSym, bool isInline);
+    void AddFunctionDeclaration(const std::string &name,
+                                const FunctionType *ftype, 
+                                StorageClass sc, bool isInline, SourcePos pos);
 
     /** Adds the function described by the declaration information and the
         provided statements to the module. */
-    void AddFunctionDefinition(Symbol *sym, const std::vector<Symbol *> &args,
-                               Stmt *code);
+    void AddFunctionDefinition(const std::string &name,
+                               const FunctionType *ftype, Stmt *code);
 
     /** After a source file has been compiled, output can be generated in a
         number of different formats. */
     enum OutputType { Asm,      /** Generate text assembly language output */
                       Bitcode,  /** Generate LLVM IR bitcode output */
                       Object,   /** Generate a native object file */
-#ifndef LLVM_2_9
                       CXX,      /** Generate a C++ file */
-#endif // !LLVM_2_9
                       Header    /** Generate a C/C++ header file with 
                                     declarations of 'export'ed functions, global
                                     variables, and the types used by them. */
diff --git a/opt.cpp b/opt.cpp
index 1ebfd4a4..6630331a 100644
--- a/opt.cpp
+++ b/opt.cpp
@@ -59,9 +59,6 @@
 #include <llvm/Constants.h>
 #include <llvm/Analysis/ConstantFolding.h>
 #include <llvm/Target/TargetLibraryInfo.h>
-#ifdef LLVM_2_9
-    #include <llvm/Support/StandardPasses.h>
-#endif // LLVM_2_9
 #include <llvm/ADT/Triple.h>
 #include <llvm/Transforms/Scalar.h>
 #include <llvm/Transforms/IPO.h>
@@ -188,13 +185,8 @@ static llvm::Instruction *
 lCallInst(llvm::Function *func, llvm::Value *arg0, llvm::Value *arg1, 
           const char *name, llvm::Instruction *insertBefore = NULL) {
     llvm::Value *args[2] = { arg0, arg1 };
-#if defined(LLVM_3_0) || defined(LLVM_3_0svn) || defined(LLVM_3_1svn)
     llvm::ArrayRef<llvm::Value *> newArgArray(&args[0], &args[2]);
     return llvm::CallInst::Create(func, newArgArray, name, insertBefore);
-#else
-    return llvm::CallInst::Create(func, &args[0], &args[2],
-                                  name, insertBefore);
-#endif
 }
 
 
@@ -203,13 +195,8 @@ lCallInst(llvm::Function *func, llvm::Value *arg0, llvm::Value *arg1,
           llvm::Value *arg2, const char *name,
           llvm::Instruction *insertBefore = NULL) {
     llvm::Value *args[3] = { arg0, arg1, arg2 };
-#if defined(LLVM_3_0) || defined(LLVM_3_0svn) || defined(LLVM_3_1svn)
     llvm::ArrayRef<llvm::Value *> newArgArray(&args[0], &args[3]);
     return llvm::CallInst::Create(func, newArgArray, name, insertBefore);
-#else
-    return llvm::CallInst::Create(func, &args[0], &args[3],
-                                  name, insertBefore);
-#endif
 }
 
 
@@ -219,13 +206,8 @@ lCallInst(llvm::Function *func, llvm::Value *arg0, llvm::Value *arg1,
           llvm::Value *arg2, llvm::Value *arg3, const char *name,
           llvm::Instruction *insertBefore = NULL) {
     llvm::Value *args[4] = { arg0, arg1, arg2, arg3 };
-#if defined(LLVM_3_0) || defined(LLVM_3_0svn) || defined(LLVM_3_1svn)
     llvm::ArrayRef<llvm::Value *> newArgArray(&args[0], &args[4]);
     return llvm::CallInst::Create(func, newArgArray, name, insertBefore);
-#else
-    return llvm::CallInst::Create(func, &args[0], &args[4],
-                                  name, insertBefore);
-#endif
 }
 #endif
 
@@ -234,28 +216,19 @@ lCallInst(llvm::Function *func, llvm::Value *arg0, llvm::Value *arg1,
           llvm::Value *arg2, llvm::Value *arg3, llvm::Value *arg4,
           const char *name, llvm::Instruction *insertBefore = NULL) {
     llvm::Value *args[5] = { arg0, arg1, arg2, arg3, arg4 };
-#if defined(LLVM_3_0) || defined(LLVM_3_0svn) || defined(LLVM_3_1svn)
     llvm::ArrayRef<llvm::Value *> newArgArray(&args[0], &args[5]);
     return llvm::CallInst::Create(func, newArgArray, name, insertBefore);
-#else
-    return llvm::CallInst::Create(func, &args[0], &args[5],
-                                  name, insertBefore);
-#endif
 }
 
+
 static llvm::Instruction *
 lCallInst(llvm::Function *func, llvm::Value *arg0, llvm::Value *arg1, 
           llvm::Value *arg2, llvm::Value *arg3, llvm::Value *arg4,
           llvm::Value *arg5, const char *name, 
           llvm::Instruction *insertBefore = NULL) {
     llvm::Value *args[6] = { arg0, arg1, arg2, arg3, arg4, arg5 };
-#if defined(LLVM_3_0) || defined(LLVM_3_0svn) || defined(LLVM_3_1svn)
     llvm::ArrayRef<llvm::Value *> newArgArray(&args[0], &args[6]);
     return llvm::CallInst::Create(func, newArgArray, name, insertBefore);
-#else
-    return llvm::CallInst::Create(func, &args[0], &args[6],
-                                  name, insertBefore);
-#endif
 }
 
 
@@ -263,14 +236,9 @@ static llvm::Instruction *
 lGEPInst(llvm::Value *ptr, llvm::Value *offset, const char *name,
          llvm::Instruction *insertBefore) {
     llvm::Value *index[1] = { offset };
-#if defined(LLVM_3_0) || defined(LLVM_3_0svn) || defined(LLVM_3_1svn)
     llvm::ArrayRef<llvm::Value *> arrayRef(&index[0], &index[1]);
     return llvm::GetElementPtrInst::Create(ptr, arrayRef, name,
                                            insertBefore);
-#else
-    return llvm::GetElementPtrInst::Create(ptr, &index[0], &index[1],
-                                           name, insertBefore);
-#endif
 }
 
 
@@ -286,6 +254,8 @@ Optimize(llvm::Module *module, int optLevel) {
     llvm::PassManager optPM;
     llvm::FunctionPassManager funcPM(module);
 
+    optPM.add(llvm::createVerifierPass());
+
     if (g->target.isa != Target::GENERIC) {
         llvm::TargetLibraryInfo *targetLibraryInfo =
             new llvm::TargetLibraryInfo(llvm::Triple(module->getTargetTriple()));
@@ -293,9 +263,7 @@ Optimize(llvm::Module *module, int optLevel) {
         optPM.add(new llvm::TargetData(module));
     }
 
-#if defined(LLVM_3_0) || defined(LLVM_3_0svn) || defined(LLVM_3_1svn)
     optPM.add(llvm::createIndVarSimplifyPass());
-#endif
 
     if (optLevel == 0) {
         // This is more or less the minimum set of optimizations that we
@@ -419,32 +387,6 @@ Optimize(llvm::Module *module, int optLevel) {
         optPM.add(CreateIntrinsicsOptPass());
         optPM.add(CreateVSelMovmskOptPass());
 
-#if defined(LLVM_2_9)
-        llvm::createStandardModulePasses(&optPM, 3, 
-                                         false /* opt size */,
-                                         true /* unit at a time */, 
-                                         g->opt.unrollLoops,
-                                         true /* simplify lib calls */,
-                                         false /* may have exceptions */,
-                                         llvm::createFunctionInliningPass());
-        llvm::createStandardLTOPasses(&optPM, true /* internalize pass */,
-                                      true /* inline once again */,
-                                      false /* verify after each pass */);
-        llvm::createStandardFunctionPasses(&optPM, 3);
-
-        optPM.add(CreateIsCompileTimeConstantPass(true));
-        optPM.add(CreateIntrinsicsOptPass());
-        optPM.add(CreateVSelMovmskOptPass());
-
-        llvm::createStandardModulePasses(&optPM, 3, 
-                                         false /* opt size */,
-                                         true /* unit at a time */, 
-                                         g->opt.unrollLoops,
-                                         true /* simplify lib calls */,
-                                         false /* may have exceptions */,
-                                         llvm::createFunctionInliningPass());
-
-#else
         funcPM.add(llvm::createTypeBasedAliasAnalysisPass());
         funcPM.add(llvm::createBasicAliasAnalysisPass());
         funcPM.add(llvm::createCFGSimplificationPass());
@@ -540,7 +482,7 @@ Optimize(llvm::Module *module, int optLevel) {
         optPM.add(llvm::createStripDeadPrototypesPass()); 
         optPM.add(llvm::createGlobalDCEPass());         
         optPM.add(llvm::createConstantMergePass());     
-#endif
+
         optPM.add(CreateMakeInternalFuncsStaticPass());
         optPM.add(llvm::createGlobalDCEPass());
     }
@@ -631,22 +573,18 @@ IntrinsicsOpt::IntrinsicsOpt()
         llvm::Intrinsic::getDeclaration(m->module, llvm::Intrinsic::x86_sse_movmsk_ps);
     maskInstructions.push_back(sseMovmsk);
     maskInstructions.push_back(m->module->getFunction("__movmsk"));
-#if defined(LLVM_3_0) || defined(LLVM_3_0svn) || defined(LLVM_3_1svn)
     llvm::Function *avxMovmsk = 
         llvm::Intrinsic::getDeclaration(m->module, llvm::Intrinsic::x86_avx_movmsk_ps_256);
     Assert(avxMovmsk != NULL);
     maskInstructions.push_back(avxMovmsk);
-#endif
 
     // And all of the blend instructions
     blendInstructions.push_back(BlendInstruction(
         llvm::Intrinsic::getDeclaration(m->module, llvm::Intrinsic::x86_sse41_blendvps),
         0xf, 0, 1, 2));
-#if defined(LLVM_3_0) || defined(LLVM_3_0svn) || defined(LLVM_3_1svn)
     blendInstructions.push_back(BlendInstruction(
         llvm::Intrinsic::getDeclaration(m->module, llvm::Intrinsic::x86_avx_blendv_ps_256),
         0xff, 0, 1, 2));
-#endif
 }
 
 
@@ -744,7 +682,6 @@ lIsUndef(llvm::Value *value) {
 
 bool
 IntrinsicsOpt::runOnBasicBlock(llvm::BasicBlock &bb) {
-#if defined(LLVM_3_0) || defined(LLVM_3_0svn) || defined(LLVM_3_1svn)
     llvm::Function *avxMaskedLoad32 = 
         llvm::Intrinsic::getDeclaration(m->module, llvm::Intrinsic::x86_avx_maskload_ps_256);
     llvm::Function *avxMaskedLoad64 = 
@@ -755,7 +692,6 @@ IntrinsicsOpt::runOnBasicBlock(llvm::BasicBlock &bb) {
         llvm::Intrinsic::getDeclaration(m->module, llvm::Intrinsic::x86_avx_maskstore_pd_256);
     Assert(avxMaskedLoad32 != NULL && avxMaskedStore32 != NULL);
     Assert(avxMaskedLoad64 != NULL && avxMaskedStore64 != NULL);
-#endif
 
     bool modifiedAny = false;
  restart:
@@ -827,7 +763,6 @@ IntrinsicsOpt::runOnBasicBlock(llvm::BasicBlock &bb) {
                 goto restart;
             }
         }
-#if defined(LLVM_3_0) || defined(LLVM_3_0svn) || defined(LLVM_3_1svn)
         else if (callInst->getCalledFunction() == avxMaskedLoad32 ||
                  callInst->getCalledFunction() == avxMaskedLoad64) {
             llvm::Value *factor = callInst->getArgOperand(1);
@@ -894,7 +829,6 @@ IntrinsicsOpt::runOnBasicBlock(llvm::BasicBlock &bb) {
                 goto restart;
             }
         }
-#endif
     }
     return modifiedAny;
 }
@@ -1050,7 +984,7 @@ static llvm::Value *
 lCheckForActualPointer(llvm::Value *v) {
     if (v == NULL)
         return NULL;
-    else if (llvm::isa<LLVM_TYPE_CONST llvm::PointerType>(v->getType()))
+    else if (llvm::isa<llvm::PointerType>(v->getType()))
         return v;
     else if (llvm::isa<llvm::PtrToIntInst>(v))
         return v;
@@ -1247,13 +1181,9 @@ lGetBasePtrAndOffsets(llvm::Value *ptrs, llvm::Value **offsets,
         }
 
         Assert(base != NULL);
-#ifdef LLVM_2_9
-        *offsets = llvm::ConstantVector::get(delta);
-#else
         llvm::ArrayRef<llvm::Constant *> deltas(&delta[0], 
                                                 &delta[elements.size()]);
         *offsets = llvm::ConstantVector::get(deltas);
-#endif
         return base;
     }
 
@@ -1978,8 +1908,8 @@ MaskedStoreOptPass::runOnBasicBlock(llvm::BasicBlock &bb) {
         }
         else if (maskAsInt == allOnMask) {
             // The mask is all on, so turn this into a regular store
-            LLVM_TYPE_CONST llvm::Type *rvalueType = rvalue->getType();
-            LLVM_TYPE_CONST llvm::Type *ptrType = 
+            llvm::Type *rvalueType = rvalue->getType();
+            llvm::Type *ptrType = 
                 llvm::PointerType::get(rvalueType, 0);
 
             lvalue = new llvm::BitCastInst(lvalue, ptrType, "lvalue_to_ptr_type", callInst);
@@ -2081,7 +2011,7 @@ MaskedLoadOptPass::runOnBasicBlock(llvm::BasicBlock &bb) {
         }
         else if (maskAsInt == allOnMask) {
             // The mask is all on, so turn this into a regular load
-            LLVM_TYPE_CONST llvm::Type *ptrType = 
+            llvm::Type *ptrType = 
                 llvm::PointerType::get(callInst->getType(), 0);
             ptr = new llvm::BitCastInst(ptr, ptrType, "ptr_cast_for_load", 
                                         callInst);
@@ -2139,17 +2069,17 @@ lIsSafeToBlend(llvm::Value *lvalue) {
     else {
         llvm::AllocaInst *ai = llvm::dyn_cast<llvm::AllocaInst>(lvalue);
         if (ai) {
-            LLVM_TYPE_CONST llvm::Type *type = ai->getType();
-            LLVM_TYPE_CONST llvm::PointerType *pt = 
-                llvm::dyn_cast<LLVM_TYPE_CONST llvm::PointerType>(type);
+            llvm::Type *type = ai->getType();
+            llvm::PointerType *pt = 
+                llvm::dyn_cast<llvm::PointerType>(type);
             assert(pt != NULL);
             type = pt->getElementType();
-            LLVM_TYPE_CONST llvm::ArrayType *at;
-            while ((at = llvm::dyn_cast<LLVM_TYPE_CONST llvm::ArrayType>(type))) {
+            llvm::ArrayType *at;
+            while ((at = llvm::dyn_cast<llvm::ArrayType>(type))) {
                 type = at->getElementType();
             }
-            LLVM_TYPE_CONST llvm::VectorType *vt = 
-                llvm::dyn_cast<LLVM_TYPE_CONST llvm::VectorType>(type);
+            llvm::VectorType *vt = 
+                llvm::dyn_cast<llvm::VectorType>(type);
             return (vt != NULL && 
                     (int)vt->getNumElements() == g->target.vectorWidth);
         }
@@ -2302,7 +2232,7 @@ lComputeCommonPointer(llvm::Value *base, llvm::Value *offsets,
 
 struct ScatterImpInfo {
     ScatterImpInfo(const char *pName, const char *msName, 
-                   LLVM_TYPE_CONST llvm::Type *vpt, int a)
+                   llvm::Type *vpt, int a)
         : align(a) {
         pseudoFunc = m->module->getFunction(pName);
         maskedStoreFunc = m->module->getFunction(msName);
@@ -2311,7 +2241,7 @@ struct ScatterImpInfo {
     }
     llvm::Function *pseudoFunc;
     llvm::Function *maskedStoreFunc;
-    LLVM_TYPE_CONST llvm::Type *vecPtrType;
+    llvm::Type *vecPtrType;
     const int align;
 };
     
@@ -2812,7 +2742,7 @@ lCoalescePerfInfo(const std::vector<llvm::CallInst *> &coalesceGroup,
  */
 llvm::Value *
 lGEPAndLoad(llvm::Value *basePtr, int64_t offset, int align,
-            llvm::Instruction *insertBefore, LLVM_TYPE_CONST llvm::Type *type) {
+            llvm::Instruction *insertBefore, llvm::Type *type) {
     llvm::Value *ptr = lGEPInst(basePtr, LLVMInt64(offset), "new_base",
                                 insertBefore);
     ptr = new llvm::BitCastInst(ptr, llvm::PointerType::get(type, 0),
@@ -2866,7 +2796,7 @@ lEmitLoads(llvm::Value *basePtr, std::vector<CoalescedLoadOp> &loadOps,
         }
         case 4: {
             // 4-wide vector load
-            LLVM_TYPE_CONST llvm::VectorType *vt =
+            llvm::VectorType *vt =
                 llvm::VectorType::get(LLVMTypes::Int32Type, 4);
             loadOps[i].load = lGEPAndLoad(basePtr, start, align,
                                           insertBefore, vt);
@@ -2874,7 +2804,7 @@ lEmitLoads(llvm::Value *basePtr, std::vector<CoalescedLoadOp> &loadOps,
         }
         case 8: {
             // 8-wide vector load
-            LLVM_TYPE_CONST llvm::VectorType *vt =
+            llvm::VectorType *vt =
                 llvm::VectorType::get(LLVMTypes::Int32Type, 8);
             loadOps[i].load = lGEPAndLoad(basePtr, start, align, 
                                           insertBefore, vt);
@@ -2966,7 +2896,7 @@ lApplyLoad2(llvm::Value *result, const CoalescedLoadOp &load,
             Assert(set[elt] == false && set[elt+1] == false);
 
             // In this case, we bitcast from a 4xi32 to a 2xi64 vector
-            LLVM_TYPE_CONST llvm::Type *vec2x64Type = 
+            llvm::Type *vec2x64Type = 
                 llvm::VectorType::get(LLVMTypes::Int64Type, 2);
             result = new llvm::BitCastInst(result, vec2x64Type, "to2x64",
                                            insertBefore);
@@ -2978,7 +2908,7 @@ lApplyLoad2(llvm::Value *result, const CoalescedLoadOp &load,
                                                      "insert64", insertBefore);
             
             // And back to 4xi32.
-            LLVM_TYPE_CONST llvm::Type *vec4x32Type = 
+            llvm::Type *vec4x32Type = 
                 llvm::VectorType::get(LLVMTypes::Int32Type, 4);
             result = new llvm::BitCastInst(result, vec4x32Type, "to4x32",
                                            insertBefore);
@@ -3058,7 +2988,7 @@ lApplyLoad4(llvm::Value *result, const CoalescedLoadOp &load,
 static llvm::Value *
 lAssemble4Vector(const std::vector<CoalescedLoadOp> &loadOps, 
                  const int64_t offsets[4], llvm::Instruction *insertBefore) {
-    LLVM_TYPE_CONST llvm::Type *returnType = 
+    llvm::Type *returnType = 
         llvm::VectorType::get(LLVMTypes::Int32Type, 4);
     llvm::Value *result = llvm::UndefValue::get(returnType);
 
@@ -3198,7 +3128,7 @@ lApplyLoad12s(llvm::Value *result, const std::vector<CoalescedLoadOp> &loadOps,
 static llvm::Value *
 lAssemble4Vector(const std::vector<CoalescedLoadOp> &loadOps, 
                  const int64_t offsets[4], llvm::Instruction *insertBefore) {
-    LLVM_TYPE_CONST llvm::Type *returnType = 
+    llvm::Type *returnType = 
         llvm::VectorType::get(LLVMTypes::Int32Type, 4);
     llvm::Value *result = llvm::UndefValue::get(returnType);
 
@@ -3405,13 +3335,9 @@ lCoalesceGathers(const std::vector<llvm::CallInst *> &coalesceGroup) {
     memory. */
 static bool
 lInstructionMayWriteToMemory(llvm::Instruction *inst) {
-#ifdef LLVM_2_9
-    if (llvm::isa<llvm::StoreInst>(inst))
-#else
     if (llvm::isa<llvm::StoreInst>(inst) ||
         llvm::isa<llvm::AtomicRMWInst>(inst) ||
         llvm::isa<llvm::AtomicCmpXchgInst>(inst))
-#endif // !LLVM_2_9
         // FIXME: we could be less conservative and try to allow stores if
         // we are sure that the pointers don't overlap..
         return true;
diff --git a/parse.yy b/parse.yy
index 1fa8336f..30144a67 100644
--- a/parse.yy
+++ b/parse.yy
@@ -173,8 +173,11 @@ struct ForeachDimension {
 }
 
 
-%token TOKEN_INT32_CONSTANT TOKEN_UINT32_CONSTANT TOKEN_INT64_CONSTANT
-%token TOKEN_UINT64_CONSTANT TOKEN_FLOAT_CONSTANT TOKEN_STRING_C_LITERAL
+%token TOKEN_INT32_CONSTANT TOKEN_UINT32_CONSTANT 
+%token TOKEN_INT64_CONSTANT TOKEN_UINT64_CONSTANT 
+%token TOKEN_INT32DOTDOTDOT_CONSTANT TOKEN_UINT32DOTDOTDOT_CONSTANT 
+%token TOKEN_INT64DOTDOTDOT_CONSTANT TOKEN_UINT64DOTDOTDOT_CONSTANT
+%token TOKEN_FLOAT_CONSTANT TOKEN_STRING_C_LITERAL
 %token TOKEN_IDENTIFIER TOKEN_STRING_LITERAL TOKEN_TYPE_NAME TOKEN_NULL
 %token TOKEN_PTR_OP TOKEN_INC_OP TOKEN_DEC_OP TOKEN_LEFT_OP TOKEN_RIGHT_OP 
 %token TOKEN_LE_OP TOKEN_GE_OP TOKEN_EQ_OP TOKEN_NE_OP
@@ -196,7 +199,7 @@ struct ForeachDimension {
 %token TOKEN_CIF TOKEN_CDO TOKEN_CFOR TOKEN_CWHILE TOKEN_CBREAK
 %token TOKEN_CCONTINUE TOKEN_CRETURN TOKEN_SYNC TOKEN_PRINT TOKEN_ASSERT
 
-%type <expr> primary_expression postfix_expression
+%type <expr> primary_expression postfix_expression integer_dotdotdot
 %type <expr> unary_expression cast_expression funcall_expression launch_expression
 %type <expr> multiplicative_expression additive_expression shift_expression
 %type <expr> relational_expression equality_expression and_expression
@@ -250,6 +253,12 @@ struct ForeachDimension {
 
 string_constant
     : TOKEN_STRING_LITERAL { $$ = new std::string(*yylval.stringVal); }
+    | string_constant TOKEN_STRING_LITERAL
+    {
+        std::string s = *((std::string *)$1);
+        s += *yylval.stringVal;
+        $$ = new std::string(s);
+    }
     ;
 
 primary_expression
@@ -622,7 +631,9 @@ declaration_statement
                 if ($1->declarators[i] == NULL)
                     Assert(m->errorCount > 0);
                 else
-                    m->AddTypeDef($1->declarators[i]->GetSymbol());
+                    m->AddTypeDef($1->declarators[i]->name,
+                                  $1->declarators[i]->type,
+                                  $1->declarators[i]->pos);
             }
             $$ = NULL;
         }
@@ -801,7 +812,6 @@ storage_class_specifier
     : TOKEN_TYPEDEF { $$ = SC_TYPEDEF; }
     | TOKEN_EXTERN { $$ = SC_EXTERN; }
     | TOKEN_EXTERN TOKEN_STRING_C_LITERAL  { $$ = SC_EXTERN_C; }
-    | TOKEN_EXPORT { $$ = SC_EXPORT; }
     | TOKEN_STATIC { $$ = SC_STATIC; }
     ;
 
@@ -864,7 +874,6 @@ struct_or_union_specifier
               std::vector<SourcePos> elementPositions;
               GetStructTypesNamesPositions(*$3, &elementTypes, &elementNames,
                                            &elementPositions);
-              // FIXME: should be unbound
               $$ = new StructType("", elementTypes, elementNames, elementPositions,
                                   false, Variability::Unbound, @1);
           }
@@ -882,10 +891,9 @@ struct_or_union_specifier
     | struct_or_union struct_or_union_name
       { 
           const Type *st = m->symbolTable->LookupType($2); 
-          if (!st) {
-              std::vector<std::string> alternates = m->symbolTable->ClosestTypeMatch($2);
-              std::string alts = lGetAlternates(alternates);
-              Error(@2, "Struct type \"%s\" unknown.%s", $2, alts.c_str());
+          if (st == NULL) {
+              st = new UndefinedStructType($2, Variability::Unbound, false, @2);
+              m->symbolTable->AddType($2, st, @2);
           }
           else if (dynamic_cast<const StructType *>(st) == NULL)
               Error(@2, "Type \"%s\" is not a struct type! (%s)", $2,
@@ -976,6 +984,11 @@ specifier_qualifier_list
                       "function declarations.");
                 $$ = $2;
             }
+            else if ($1 == TYPEQUAL_EXPORT) {
+                Error(@1, "\"export\" qualifier is illegal outside of "
+                      "function declarations.");
+                $$ = $2;
+            }
             else
                 FATAL("Unhandled type qualifier in parser.");
         }
@@ -1108,6 +1121,7 @@ type_qualifier
     | TOKEN_UNIFORM    { $$ = TYPEQUAL_UNIFORM; }
     | TOKEN_VARYING    { $$ = TYPEQUAL_VARYING; }
     | TOKEN_TASK       { $$ = TYPEQUAL_TASK; }
+    | TOKEN_EXPORT     { $$ = TYPEQUAL_EXPORT; }
     | TOKEN_INLINE     { $$ = TYPEQUAL_INLINE; }
     | TOKEN_SIGNED     { $$ = TYPEQUAL_SIGNED; }
     | TOKEN_UNSIGNED   { $$ = TYPEQUAL_UNSIGNED; }
@@ -1160,7 +1174,7 @@ direct_declarator
     : TOKEN_IDENTIFIER
       {
           Declarator *d = new Declarator(DK_BASE, @1);
-          d->sym = new Symbol(yytext, @1);
+          d->name = yytext;
           $$ = d;
       }
     | '(' declarator ')' 
@@ -1335,8 +1349,10 @@ type_name
     {
         if ($1 == NULL || $2 == NULL)
             $$ = NULL;
-        else
-            $$ = $2->GetType($1, NULL);
+        else {
+            $2->InitFromType($1, NULL);
+            $$ = $2->type;
+        }
     }
     ;
 
@@ -1614,11 +1630,34 @@ foreach_active_identifier
     }
     ;
 
+integer_dotdotdot
+    : TOKEN_INT32DOTDOTDOT_CONSTANT {
+        $$ = new ConstExpr(AtomicType::UniformInt32->GetAsConstType(),
+                           (int32_t)yylval.intVal, @1); 
+    }
+    | TOKEN_UINT32DOTDOTDOT_CONSTANT {
+        $$ = new ConstExpr(AtomicType::UniformUInt32->GetAsConstType(),
+                           (uint32_t)yylval.intVal, @1); 
+    }
+    | TOKEN_INT64DOTDOTDOT_CONSTANT {
+        $$ = new ConstExpr(AtomicType::UniformInt64->GetAsConstType(),
+                           (int64_t)yylval.intVal, @1); 
+    }
+    | TOKEN_UINT64DOTDOTDOT_CONSTANT {
+        $$ = new ConstExpr(AtomicType::UniformUInt64->GetAsConstType(),
+                           (uint64_t)yylval.intVal, @1); 
+    }
+    ;
+
 foreach_dimension_specifier
     : foreach_identifier '=' assignment_expression TOKEN_DOTDOTDOT assignment_expression
     {
         $$ = new ForeachDimension($1, $3, $5);
     }
+    | foreach_identifier '=' integer_dotdotdot assignment_expression
+    {
+        $$ = new ForeachDimension($1, $3, $4);
+    }
     ;
 
 foreach_dimension_list
@@ -1817,11 +1856,14 @@ function_definition
     } 
     compound_statement
     {
-        std::vector<Symbol *> args;
         if ($2 != NULL) {
-            Symbol *sym = $2->GetFunctionInfo($1, &args);
-            if (sym != NULL)
-                m->AddFunctionDefinition(sym, args, $4);
+            $2->InitFromDeclSpecs($1);
+            const FunctionType *funcType =
+                dynamic_cast<const FunctionType *>($2->type);
+            if (funcType == NULL)
+                Assert(m->errorCount > 0);
+            else
+                m->AddFunctionDefinition($2->name, funcType, $4);
         }
         m->symbolTable->PopScope(); // push in lAddFunctionParams();
     }
@@ -1931,35 +1973,27 @@ lAddDeclaration(DeclSpecs *ds, Declarator *decl) {
         // Error happened earlier during parsing
         return;
 
+    decl->InitFromDeclSpecs(ds);
     if (ds->storageClass == SC_TYPEDEF)
-        m->AddTypeDef(decl->GetSymbol());
+        m->AddTypeDef(decl->name, decl->type, decl->pos);
     else {
-        const Type *t = decl->GetType(ds);
-        if (t == NULL) {
+        if (decl->type == NULL) {
             Assert(m->errorCount > 0);
             return;
         }
 
-        Symbol *sym = decl->GetSymbol();
-        if (sym == NULL) {
-            Assert(m->errorCount > 0);
-            return;
-        }
-
-        const FunctionType *ft = dynamic_cast<const FunctionType *>(t);
+        decl->type = decl->type->ResolveUnboundVariability(Variability::Varying);
+        
+        const FunctionType *ft = dynamic_cast<const FunctionType *>(decl->type);
         if (ft != NULL) {
-            sym->type = ft;
-            sym->storageClass = ds->storageClass;
             bool isInline = (ds->typeQualifiers & TYPEQUAL_INLINE);
-            m->AddFunctionDeclaration(sym, isInline);
+            m->AddFunctionDeclaration(decl->name, ft, ds->storageClass,
+                                      isInline, decl->pos);
         }
         else {
-            if (sym->type == NULL)
-                Assert(m->errorCount > 0);
-            else
-                sym->type = sym->type->ResolveUnboundVariability(Variability::Varying);
             bool isConst = (ds->typeQualifiers & TYPEQUAL_CONST) != 0;
-            m->AddGlobalVariable(sym, decl->initExpr, isConst);
+            m->AddGlobalVariable(decl->name, decl->type, decl->initExpr,
+                                 isConst, decl->storageClass, decl->pos);
         }
     }
 }
@@ -1988,16 +2022,13 @@ lAddFunctionParams(Declarator *decl) {
     // now loop over its parameters and add them to the symbol table
     for (unsigned int i = 0; i < decl->functionParams.size(); ++i) {
         Declaration *pdecl = decl->functionParams[i];
-        if (pdecl == NULL || pdecl->declarators.size() == 0)
-            // zero size declarators array corresponds to an anonymous 
-            // parameter
-            continue;
-        Assert(pdecl->declarators.size() == 1);
-        Symbol *sym = pdecl->declarators[0]->GetSymbol();
-        if (sym == NULL || sym->type == NULL)
+        Assert(pdecl != NULL && pdecl->declarators.size() == 1);
+        Declarator *declarator = pdecl->declarators[0];
+        if (declarator == NULL)
             Assert(m->errorCount > 0);
         else {
-            sym->type = sym->type->ResolveUnboundVariability(Variability::Varying);
+            Symbol *sym = new Symbol(declarator->name, declarator->pos,
+                                     declarator->type, declarator->storageClass);
 #ifndef NDEBUG
             bool ok = m->symbolTable->AddVariable(sym);
             if (ok == false)
@@ -2064,8 +2095,6 @@ lGetStorageClassString(StorageClass sc) {
         return "";
     case SC_EXTERN:
         return "extern";
-    case SC_EXPORT:
-        return "export";
     case SC_STATIC:
         return "static";
     case SC_TYPEDEF:
diff --git a/stdlib.ispc b/stdlib.ispc
index f5984277..fd0df7ce 100644
--- a/stdlib.ispc
+++ b/stdlib.ispc
@@ -1,6 +1,6 @@
 // -*- mode: c++ -*-
 /*
-  Copyright (c) 2010-2011, Intel Corporation
+  Copyright (c) 2010-2012, Intel Corporation
   All rights reserved.
 
   Redistribution and use in source and binary forms, with or without
@@ -746,6 +746,125 @@ static inline void prefetch_nt(const void * varying ptr) {
     }
 }
 
+///////////////////////////////////////////////////////////////////////////
+// non-short-circuiting alternatives
+
+__declspec(safe,cost1)
+static inline bool and(bool a, bool b) {
+    return a && b;
+}
+
+__declspec(safe,cost1)
+static inline uniform bool and(uniform bool a, uniform bool b) {
+    return a && b;
+}
+
+__declspec(safe,cost1)
+static inline bool or(bool a, bool b) {
+    return a || b;
+}
+
+__declspec(safe,cost1)
+static inline uniform bool or(uniform bool a, uniform bool b) {
+    return a || b;
+}
+
+__declspec(safe,cost1)
+static inline int8 select(bool c, int8 a, int8 b) {
+    return c ? a : b;
+}
+
+__declspec(safe,cost1)
+static inline int8 select(uniform bool c, int8 a, int8 b) {
+    return c ? a : b;
+}
+
+__declspec(safe,cost1)
+static inline uniform int8 select(uniform bool c, uniform int8 a,
+                                  uniform int8 b) {
+    return c ? a : b;
+}
+
+__declspec(safe,cost1)
+static inline int16 select(bool c, int16 a, int16 b) {
+    return c ? a : b;
+}
+
+__declspec(safe,cost1)
+static inline int16 select(uniform bool c, int16 a, int16 b) {
+    return c ? a : b;
+}
+
+__declspec(safe,cost1)
+static inline uniform int16 select(uniform bool c, uniform int16 a,
+                                   uniform int16 b) {
+    return c ? a : b;
+}
+
+__declspec(safe,cost1)
+static inline int32 select(bool c, int32 a, int32 b) {
+    return c ? a : b;
+}
+
+__declspec(safe,cost1)
+static inline int32 select(uniform bool c, int32 a, int32 b) {
+    return c ? a : b;
+}
+
+__declspec(safe,cost1)
+static inline uniform int32 select(uniform bool c, uniform int32 a,
+                                   uniform int32 b) {
+    return c ? a : b;
+}
+
+__declspec(safe,cost1)
+static inline int64 select(bool c, int64 a, int64 b) {
+    return c ? a : b;
+}
+
+__declspec(safe,cost1)
+static inline int64 select(uniform bool c, int64 a, int64 b) {
+    return c ? a : b;
+}
+
+__declspec(safe,cost1)
+static inline uniform int64 select(uniform bool c, uniform int64 a,
+                                   uniform int64 b) {
+    return c ? a : b;
+}
+
+__declspec(safe,cost1)
+static inline float select(bool c, float a, float b) {
+    return c ? a : b;
+}
+
+__declspec(safe,cost1)
+static inline float select(uniform bool c, float a, float b) {
+    return c ? a : b;
+}
+
+__declspec(safe,cost1)
+static inline uniform float select(uniform bool c, uniform float a,
+                                   uniform float b) {
+    return c ? a : b;
+}
+
+__declspec(safe,cost1)
+static inline double select(bool c, double a, double b) {
+    return c ? a : b;
+}
+
+__declspec(safe,cost1)
+static inline double select(uniform bool c, double a, double b) {
+    return c ? a : b;
+}
+
+__declspec(safe,cost1)
+static inline uniform double select(uniform bool c, uniform double a,
+                                    uniform double b) {
+    return c ? a : b;
+}
+
 ///////////////////////////////////////////////////////////////////////////
 // Horizontal ops / reductions
 
@@ -1469,22 +1588,17 @@ static inline void memory_barrier() {
 
 #define DEFINE_ATOMIC_OP(TA,TB,OPA,OPB,MASKTYPE)                        \
 static inline TA atomic_##OPA##_global(uniform TA * uniform ptr, TA value) { \
-    memory_barrier();                                                   \
     TA ret = __atomic_##OPB##_##TB##_global(ptr, value, (MASKTYPE)__mask); \
-    memory_barrier();                                                   \
     return ret;                                                         \
 }                                                                       \
 static inline uniform TA atomic_##OPA##_global(uniform TA * uniform ptr, \
                                                uniform TA value) {      \
-    memory_barrier();                                                   \
     uniform TA ret = __atomic_##OPB##_uniform_##TB##_global(ptr, value); \
-    memory_barrier();                                                   \
     return ret;                                                         \
 }                                                                       \
 static inline TA atomic_##OPA##_global(uniform TA * varying ptr, TA value) { \
     uniform TA * uniform ptrArray[programCount];                        \
     ptrArray[programIndex] = ptr;                                       \
-    memory_barrier();                                                   \
     TA ret;                                                             \
     __foreach_active (i) {                                              \
         uniform TA * uniform p = ptrArray[i];                           \
@@ -1492,13 +1606,11 @@ static inline TA atomic_##OPA##_global(uniform TA * varying ptr, TA value) { \
         uniform TA r = __atomic_##OPB##_uniform_##TB##_global(p, v);    \
         ret = insert(ret, i, r);                                        \
     }                                                                   \
-    memory_barrier();                                                   \
     return ret;                                                         \
 }                                                                       \
 
 #define DEFINE_ATOMIC_SWAP(TA,TB)                                       \
 static inline TA atomic_swap_global(uniform TA * uniform ptr, TA value) { \
-    memory_barrier();                                                   \
     uniform int i = 0;                                                  \
     TA ret[programCount];                                               \
     TA memVal;                                                          \
@@ -1528,20 +1640,16 @@ static inline TA atomic_swap_global(uniform TA * uniform ptr, TA value) { \
     /* And the last instance that wanted to swap gets the value we      \
        originally got back from memory... */                            \
     ret[lastSwap] = memVal;                                             \
-    memory_barrier();                                                   \
     return ret[programIndex];                                           \
 }                                                                       \
 static inline uniform TA atomic_swap_global(uniform TA * uniform ptr,   \
                                             uniform TA value) {         \
-    memory_barrier();                                                   \
     uniform TA ret = __atomic_swap_uniform_##TB##_global(ptr, value);   \
-    memory_barrier();                                                   \
     return ret;                                                         \
 }                                                                       \
 static inline TA atomic_swap_global(uniform TA * varying ptr, TA value) { \
     uniform TA * uniform ptrArray[programCount];                        \
     ptrArray[programIndex] = ptr;                                       \
-    memory_barrier();                                                   \
     TA ret;                                                             \
     __foreach_active (i) {                                              \
         uniform TA * uniform p = ptrArray[i];                           \
@@ -1549,7 +1657,6 @@ static inline TA atomic_swap_global(uniform TA * varying ptr, TA value) { \
         uniform TA r = __atomic_swap_uniform_##TB##_global(p, v);       \
         ret = insert(ret, i, r);                                        \
     }                                                                   \
-    memory_barrier();                                                   \
     return ret;                                                         \
 }                                                                       \
 
@@ -1557,25 +1664,19 @@ static inline TA atomic_swap_global(uniform TA * varying ptr, TA value) { \
 static inline TA atomic_##OPA##_global(uniform TA * uniform ptr, TA value) { \
     uniform TA oneval = reduce_##OPA(value);                            \
     TA ret;                                                             \
-    if (lanemask() != 0) {                                              \
-        memory_barrier();                                               \
+    if (lanemask() != 0)                                                \
         ret = __atomic_##OPB##_uniform_##TB##_global(ptr, oneval);      \
-        memory_barrier();                                               \
-    }                                                                   \
     return ret;                                                         \
 }                                                                       \
 static inline uniform TA atomic_##OPA##_global(uniform TA * uniform ptr, \
                                                uniform TA value) {      \
-    memory_barrier();                                                   \
     uniform TA ret = __atomic_##OPB##_uniform_##TB##_global(ptr, value); \
-    memory_barrier();                                                   \
     return ret;                                                         \
 }                                                                       \
 static inline TA atomic_##OPA##_global(uniform TA * varying ptr,        \
                                        TA value) {                      \
     uniform TA * uniform ptrArray[programCount];                        \
     ptrArray[programIndex] = ptr;                                       \
-    memory_barrier();                                                   \
     TA ret;                                                             \
     __foreach_active (i) {                                              \
         uniform TA * uniform p = ptrArray[i];                           \
@@ -1583,7 +1684,6 @@ static inline TA atomic_##OPA##_global(uniform TA * varying ptr,        \
         uniform TA r = __atomic_##OPB##_uniform_##TB##_global(p, v);    \
         ret = insert(ret, i, r);                                        \
     }                                                                   \
-    memory_barrier();                                                   \
     return ret;                                                         \
 }
 
@@ -1638,25 +1738,20 @@ DEFINE_ATOMIC_SWAP(double,double)
 #define ATOMIC_DECL_CMPXCHG(TA, TB, MASKTYPE)                           \
 static inline uniform TA atomic_compare_exchange_global(               \
          uniform TA * uniform ptr, uniform TA oldval, uniform TA newval) { \
-    memory_barrier();                                                      \
     uniform TA ret =                                                    \
         __atomic_compare_exchange_uniform_##TB##_global(ptr, oldval, newval); \
-    memory_barrier();                                                   \
     return ret;                                                         \
 }                                                                       \
 static inline TA atomic_compare_exchange_global(                           \
          uniform TA * uniform ptr, TA oldval, TA newval) {                 \
-    memory_barrier();                                                      \
     TA ret = __atomic_compare_exchange_##TB##_global(ptr, oldval, newval,  \
                                                      (MASKTYPE)__mask);    \
-    memory_barrier();                                                      \
     return ret;                                                            \
 } \
 static inline TA atomic_compare_exchange_global(               \
          uniform TA * varying ptr, TA oldval, TA newval) { \
     uniform TA * uniform ptrArray[programCount];                        \
     ptrArray[programIndex] = ptr;                                       \
-    memory_barrier();                                                   \
     TA ret;                                                             \
     __foreach_active (i) {                                              \
         uniform TA r =                                                  \
@@ -1665,7 +1760,6 @@ static inline TA atomic_compare_exchange_global(               \
                                                             extract(newval, i)); \
         ret = insert(ret, i, r);                                        \
     }                                                                   \
-    memory_barrier();                                                   \
     return ret;                                                         \
 }
 
@@ -1678,6 +1772,49 @@ ATOMIC_DECL_CMPXCHG(double, double, IntMaskType)
 
 #undef ATOMIC_DECL_CMPXCHG
 
+// void * variants of swap and compare exchange
+
+static inline void *atomic_swap_global(void ** uniform ptr,
+                                       void * value) {
+    return (void *)atomic_swap_global((intptr_t * uniform)ptr,
+                                      (intptr_t)value);
+}
+
+static inline void * uniform atomic_swap_global(void ** uniform ptr,
+                                                void * uniform value) {
+    return (void * uniform)atomic_swap_global((intptr_t * uniform)ptr,
+                                              (uniform intptr_t)value);
+}
+
+static inline void *atomic_swap_global(void ** ptr, void * value) {
+    return (void *)atomic_swap_global((intptr_t *)ptr,
+                                      (intptr_t)value);
+}
+
+static inline void * 
+atomic_compare_exchange_global(void ** uniform ptr, 
+                               void * oldval, void * newval) {
+    return (void *)atomic_compare_exchange_global((intptr_t * uniform)ptr,
+                                                  (intptr_t)oldval,
+                                                  (intptr_t)newval);
+}
+
+static inline void * uniform
+atomic_compare_exchange_global(void ** uniform ptr, void * uniform oldval, 
+                               void * uniform newval) { 
+    return (void * uniform)atomic_compare_exchange_global((intptr_t * uniform)ptr,
+                                                          (uniform intptr_t)oldval,
+                                                          (uniform intptr_t)newval);
+}
+
+static inline void *
+atomic_compare_exchange_global(void ** ptr, void * oldval,
+                               void * newval) {
+    return (void *)atomic_compare_exchange_global((intptr_t *)ptr,
+                                                  (intptr_t)oldval,
+                                                  (intptr_t)newval);
+}
+
 ///////////////////////////////////////////////////////////////////////////
 // local atomics
 
@@ -1849,6 +1986,49 @@ LOCAL_CMPXCHG(double)
 #undef LOCAL_ATOMIC
 #undef LOCAL_CMPXCHG
 
+// void * variants of swap and compare exchange
+
+static inline void *atomic_swap_local(void ** uniform ptr,
+                                      void * value) {
+    return (void *)atomic_swap_local((intptr_t * uniform)ptr,
+                                      (intptr_t)value);
+}
+
+static inline void * uniform atomic_swap_local(void ** uniform ptr,
+                                               void * uniform value) {
+    return (void * uniform)atomic_swap_local((intptr_t * uniform)ptr,
+                                              (uniform intptr_t)value);
+}
+
+static inline void *atomic_swap_local(void ** ptr, void * value) {
+    return (void *)atomic_swap_local((intptr_t *)ptr,
+                                      (intptr_t)value);
+}
+
+static inline void * 
+atomic_compare_exchange_local(void ** uniform ptr, 
+                              void * oldval, void * newval) {
+    return (void *)atomic_compare_exchange_local((intptr_t * uniform)ptr,
+                                                  (intptr_t)oldval,
+                                                  (intptr_t)newval);
+}
+
+static inline void * uniform
+atomic_compare_exchange_local(void ** uniform ptr, void * uniform oldval, 
+                              void * uniform newval) { 
+    return (void * uniform)atomic_compare_exchange_local((intptr_t * uniform)ptr,
+                                                          (uniform intptr_t)oldval,
+                                                          (uniform intptr_t)newval);
+}
+
+static inline void *
+atomic_compare_exchange_local(void ** ptr, void * oldval,
+                              void * newval) {
+    return (void *)atomic_compare_exchange_local((intptr_t *)ptr,
+                                                  (intptr_t)oldval,
+                                                  (intptr_t)newval);
+}
+
 ///////////////////////////////////////////////////////////////////////////
 // Transcendentals (float precision)
 
@@ -3551,8 +3731,9 @@ static inline int16 float_to_half(float f) {
         //   like recursive filters in DSP - not a typical half-float application. Whether
         //   FP16 denormals are rare in practice, I don't know. Whatever slow path your HW
         //   may or may not have for denormals, this may well hit it.
-        int32 fint2 = intbits(floatbits(fint & round_mask) * floatbits(magic)) - round_mask;
-        fint2 = (fint2 > f16infty) ? f16infty : fint2; // Clamp to signed infinity if overflowed
+        float fscale = floatbits(fint & round_mask) * floatbits(magic);
+        fscale = min(fscale, floatbits((31 << 23) - 0x1000));
+        int32 fint2 = intbits(fscale) - round_mask;
 
         if (fint < f32infty)
             o = fint2 >> 13; // Take the bits!
diff --git a/stmt.cpp b/stmt.cpp
index 9aad4291..4f8c0f12 100644
--- a/stmt.cpp
+++ b/stmt.cpp
@@ -1,5 +1,5 @@
 /*
-  Copyright (c) 2010-2011, Intel Corporation
+  Copyright (c) 2010-2012, Intel Corporation
   All rights reserved.
 
   Redistribution and use in source and binary forms, with or without
@@ -40,6 +40,7 @@
 #include "util.h"
 #include "expr.h"
 #include "type.h"
+#include "func.h"
 #include "sym.h"
 #include "module.h"
 #include "llvmutil.h"
@@ -167,14 +168,28 @@ DeclStmt::EmitCode(FunctionEmitContext *ctx) const {
         }
 
         // References must have initializer expressions as well.
-        if (dynamic_cast<const ReferenceType *>(sym->type) && initExpr == NULL) {
-            Error(sym->pos,
-                  "Must provide initializer for reference-type variable \"%s\".",
-                  sym->name.c_str());
-            continue;
+        if (IsReferenceType(sym->type) == true) {
+            if (initExpr == NULL) {
+                Error(sym->pos, "Must provide initializer for reference-type "
+                      "variable \"%s\".", sym->name.c_str());
+                continue;
+            }
+            if (IsReferenceType(initExpr->GetType()) == false) {
+                const Type *initLVType = initExpr->GetLValueType();
+                if (initLVType == NULL) {
+                    Error(initExpr->pos, "Initializer for reference-type variable "
+                          "\"%s\" must have an lvalue type.", sym->name.c_str());
+                    continue;
+                }
+                if (initLVType->IsUniformType() == false) {
+                    Error(initExpr->pos, "Initializer for reference-type variable "
+                          "\"%s\" must have a uniform lvalue type.", sym->name.c_str());
+                    continue;
+                }
+            }
         }
 
-        LLVM_TYPE_CONST llvm::Type *llvmType = sym->type->LLVMType(g->ctx);
+        llvm::Type *llvmType = sym->type->LLVMType(g->ctx);
         if (llvmType == NULL) {
             Assert(m->errorCount > 0);
             return;
@@ -2173,8 +2188,8 @@ SwitchStmt::EstimateCost() const {
 ///////////////////////////////////////////////////////////////////////////
 // ReturnStmt
 
-ReturnStmt::ReturnStmt(Expr *v, bool cc, SourcePos p) 
-    : Stmt(p), val(v), 
+ReturnStmt::ReturnStmt(Expr *e, bool cc, SourcePos p) 
+    : Stmt(p), expr(e), 
       doCoherenceCheck(cc && !g->opt.disableCoherentControlFlow) {
 }
 
@@ -2189,8 +2204,29 @@ ReturnStmt::EmitCode(FunctionEmitContext *ctx) const {
         return;
     }
 
+    // Make sure we're not trying to return a reference to something where
+    // that doesn't make sense (a bare "return;" has no expr to inspect)
+    const Function *func = ctx->GetFunction();
+    const Type *returnType = func->GetReturnType();
+    if (expr != NULL && IsReferenceType(returnType) == true &&
+        IsReferenceType(expr->GetType()) == false) {
+        const Type *lvType = expr->GetLValueType();
+        if (lvType == NULL) {
+            Error(expr->pos, "Illegal to return non-lvalue from function "
+                  "returning reference type \"%s\".",
+                  returnType->GetString().c_str());
+            return;
+        }
+        else if (lvType->IsUniformType() == false) {
+            Error(expr->pos, "Illegal to return varying lvalue type from "
+                  "function returning a reference type \"%s\".",
+                  returnType->GetString().c_str());
+            return;
+        }
+    }
+
     ctx->SetDebugPos(pos);
-    ctx->CurrentLanesReturned(val, doCoherenceCheck);
+    ctx->CurrentLanesReturned(expr, doCoherenceCheck);
 }
 
 
@@ -2210,7 +2246,8 @@ void
 ReturnStmt::Print(int indent) const {
     printf("%*c%sReturn Stmt", indent, ' ', doCoherenceCheck ? "Coherent " : "");
     pos.Print();
-    if (val) val->Print();
+    if (expr)
+        expr->Print();
     else printf("(void)");
     printf("\n");
 }
@@ -2228,6 +2265,9 @@ GotoStmt::GotoStmt(const char *l, SourcePos gotoPos, SourcePos ip)
 
 void
 GotoStmt::EmitCode(FunctionEmitContext *ctx) const {
+    if (!ctx->GetCurrentBasicBlock()) 
+        return;
+
     if (ctx->VaryingCFDepth() > 0) {
         Error(pos, "\"goto\" statements are only legal under \"uniform\" "
               "control flow.");
@@ -2457,7 +2497,7 @@ lProcessPrintArg(Expr *expr, FunctionEmitContext *ctx, std::string &argTypes) {
     else {
         argTypes.push_back(t);
 
-        LLVM_TYPE_CONST llvm::Type *llvmExprType = type->LLVMType(g->ctx);
+        llvm::Type *llvmExprType = type->LLVMType(g->ctx);
         llvm::Value *ptr = ctx->AllocaInst(llvmExprType, "print_arg");
         llvm::Value *val = expr->GetValue(ctx);
         if (!val)
@@ -2478,6 +2518,9 @@ lProcessPrintArg(Expr *expr, FunctionEmitContext *ctx, std::string &argTypes) {
  */
 void
 PrintStmt::EmitCode(FunctionEmitContext *ctx) const {
+    if (!ctx->GetCurrentBasicBlock()) 
+        return;
+
     ctx->SetDebugPos(pos);
 
     // __do_print takes 5 arguments; we'll get them stored in the args[] array
@@ -2494,7 +2537,7 @@ PrintStmt::EmitCode(FunctionEmitContext *ctx) const {
     std::string argTypes;
 
     if (values == NULL) {
-        LLVM_TYPE_CONST llvm::Type *ptrPtrType = 
+        llvm::Type *ptrPtrType = 
             llvm::PointerType::get(LLVMTypes::VoidPointerType, 0);
         args[4] = llvm::Constant::getNullValue(ptrPtrType);
     }
@@ -2506,7 +2549,7 @@ PrintStmt::EmitCode(FunctionEmitContext *ctx) const {
         int nArgs = elist ? elist->exprs.size() : 1;
 
         // Allocate space for the array of pointers to values to be printed 
-        LLVM_TYPE_CONST llvm::Type *argPtrArrayType = 
+        llvm::Type *argPtrArrayType = 
             llvm::ArrayType::get(LLVMTypes::VoidPointerType, nArgs);
         llvm::Value *argPtrArray = ctx->AllocaInst(argPtrArrayType,
                                                    "print_arg_ptrs");
@@ -2583,6 +2626,9 @@ AssertStmt::AssertStmt(const std::string &msg, Expr *e, SourcePos p)
 
 void
 AssertStmt::EmitCode(FunctionEmitContext *ctx) const {
+    if (!ctx->GetCurrentBasicBlock()) 
+        return;
+
     if (expr == NULL)
         return;
     const Type *type = expr->GetType();
@@ -2658,6 +2704,9 @@ DeleteStmt::DeleteStmt(Expr *e, SourcePos p)
 
 void
 DeleteStmt::EmitCode(FunctionEmitContext *ctx) const {
+    if (!ctx->GetCurrentBasicBlock()) 
+        return;
+
     const Type *exprType;
     if (expr == NULL || ((exprType = expr->GetType()) == NULL)) {
         Assert(m->errorCount > 0);
@@ -2774,7 +2823,7 @@ CreateForeachActiveStmt(Symbol *iterSym, Stmt *stmts, SourcePos pos) {
     Expr *maskVecExpr = new SymbolExpr(maskSym, pos);
     std::vector<Symbol *> mmFuns;
     m->symbolTable->LookupFunction("__movmsk", &mmFuns);
-    Assert(mmFuns.size() == 2);
+    Assert(mmFuns.size() == (g->target.maskBitCount == 32 ? 2 : 1));
     FunctionSymbolExpr *movmskFunc = new FunctionSymbolExpr("__movmsk", mmFuns,
                                                             pos);
     ExprList *movmskArgs = new ExprList(maskVecExpr, pos);
diff --git a/stmt.h b/stmt.h
index da418ec7..88115ab2 100644
--- a/stmt.h
+++ b/stmt.h
@@ -1,5 +1,5 @@
 /*
-  Copyright (c) 2010-2011, Intel Corporation
+  Copyright (c) 2010-2012, Intel Corporation
   All rights reserved.
 
   Redistribution and use in source and binary forms, with or without
@@ -265,7 +265,7 @@ public:
     statement in the program. */
 class ReturnStmt : public Stmt {
 public:
-    ReturnStmt(Expr *v, bool cc, SourcePos p);
+    ReturnStmt(Expr *e, bool cc, SourcePos p);
 
     void EmitCode(FunctionEmitContext *ctx) const;
     void Print(int indent) const;
@@ -273,7 +273,7 @@ public:
     Stmt *TypeCheck();
     int EstimateCost() const;
 
-    Expr *val;
+    Expr *expr;
     /** This indicates whether the generated code will check to see if no
         more program instances are currently running after the return, in
         which case the code can possibly jump to the end of the current
diff --git a/sym.cpp b/sym.cpp
index f60dc1aa..8c7e04a6 100644
--- a/sym.cpp
+++ b/sym.cpp
@@ -1,5 +1,5 @@
 /*
-  Copyright (c) 2010-2011, Intel Corporation
+  Copyright (c) 2010-2012, Intel Corporation
   All rights reserved.
 
   Redistribution and use in source and binary forms, with or without
@@ -56,12 +56,6 @@ Symbol::Symbol(const std::string &n, SourcePos p, const Type *t,
 }
 
 
-std::string
-Symbol::MangledName() const {
-    return name + type->Mangle();
-}
-
-
 ///////////////////////////////////////////////////////////////////////////
 // SymbolTable
 
@@ -72,7 +66,7 @@ SymbolTable::SymbolTable() {
 
 SymbolTable::~SymbolTable() {
     // Otherwise we have mismatched push/pop scopes
-    Assert(variables.size() == 1 && types.size() == 1);
+    Assert(variables.size() == 1);
     PopScope();
 }
 
@@ -80,7 +74,6 @@ SymbolTable::~SymbolTable() {
 void
 SymbolTable::PushScope() { 
     variables.push_back(new SymbolMapType);
-    types.push_back(new TypeMapType);
 }
 
 
@@ -89,10 +82,6 @@ SymbolTable::PopScope() {
     Assert(variables.size() > 1);
     delete variables.back();
     variables.pop_back();
-
-    Assert(types.size() > 1);
-    delete types.back();
-    types.pop_back();
 }
 
 
@@ -192,26 +181,17 @@ SymbolTable::LookupFunction(const char *name, const FunctionType *type) {
 
 bool
 SymbolTable::AddType(const char *name, const Type *type, SourcePos pos) {
-    // Like AddVariable(), we go backwards through the type maps, working
-    // from innermost scope to outermost.
-    for (int i = types.size()-1; i >= 0; --i) {
-        TypeMapType &sm = *(types[i]);
-        if (sm.find(name) != sm.end()) {
-            if (i == (int)types.size() - 1) {
-                Error(pos, "Ignoring redefinition of type \"%s\".", name);
-                return false;
-            }
-            else {
-                Warning(pos, "Type \"%s\" shadows type declared in outer scope.", name);
-                TypeMapType &sm = *(types.back());
-                sm[name] = type;
-                return true;
-            }
-        }
+    const Type *t = LookupType(name);
+    if (t != NULL && dynamic_cast<const UndefinedStructType *>(t) == NULL) {
+        // If we have a previous declaration of anything other than an
+        // UndefinedStructType with this struct name, issue an error.  If
+        // we have an UndefinedStructType, then we'll fall through to the
+        // code below that adds the definition to the type map.
+        Error(pos, "Ignoring redefinition of type \"%s\".", name);
+        return false;
     }
 
-    TypeMapType &sm = *(types.back());
-    sm[name] = type;
+    types[name] = type;
     return true;
 }
 
@@ -219,11 +199,9 @@ SymbolTable::AddType(const char *name, const Type *type, SourcePos pos) {
 const Type *
 SymbolTable::LookupType(const char *name) const {
-    // Again, search through the type maps backward to get scoping right.
-    for (int i = types.size()-1; i >= 0; --i) {
-        TypeMapType &sm = *(types[i]);
-        if (sm.find(name) != sm.end())
-            return sm[name];
-    }
+    // Type names aren't scoped: look the name up in the single type map.
+    TypeMapType::const_iterator iter = types.find(name);
+    if (iter != types.end())
+        return iter->second;
     return NULL;
 }
 
@@ -288,21 +266,19 @@ SymbolTable::closestTypeMatch(const char *str, bool structsVsEnums) const {
     const int maxDelta = 2;
     std::vector<std::string> matches[maxDelta+1];
 
-    for (unsigned int i = 0; i < types.size(); ++i) {
-        TypeMapType::const_iterator iter;
-        for (iter = types[i]->begin(); iter != types[i]->end(); ++iter) {
-            // Skip over either StructTypes or EnumTypes, depending on the
-            // value of the structsVsEnums parameter
-            bool isEnum = (dynamic_cast<const EnumType *>(iter->second) != NULL);
-            if (isEnum && structsVsEnums)
-                continue;
-            else if (!isEnum && !structsVsEnums)
-                continue;
+    TypeMapType::const_iterator iter;
+    for (iter = types.begin(); iter != types.end(); ++iter) {
+        // Skip over either StructTypes or EnumTypes, depending on the
+        // value of the structsVsEnums parameter
+        bool isEnum = (dynamic_cast<const EnumType *>(iter->second) != NULL);
+        if (isEnum && structsVsEnums)
+            continue;
+        else if (!isEnum && !structsVsEnums)
+            continue;
 
-            int dist = StringEditDistance(str, iter->first, maxDelta+1);
-            if (dist <= maxDelta)
-                matches[dist].push_back(iter->first);
-        }
+        int dist = StringEditDistance(str, iter->first, maxDelta+1);
+        if (dist <= maxDelta)
+            matches[dist].push_back(iter->first);
     }
 
     for (int i = 0; i <= maxDelta; ++i) {
@@ -342,16 +318,12 @@ SymbolTable::Print() {
 
     depth = 0;
     fprintf(stderr, "Named types:\n---------------\n");
-    for (unsigned int i = 0; i < types.size(); ++i) {
-        TypeMapType &sm = *types[i];
-        TypeMapType::iterator siter = sm.begin();
-        while (siter != sm.end()) {
-            fprintf(stderr, "%*c", depth, ' ');
-            fprintf(stderr, "%s -> %s\n", siter->first.c_str(),
-                    siter->second->GetString().c_str());
-            ++siter;
-        }
-        depth += 4;
+    TypeMapType::iterator siter = types.begin();
+    while (siter != types.end()) {
+        fprintf(stderr, "%*c", depth, ' ');
+        fprintf(stderr, "%s -> %s\n", siter->first.c_str(),
+                siter->second->GetString().c_str());
+        ++siter;
     }
 }
 
@@ -382,14 +354,14 @@ SymbolTable::RandomSymbol() {
 
 const Type *
 SymbolTable::RandomType() {
-    int v = ispcRand() % types.size();
-    if (types[v]->size() == 0)
-        return NULL;
-    int count = ispcRand() % types[v]->size();
-    TypeMapType::iterator iter = types[v]->begin();
+    // Step a random number of entries (< size) from begin(); with no
+    // types registered there is nothing to pick, so return NULL.
+    if (types.empty())
+        return NULL;
+    int count = ispcRand() % types.size();
+    TypeMapType::iterator iter = types.begin();
     while (count-- > 0) {
         ++iter;
-        Assert(iter != types[v]->end());
+        Assert(iter != types.end());
     }
     return iter->second;
 }
diff --git a/sym.h b/sym.h
index fa452326..43c8ff16 100644
--- a/sym.h
+++ b/sym.h
@@ -1,5 +1,5 @@
 /*
-  Copyright (c) 2010-2011, Intel Corporation
+  Copyright (c) 2010-2012, Intel Corporation
   All rights reserved.
 
   Redistribution and use in source and binary forms, with or without
@@ -67,15 +67,8 @@ public:
     Symbol(const std::string &name, SourcePos pos, const Type *t = NULL,
            StorageClass sc = SC_NONE);
 
-    /** This method should only be called for function symbols; for them,
-        it returns a mangled version of the function name with the argument
-        types encoded into the returned name.  This is used to generate
-        unique symbols in object files for overloaded functions.
-     */
-    std::string MangledName() const;
-
     SourcePos pos;            /*!< Source file position where the symbol was defined */
-    const std::string name;   /*!< Symbol's name */
+    std::string name;         /*!< Symbol's name */
     llvm::Value *storagePtr;  /*!< For symbols with storage associated with
                                    them (i.e. variables but not functions),
                                    this member stores a pointer to its
@@ -208,6 +201,9 @@ public:
     /** Adds the named type to the symbol table.  This is used for both
         struct definitions (where <tt>struct Foo</tt> causes type \c Foo to
         be added to the symbol table) as well as for <tt>typedef</tt>s.
+        If \c name currently refers to a forward-declared struct
+        ("struct Foo;"), i.e. an UndefinedStructType, this method
+        replaces that entry with the type given here.
 
         @param name Name of the type to be added
         @param type Type that \c name represents
@@ -272,12 +268,10 @@ private:
     typedef std::map<std::string, std::vector<Symbol *> > FunctionMapType;
     FunctionMapType functions;
 
-    /** Type definitions can also be scoped.  A new \c TypeMapType
-        is added to the back of the \c types \c vector each time a new scope
-        is entered.  (And it's removed when the scope exits).
+    /** Type definitions can't currently be scoped.
      */
     typedef std::map<std::string, const Type *> TypeMapType;
-    std::vector<TypeMapType *> types;
+    TypeMapType types;
 };
 
 
diff --git a/tests/func-anon-param.ispc b/tests/func-anon-param.ispc
new file mode 100644
index 00000000..8bf97065
--- /dev/null
+++ b/tests/func-anon-param.ispc
@@ -0,0 +1,15 @@
+
+
+export uniform int width() { return programCount; }
+
+float foo(float &) { return 1; }
+float bar(uniform float []) { return 2; }
+
+export void f_f(uniform float RET[], uniform float aFOO[]) {
+    float x = 0;
+    RET[programIndex] = foo(x) + bar(aFOO);
+}
+
+export void result(uniform float RET[]) {
+    RET[programIndex] = 3;
+}
diff --git a/tests/func-overload-max.ispc b/tests/func-overload-max.ispc
new file mode 100644
index 00000000..37360030
--- /dev/null
+++ b/tests/func-overload-max.ispc
@@ -0,0 +1,12 @@
+
+export uniform int width() { return programCount; }
+
+
+export void f_f(uniform float RET[], uniform float aFOO[]) {
+    float a = 1. / aFOO[programIndex]; 
+    RET[programIndex] = max(0, a); 
+}
+
+export void result(uniform float RET[]) {
+    RET[programIndex] = 1. / (1+programIndex);
+}
diff --git a/tests/func-overload-refs.ispc b/tests/func-overload-refs.ispc
new file mode 100644
index 00000000..89184812
--- /dev/null
+++ b/tests/func-overload-refs.ispc
@@ -0,0 +1,14 @@
+
+export uniform int width() { return programCount; }
+
+float foo(float &a) { return 1; }
+float foo(const float &a) { return 2; }
+
+export void f_f(uniform float RET[], uniform float aFOO[]) {
+    float x = 0;
+    RET[programIndex] = foo(x); 
+}
+
+export void result(uniform float RET[]) {
+    RET[programIndex] = 1;
+}
diff --git a/tests/func-ptr-initializer.ispc b/tests/func-ptr-initializer.ispc
new file mode 100644
index 00000000..96537391
--- /dev/null
+++ b/tests/func-ptr-initializer.ispc
@@ -0,0 +1,28 @@
+
+export uniform int width() { return programCount; }
+
+
+typedef float (*func)();
+
+float foo();
+float bar();
+
+struct X { func f, g; };
+
+static uniform X x = { foo, &bar };
+
+export void f_f(uniform float RET[], uniform float aFOO[]) {
+    RET[programIndex] = x.f() + x.g();
+}
+
+export void result(uniform float RET[]) {
+    RET[programIndex] = programIndex;
+}
+
+float foo() {
+    return 2 * programIndex;
+}
+
+float bar() {
+    return -programIndex;
+}
diff --git a/tests/funcptr-null-1.ispc b/tests/funcptr-null-1.ispc
index 05798918..784b5ada 100644
--- a/tests/funcptr-null-1.ispc
+++ b/tests/funcptr-null-1.ispc
@@ -15,7 +15,7 @@ export void f_f(uniform float RET[], uniform float aFOO[]) {
     float a = aFOO[programIndex]; 
     float b = aFOO[0]-1;
     uniform FuncType func = foo;
-    RET[programIndex] = (func ? func : bar)(a, b);
+    RET[programIndex] = (func ? func : &bar)(a, b);
 }
 
 export void result(uniform float RET[]) {
diff --git a/tests/funcptr-null-3.ispc b/tests/funcptr-null-3.ispc
index 8e228315..3fd74da0 100644
--- a/tests/funcptr-null-3.ispc
+++ b/tests/funcptr-null-3.ispc
@@ -14,7 +14,7 @@ static float bar(float a, float b) {
 export void f_f(uniform float RET[], uniform float aFOO[]) {
     float a = aFOO[programIndex]; 
     float b = aFOO[0]-1;
-    FuncType func = foo;
+    FuncType func = &foo;
     if (a == 2)
         func = NULL;
     if (func != NULL)
diff --git a/tests/funcptr-null-6.ispc b/tests/funcptr-null-6.ispc
index cf92c4a7..45bcfcdd 100644
--- a/tests/funcptr-null-6.ispc
+++ b/tests/funcptr-null-6.ispc
@@ -16,7 +16,7 @@ export void f_f(uniform float RET[], uniform float aFOO[]) {
     float b = aFOO[0]-1;
     FuncType func = NULL;
     if (a == 2)
-        func = foo;
+        func = &foo;
     if (!func)
         RET[programIndex] = -1;
     else
diff --git a/tests/funcptr-uniform-2.ispc b/tests/funcptr-uniform-2.ispc
index 849c9492..59d54b40 100644
--- a/tests/funcptr-uniform-2.ispc
+++ b/tests/funcptr-uniform-2.ispc
@@ -14,7 +14,7 @@ static float bar(float a, float b) {
 export void f_f(uniform float RET[], uniform float aFOO[]) {
     float a = aFOO[programIndex]; 
     float b = aFOO[0]-1;
-    uniform FuncType func = bar;
+    uniform FuncType func = &bar;
     if (aFOO[0] == 1)
         func = foo;
     RET[programIndex] = func(a, b);
diff --git a/tests/global-decl-define.ispc b/tests/global-decl-define.ispc
new file mode 100644
index 00000000..44fb92a7
--- /dev/null
+++ b/tests/global-decl-define.ispc
@@ -0,0 +1,14 @@
+
+
+export uniform int width() { return programCount; }
+
+extern int foo;
+int foo = 1;
+
+export void f_f(uniform float RET[], uniform float aFOO[]) {
+    RET[programIndex] = foo; 
+}
+
+export void result(uniform float RET[]) {
+    RET[programIndex] = 1;
+}
diff --git a/tests/intptr.ispc b/tests/intptr.ispc
new file mode 100644
index 00000000..7eb9eef7
--- /dev/null
+++ b/tests/intptr.ispc
@@ -0,0 +1,19 @@
+
+export uniform int width() { return programCount; }
+
+
+export void f_v(uniform float RET[]) {
+    RET[programIndex] = sizeof(uniform intptr_t);
+}
+
+export void result(uniform float RET[]) {
+    RET[programIndex] = 
+#if (ISPC_POINTER_SIZE==32)
+        4
+#elif (ISPC_POINTER_SIZE==64)
+        8
+#else
+#error Unknown pointer size
+#endif
+        ;
+}
diff --git a/tests/ptr-cast-complex.ispc b/tests/ptr-cast-complex.ispc
new file mode 100644
index 00000000..afdbf5e7
--- /dev/null
+++ b/tests/ptr-cast-complex.ispc
@@ -0,0 +1,18 @@
+
+export uniform int width() { return programCount; }
+
+export void f_f(uniform float RET[], uniform float aFOO[]) {
+    uniform int x[2][10];    
+    for (uniform int i = 0; i < 2; ++i) {
+        for (uniform int j = 0; j < 10; ++j) {
+            x[i][j] = 10*i+j;
+        }
+    }
+
+    uniform int (* varying y)[10] = x;
+    RET[programIndex] = y[1][programIndex % 5];
+}
+
+export void result(uniform float RET[]) {
+    RET[programIndex] = 10+ (programIndex % 5);
+}
diff --git a/tests/ptr-math-variability.ispc b/tests/ptr-math-variability.ispc
new file mode 100644
index 00000000..4fa89206
--- /dev/null
+++ b/tests/ptr-math-variability.ispc
@@ -0,0 +1,12 @@
+
+export uniform int width() { return programCount; }
+
+
+export void f_f(uniform float RET[], uniform float aFOO[]) {
+    uniform float * uniform ptr = aFOO;
+    RET[programIndex] = *(ptr + programIndex) - 1;
+}
+
+export void result(uniform float RET[]) {
+    RET[programIndex] = programIndex;
+}
diff --git a/tests/ptr-null-func-arg.ispc b/tests/ptr-null-func-arg.ispc
new file mode 100644
index 00000000..fdd0cbab
--- /dev/null
+++ b/tests/ptr-null-func-arg.ispc
@@ -0,0 +1,14 @@
+
+export uniform int width() { return programCount; }
+
+bool bar(float * x) {
+    return (x != 0);
+}
+
+export void f_f(uniform float RET[], uniform float aFOO[]) {
+    RET[programIndex] = bar(NULL);
+}
+
+export void result(uniform float RET[]) {
+    RET[programIndex] = 0;
+}
diff --git a/tests/ref-as-temporary.ispc b/tests/ref-as-temporary.ispc
new file mode 100644
index 00000000..1b167da6
--- /dev/null
+++ b/tests/ref-as-temporary.ispc
@@ -0,0 +1,14 @@
+
+export uniform int width() { return programCount; }
+
+
+int func(const int &a) { return a+1; }
+int bar() { return 0; }
+
+export void f_f(uniform float RET[], uniform float aFOO[]) {
+    RET[programIndex] = func(bar());
+}
+
+export void result(uniform float RET[]) {
+    RET[programIndex] = 1;
+}
diff --git a/tests/ref-vec-param-index.ispc b/tests/ref-vec-param-index.ispc
new file mode 100644
index 00000000..70256dc1
--- /dev/null
+++ b/tests/ref-vec-param-index.ispc
@@ -0,0 +1,16 @@
+
+export uniform int width() { return programCount; }
+
+float foo(uniform float<4> &vec) {
+    return vec[programIndex & 3];
+}
+
+export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) {
+    uniform float<4> vec = { b, -1, 2*b, -b };
+    RET[programIndex] = foo(vec); 
+}
+
+export void result(uniform float RET[]) {
+    uniform float a[4] = { 5, -1, 10, -5 };
+    RET[programIndex] = a[programIndex & 3];
+}
diff --git a/tests/short-circuit-13.ispc b/tests/short-circuit-13.ispc
deleted file mode 100644
index fb0a94a2..00000000
--- a/tests/short-circuit-13.ispc
+++ /dev/null
@@ -1,25 +0,0 @@
-
-export uniform int width() { return programCount; }
-
-uniform int * uniform ptr;
-
-float foo(uniform float a[]) {
-    int index = (programIndex & 1) * 10000;
-    if (a[programIndex] < 10000 && a[index] == 1)
-        return 1;
-    else
-        return 1234;
-}
-
-export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) {
-    float a = aFOO[programIndex]; 
-    float a0 = aFOO[0], a1 = aFOO[1];
-    if ((programIndex & 1) == 0)
-        RET[programIndex] = foo(aFOO);
-    else
-        RET[programIndex] = 2;
-}
-
-export void result(uniform float RET[]) {
-    RET[programIndex] = (programIndex & 1) ? 2 : 1;
-}
diff --git a/tests/struct-forward-decl-2.ispc b/tests/struct-forward-decl-2.ispc
new file mode 100644
index 00000000..2660c541
--- /dev/null
+++ b/tests/struct-forward-decl-2.ispc
@@ -0,0 +1,36 @@
+
+export uniform int width() { return programCount; }
+
+struct Foo;
+
+void bing(Foo * uniform);
+
+struct Foo {
+    int i;
+    varying float f;
+    Foo * uniform next;
+};
+
+void bar(Foo * uniform f) {
+    bing(f);
+}
+
+
+export void f_f(uniform float RET[], uniform float aFOO[]) {
+    uniform Foo fa, fb;
+    fa.next = &fb;
+    fb.f = aFOO[programIndex]; 
+    fb.i = 100;
+    bar(&fa);
+    RET[programIndex] = fb.f; 
+}
+
+
+void bing(Foo * uniform f) {
+    f = f->next;
+    f->f *= 2;
+}
+
+export void result(uniform float RET[]) {
+    RET[programIndex] = 2 + 2*programIndex;
+}
diff --git a/tests/struct-forward-decl.ispc b/tests/struct-forward-decl.ispc
new file mode 100644
index 00000000..54f09be6
--- /dev/null
+++ b/tests/struct-forward-decl.ispc
@@ -0,0 +1,33 @@
+
+export uniform int width() { return programCount; }
+
+struct Foo;
+
+void bing(varying Foo * uniform);
+
+struct Foo {
+    float f;
+    int i;
+};
+
+void bar(varying Foo * uniform f) {
+    bing(f);
+}
+
+
+export void f_f(uniform float RET[], uniform float aFOO[]) {
+    Foo f;
+    f.f = aFOO[programIndex]; 
+    f.i = programIndex;
+    bar(&f);
+    RET[programIndex] = f.f; 
+}
+
+
+void bing(varying Foo * uniform f) {
+    f->f *= 2;
+}
+
+export void result(uniform float RET[]) {
+    RET[programIndex] = 2 + 2*programIndex;
+}
diff --git a/tests/struct-zero-len-array-member.ispc b/tests/struct-zero-len-array-member.ispc
new file mode 100644
index 00000000..83e91854
--- /dev/null
+++ b/tests/struct-zero-len-array-member.ispc
@@ -0,0 +1,24 @@
+
+struct Foo {
+    float x;
+    float a[0];
+};
+
+export uniform int width() { return programCount; }
+
+
+export void f_f(uniform float RET[], uniform float aFOO[]) {
+    uniform int nFloats = 3+programCount;
+    varying Foo * uniform ptr = (varying Foo * uniform)(uniform new varying int32[nFloats]);
+    memset(ptr, 0, nFloats*sizeof(int32));
+    
+    for (uniform int i = 0; i < nFloats-1; ++i)
+        ptr->a[i] = i;
+    ptr->x = aFOO[programIndex]; 
+
+    RET[programIndex] = ptr->a[1+programIndex]; 
+}
+
+export void result(uniform float RET[]) {
+    RET[programIndex] = 1 + programIndex;
+}
diff --git a/tests/typecast-void-funcall-1.ispc b/tests/typecast-void-funcall-1.ispc
new file mode 100644
index 00000000..c9aa0ed7
--- /dev/null
+++ b/tests/typecast-void-funcall-1.ispc
@@ -0,0 +1,17 @@
+
+export uniform int width() { return programCount; }
+
+float add(float a, float b, uniform float * uniform result) {
+    result[programIndex] = a+b;
+    return a+b;
+}
+
+export void f_f(uniform float RET[], uniform float aFOO[]) {
+    float a = aFOO[programIndex]; 
+    float b = 0.; b = a; 
+    (void)add(a, b, RET);
+}
+
+export void result(uniform float RET[]) {
+    RET[programIndex] = 2 + 2*programIndex;
+}
diff --git a/tests/typecast-void-funcall.ispc b/tests/typecast-void-funcall.ispc
new file mode 100644
index 00000000..f2431ef9
--- /dev/null
+++ b/tests/typecast-void-funcall.ispc
@@ -0,0 +1,17 @@
+
+export uniform int width() { return programCount; }
+
+uniform float add(float a, float b, uniform float * uniform result) {
+    result[programIndex] = a+b;
+    return 1;
+}
+
+export void f_f(uniform float RET[], uniform float aFOO[]) {
+    float a = aFOO[programIndex]; 
+    float b = 0.; b = a; 
+    (void)add(a, b, RET);
+}
+
+export void result(uniform float RET[]) {
+    RET[programIndex] = 2 + 2*programIndex;
+}
diff --git a/tests_errors/addr-of-1.ispc b/tests_errors/addr-of-1.ispc
new file mode 100644
index 00000000..4d770f01
--- /dev/null
+++ b/tests_errors/addr-of-1.ispc
@@ -0,0 +1,5 @@
+// Illegal to take address of non-lvalue or function
+
+void foo() {
+    int *ptr = &(1+1);
+}
diff --git a/tests_errors/deref-3.ispc b/tests_errors/deref-3.ispc
index 19d4e82d..d7e6e906 100644
--- a/tests_errors/deref-3.ispc
+++ b/tests_errors/deref-3.ispc
@@ -1,4 +1,4 @@
-// Dereference operator "->" can't be applied to non-pointer type "varying struct Foo"
+// Member operator "->" can't be applied to non-pointer type "varying struct Foo"
 
 struct Foo { int x; };
 
diff --git a/tests_errors/func-param-mismatch-2.ispc b/tests_errors/func-param-mismatch-2.ispc
index 09b27064..63c0239a 100644
--- a/tests_errors/func-param-mismatch-2.ispc
+++ b/tests_errors/func-param-mismatch-2.ispc
@@ -1,4 +1,4 @@
-// Unable to find matching overload for call to function
+// Unable to find any matching overload for call to function
 
 void foo(int x);
 
diff --git a/tests_errors/func-param-mismatch-3.ispc b/tests_errors/func-param-mismatch-3.ispc
index 7e5f2b99..cb34c8a7 100644
--- a/tests_errors/func-param-mismatch-3.ispc
+++ b/tests_errors/func-param-mismatch-3.ispc
@@ -1,4 +1,4 @@
-// Unable to find matching overload for call to function
+// Unable to find any matching overload for call to function
 
 void foo(int x);
 
diff --git a/tests_errors/func-param-mismatch.ispc b/tests_errors/func-param-mismatch.ispc
index c2bac94f..44a50903 100644
--- a/tests_errors/func-param-mismatch.ispc
+++ b/tests_errors/func-param-mismatch.ispc
@@ -1,4 +1,4 @@
-// Unable to find matching overload for call to function
+// Unable to find any matching overload for call to function
 
 void foo();
 
diff --git a/tests_errors/global-decl-1.ispc b/tests_errors/global-decl-1.ispc
new file mode 100644
index 00000000..6f111bbf
--- /dev/null
+++ b/tests_errors/global-decl-1.ispc
@@ -0,0 +1,4 @@
+// Definition of variable "foo" conflicts with definition at
+
+extern int foo;
+float foo;
diff --git a/tests_errors/global-decl-2.ispc b/tests_errors/global-decl-2.ispc
new file mode 100644
index 00000000..66647ea7
--- /dev/null
+++ b/tests_errors/global-decl-2.ispc
@@ -0,0 +1,4 @@
+// Definition of variable "foo" conflicts with definition at 
+
+extern int foo;
+extern float foo;
diff --git a/tests_errors/global-redef-1.ispc b/tests_errors/global-redef-1.ispc
new file mode 100644
index 00000000..7ebb3da7
--- /dev/null
+++ b/tests_errors/global-redef-1.ispc
@@ -0,0 +1,4 @@
+// Definition of variable "foo" conflicts with definition at
+
+int foo;
+float foo;
diff --git a/tests_errors/global-redef.ispc b/tests_errors/global-redef.ispc
new file mode 100644
index 00000000..9a2df32f
--- /dev/null
+++ b/tests_errors/global-redef.ispc
@@ -0,0 +1,4 @@
+// Redefinition of variable "foo" is illegal
+
+int foo;
+int foo;
diff --git a/tests_errors/ptr-1.ispc b/tests_errors/ptr-1.ispc
index 66a9bff4..97a88488 100644
--- a/tests_errors/ptr-1.ispc
+++ b/tests_errors/ptr-1.ispc
@@ -1,4 +1,4 @@
-// Can't convert between incompatible pointer types
+// Can't convert from pointer type "void * varying" to incompatible pointer type "uniform int32 * varying" for return statement
 
 int *foo(void *p) {
     return p;
diff --git a/tests_errors/ref-initializer-1.ispc b/tests_errors/ref-initializer-1.ispc
new file mode 100644
index 00000000..c926c793
--- /dev/null
+++ b/tests_errors/ref-initializer-1.ispc
@@ -0,0 +1,6 @@
+// Initializer for reference-type variable "x" must have an lvalue type
+
+float &func(uniform float a[], int i, float f) {
+    float &x = 1.; // a[i];
+}
+
diff --git a/tests_errors/ref-initializer-2.ispc b/tests_errors/ref-initializer-2.ispc
new file mode 100644
index 00000000..4612addf
--- /dev/null
+++ b/tests_errors/ref-initializer-2.ispc
@@ -0,0 +1,6 @@
+// Initializer for reference-type variable "x" must have a uniform lvalue type
+
+float &func(uniform float a[], int i, float f) {
+    float &x = a[i];
+}
+
diff --git a/tests_errors/ref-initializer-3.ispc b/tests_errors/ref-initializer-3.ispc
new file mode 100644
index 00000000..27833b54
--- /dev/null
+++ b/tests_errors/ref-initializer-3.ispc
@@ -0,0 +1,6 @@
+// Initializer for reference-type variable "x" must have a uniform lvalue type
+
+float &func(uniform int a[], int i, float f) {
+    float &x = a[i];
+}
+
diff --git a/tests_errors/return-ref-1.ispc b/tests_errors/return-ref-1.ispc
new file mode 100644
index 00000000..fee20b18
--- /dev/null
+++ b/tests_errors/return-ref-1.ispc
@@ -0,0 +1,5 @@
+// Illegal to return non-lvalue from function returning reference type
+
+float &func(uniform float a[], int i, float f) {
+    return 1.f;
+}
diff --git a/tests_errors/return-ref-2.ispc b/tests_errors/return-ref-2.ispc
new file mode 100644
index 00000000..6ed667c1
--- /dev/null
+++ b/tests_errors/return-ref-2.ispc
@@ -0,0 +1,5 @@
+// Illegal to return varying lvalue type from function returning a reference type
+
+float &func(uniform float a[], int i, float f) {
+    return a[i];
+}
diff --git a/tests_errors/struct-ref-undecl-1.ispc b/tests_errors/struct-ref-undecl-1.ispc
new file mode 100644
index 00000000..0d851117
--- /dev/null
+++ b/tests_errors/struct-ref-undecl-1.ispc
@@ -0,0 +1,5 @@
+// Member operator "." can't be applied to declared but not defined struct type
+
+struct Foo;
+
+int bar(Foo & foo) { return foo.x; }
diff --git a/tests_errors/struct-ref-undecl-2.ispc b/tests_errors/struct-ref-undecl-2.ispc
new file mode 100644
index 00000000..bb233ccc
--- /dev/null
+++ b/tests_errors/struct-ref-undecl-2.ispc
@@ -0,0 +1,5 @@
+// Member operator "->" can't be applied to declared but not defined struct type
+
+struct Foo;
+
+int bar(Foo * uniform foo) { return foo->x; }
diff --git a/tests_errors/struct-unsized-array.ispc b/tests_errors/struct-unsized-array.ispc
index 77553eff..7238a351 100644
--- a/tests_errors/struct-unsized-array.ispc
+++ b/tests_errors/struct-unsized-array.ispc
@@ -1,4 +1,4 @@
-// Unsized arrays aren't allowed in struct definitions
+// Unsized arrays aren't allowed except for the last member in a struct definition.
 
 struct Foo {
     float a[];
diff --git a/type.cpp b/type.cpp
index 0fb8817e..64e832bb 100644
--- a/type.cpp
+++ b/type.cpp
@@ -42,6 +42,7 @@
 #include "module.h"
 
 #include <stdio.h>
+#include <map>
 #include <llvm/Value.h>
 #include <llvm/Module.h>
 #include <llvm/Analysis/DIBuilder.h>
@@ -59,7 +60,7 @@ static bool
 lShouldPrintName(const std::string &name) {
     if (name.size() == 0)
         return false;
-    else if (name[0] != '_')
+    else if (name[0] != '_' && name[0] != '$')
         return true;
     else
         return (name.size() == 1) || (name[1] != '_');
@@ -81,11 +82,7 @@ lCreateDIArray(llvm::DIType eltType, int count) {
     llvm::Value *sub = m->diBuilder->getOrCreateSubrange(lowerBound, upperBound);
     std::vector<llvm::Value *> subs;
     subs.push_back(sub);
-#ifdef LLVM_2_9
-    llvm::DIArray subArray = m->diBuilder->getOrCreateArray(&subs[0], subs.size());
-#else
     llvm::DIArray subArray = m->diBuilder->getOrCreateArray(subs);
-#endif
 
     uint64_t size = eltType.getSizeInBits() * count;
     uint64_t align = eltType.getAlignInBits();
@@ -418,7 +415,7 @@ AtomicType::GetCDeclaration(const std::string &name) const {
 }
 
 
-LLVM_TYPE_CONST llvm::Type *
+llvm::Type *
 AtomicType::LLVMType(llvm::LLVMContext *ctx) const {
     Assert(variability.type != Variability::Unbound);
     bool isUniform = (variability == Variability::Uniform);
@@ -518,12 +515,7 @@ AtomicType::GetDIType(llvm::DIDescriptor scope) const {
     else if (variability == Variability::Varying) {
         llvm::DIType unifType = GetAsUniformType()->GetDIType(scope);
         llvm::Value *sub = m->diBuilder->getOrCreateSubrange(0, g->target.vectorWidth-1);
-#ifdef LLVM_2_9
-        llvm::Value *suba[] = { sub };
-        llvm::DIArray subArray = m->diBuilder->getOrCreateArray(suba, 1);
-#else
         llvm::DIArray subArray = m->diBuilder->getOrCreateArray(sub);
-#endif // LLVM_2_9
         uint64_t size =  unifType.getSizeInBits()  * g->target.vectorWidth;
         uint64_t align = unifType.getAlignInBits() * g->target.vectorWidth;
         return m->diBuilder->createVectorType(size, align, unifType, subArray);
@@ -734,7 +726,7 @@ EnumType::GetCDeclaration(const std::string &varName) const {
 }
 
 
-LLVM_TYPE_CONST llvm::Type *
+llvm::Type *
 EnumType::LLVMType(llvm::LLVMContext *ctx) const {
     Assert(variability != Variability::Unbound);
 
@@ -767,14 +759,8 @@ EnumType::GetDIType(llvm::DIDescriptor scope) const {
             m->diBuilder->createEnumerator(enumerators[i]->name, enumeratorValue);
         enumeratorDescriptors.push_back(descriptor);
     }
-#ifdef LLVM_2_9
-    llvm::DIArray elementArray = 
-        m->diBuilder->getOrCreateArray(&enumeratorDescriptors[0],
-                                       enumeratorDescriptors.size());
-#else
     llvm::DIArray elementArray = 
         m->diBuilder->getOrCreateArray(enumeratorDescriptors);
-#endif
 
     llvm::DIFile diFile = pos.GetDIFile();
     llvm::DIType diType =
@@ -789,12 +775,7 @@ EnumType::GetDIType(llvm::DIDescriptor scope) const {
         return diType;
     case Variability::Varying: {
         llvm::Value *sub = m->diBuilder->getOrCreateSubrange(0, g->target.vectorWidth-1);
-#ifdef LLVM_2_9
-        llvm::Value *suba[] = { sub };
-        llvm::DIArray subArray = m->diBuilder->getOrCreateArray(suba, 1);
-#else
         llvm::DIArray subArray = m->diBuilder->getOrCreateArray(sub);
-#endif // !LLVM_2_9
         uint64_t size =  diType.getSizeInBits()  * g->target.vectorWidth;
         uint64_t align = diType.getAlignInBits() * g->target.vectorWidth;
         return m->diBuilder->createVectorType(size, align, diType, subArray);
@@ -966,42 +947,6 @@ PointerType::GetAsFrozenSlice() const {
 }
 
 
-/** Returns a structure corresponding to the pointer representation for
-    slice pointers; the first member of this structure is a uniform or
-    varying pointer, and the second element is either a uniform or varying
-    int32.
- */
-const StructType *
-PointerType::GetSliceStructType() const {
-    Assert(isSlice == true);
-
-    std::vector<const Type *> eltTypes;
-    eltTypes.push_back(GetAsNonSlice());
-    switch (variability.type) {
-    case Variability::Uniform:
-        eltTypes.push_back(AtomicType::UniformInt32);
-        break;
-    case Variability::Varying:
-        eltTypes.push_back(AtomicType::VaryingInt32);
-        break;
-    default:
-        FATAL("Unexpected variability in PointerType::GetSliceStructType()");
-    }
-
-    std::vector<std::string> eltNames;
-    std::vector<SourcePos> eltPos;
-
-    eltNames.push_back("ptr");
-    eltNames.push_back("offset");
-
-    eltPos.push_back(SourcePos());
-    eltPos.push_back(SourcePos());
-
-    return new StructType("__ptr_slice_tmp", eltTypes, eltNames, eltPos, isConst,
-                          Variability::Uniform, SourcePos());
-}
-
-
 const PointerType *
 PointerType::ResolveUnboundVariability(Variability v) const {
     if (baseType == NULL) {
@@ -1103,22 +1048,41 @@ PointerType::GetCDeclaration(const std::string &name) const {
 }
 
 
-LLVM_TYPE_CONST llvm::Type *
+llvm::Type *
 PointerType::LLVMType(llvm::LLVMContext *ctx) const {
     if (baseType == NULL) {
         Assert(m->errorCount > 0);
         return NULL;
     }
 
-    if (isSlice)
-        // Slice pointers are represented as a structure with a pointer and
-        // an integer offset; the corresponding ispc type is returned by
-        // GetSliceStructType().
-        return GetSliceStructType()->LLVMType(ctx);
+    if (isSlice) {
+        llvm::Type *types[2];
+        types[0] = GetAsNonSlice()->LLVMType(ctx);
+        
+        switch (variability.type) {
+        case Variability::Uniform:
+            types[1] = LLVMTypes::Int32Type;
+            break;
+        case Variability::Varying:
+            types[1] = LLVMTypes::Int32VectorType;
+            break;
+        case Variability::SOA:
+            types[1] = llvm::ArrayType::get(LLVMTypes::Int32Type,
+                                            variability.soaWidth);
+            break;
+        default:
+            FATAL("unexpected variability for slice pointer in "
+                  "PointerType::LLVMType");
+        }
+
+        llvm::ArrayRef<llvm::Type *> typesArrayRef =
+            llvm::ArrayRef<llvm::Type *>(types, 2);
+        return llvm::StructType::get(*g->ctx, typesArrayRef);
+    }
 
     switch (variability.type) {
     case Variability::Uniform: {
-        LLVM_TYPE_CONST llvm::Type *ptype = NULL;
+        llvm::Type *ptype = NULL;
         const FunctionType *ftype = dynamic_cast<const FunctionType *>(baseType);
         if (ftype != NULL) 
             // Get the type of the function variant that takes the mask as the
@@ -1157,13 +1121,15 @@ PointerType::GetDIType(llvm::DIDescriptor scope) const {
 
     llvm::DIType diTargetType = baseType->GetDIType(scope);
     int bitsSize = g->target.is32Bit ? 32 : 64;
+    int ptrAlignBits = bitsSize;
     switch (variability.type) {
     case Variability::Uniform:
-        return m->diBuilder->createPointerType(diTargetType, bitsSize);
+        return m->diBuilder->createPointerType(diTargetType, bitsSize, 
+                                               ptrAlignBits);
     case Variability::Varying: {
         // emit them as an array of pointers
         llvm::DIType eltType = m->diBuilder->createPointerType(diTargetType, 
-                                                               bitsSize);
+                                                               bitsSize, ptrAlignBits);
         return lCreateDIArray(eltType, g->target.vectorWidth);
     }
     case Variability::SOA: {
@@ -1196,14 +1162,14 @@ ArrayType::ArrayType(const Type *c, int a)
 }
 
 
-LLVM_TYPE_CONST llvm::ArrayType *
+llvm::ArrayType *
 ArrayType::LLVMType(llvm::LLVMContext *ctx) const {
     if (child == NULL) {
         Assert(m->errorCount > 0);
         return NULL;
     }
 
-    LLVM_TYPE_CONST llvm::Type *ct = child->LLVMType(ctx);
+    llvm::Type *ct = child->LLVMType(ctx);
     if (ct == NULL) {
         Assert(m->errorCount > 0);
         return NULL;
@@ -1648,14 +1614,14 @@ VectorType::GetElementType() const {
 }
 
 
-LLVM_TYPE_CONST llvm::Type *
+llvm::Type *
 VectorType::LLVMType(llvm::LLVMContext *ctx) const {
     if (base == NULL) {
         Assert(m->errorCount > 0);
         return NULL;
     }
 
-    LLVM_TYPE_CONST llvm::Type *bt = base->LLVMType(ctx);
+    llvm::Type *bt = base->LLVMType(ctx);
     if (!bt)
         return NULL;
 
@@ -1684,12 +1650,7 @@ llvm::DIType
 VectorType::GetDIType(llvm::DIDescriptor scope) const {
     llvm::DIType eltType = base->GetDIType(scope);
     llvm::Value *sub = m->diBuilder->getOrCreateSubrange(0, numElements-1);
-#ifdef LLVM_2_9
-    llvm::Value *subs[1] = { sub };
-    llvm::DIArray subArray = m->diBuilder->getOrCreateArray(subs, 1);
-#else
     llvm::DIArray subArray = m->diBuilder->getOrCreateArray(sub);
-#endif
 
     uint64_t sizeBits = eltType.getSizeInBits() * numElements;
 
@@ -1744,12 +1705,103 @@ VectorType::getVectorMemoryCount() const {
 ///////////////////////////////////////////////////////////////////////////
 // StructType
 
+// We maintain a map from struct names to LLVM struct types so that we can
+// uniquely get the llvm::StructType * for a given ispc struct type.  Note
+// that we need to mangle the name a bit so that we can e.g. differentiate
+// between the uniform and varying variants of a given struct type.  This
+// is handled by lMangleStructName() below.
+static std::map<std::string, llvm::StructType *> lStructTypeMap;
+
+/** Builds the key under which a struct type is stored in lStructTypeMap:
+    "v<vectorWidth>_<uniform|varying|soa<N>>_<name>".  The target vector
+    width is part of the key because 'varying' structs compiled for
+    targets of different widths have different memory layouts, so they
+    must not share an LLVM struct type.  Any other variability (e.g.
+    unbound) is a caller error and is reported through FATAL().
+ */
+static std::string
+lMangleStructName(const std::string &name, Variability variability) {
+    char buf[32];
+    std::string n;
+
+    // Prefix: vector width of the current compilation target
+    sprintf(buf, "v%d", g->target.vectorWidth);
+    n += buf;
+
+    // Middle: variability tag (the SOA tag also records the SOA width)
+    switch (variability.type) {
+    case Variability::Uniform:
+        n += "_uniform_";
+        break;
+    case Variability::Varying:
+        n += "_varying_";
+        break;
+    case Variability::SOA:
+        sprintf(buf, "_soa%d_", variability.soaWidth);
+        n += buf;
+        break;
+    default:
+        FATAL("Unexpected variability in lMangleStructName()");
+    }
+    
+    // Suffix: the struct's own name
+    n += name;
+    return n;
+}
+
+        
 StructType::StructType(const std::string &n, const std::vector<const Type *> &elts, 
                        const std::vector<std::string> &en,
                        const std::vector<SourcePos> &ep,
                        bool ic, Variability v, SourcePos p) 
     : name(n), elementTypes(elts), elementNames(en), elementPositions(ep),
       variability(v), isConst(ic), pos(p) {
+    if (variability != Variability::Unbound) {
+        // For structs with non-unbound variability, we'll create the
+        // corresponding LLVM struct type now, if one hasn't been made
+        // already.
+
+        // Create a unique anonymous struct name if we have an anonymous
+        // struct (name == ""), or if we are creating a derived type from
+        // an anonymous struct (e.g. the varying variant--name[0] == '$').
+        if (name == "" || name[0] == '$') {
+            char buf[16];
+            static int count = 0;
+            sprintf(buf, "$anon%d", count);
+            name = buf;
+            ++count;
+        }
+
+        // If a non-opaque LLVM struct for this type has already been
+        // created, we're done.  For an opaque struct type, we'll override
+        // the old definition now that we have a full definition.
+        std::string mname = lMangleStructName(name, variability);
+        if (lStructTypeMap.find(mname) != lStructTypeMap.end() &&
+            lStructTypeMap[mname]->isOpaque() == false)
+            return;
+
+        // Actually make the LLVM struct
+        std::vector<llvm::Type *> elementTypes;
+        for (int i = 0; i < GetElementCount(); ++i) {
+            const Type *type = GetElementType(i);
+            if (type == NULL) {
+                Assert(m->errorCount > 0);
+                return;
+            }
+            elementTypes.push_back(type->LLVMType(g->ctx));
+        }
+
+        if (lStructTypeMap.find(mname) == lStructTypeMap.end()) {
+            // New struct definition
+            llvm::StructType *st =
+                llvm::StructType::create(*g->ctx, elementTypes, mname);
+            lStructTypeMap[mname] = st;
+        }
+        else {
+            // Definition for what was before just a declaration
+            lStructTypeMap[mname]->setBody(elementTypes);
+        }
+    }
 }
 
 
@@ -1877,31 +1929,34 @@ StructType::GetAsNonConstType() const {
 std::string
 StructType::GetString() const {
     std::string ret;
-    if (isConst)   ret += "const ";
+    if (isConst)
+        ret += "const ";
     ret += variability.GetString();
     ret += " ";
 
-    // Don't print the entire struct declaration, just print the struct's name.
-    // @todo Do we need a separate method that prints the declaration?
-#if 0
-    ret += std::string("struct { ") + name;
-    for (unsigned int i = 0; i < elementTypes.size(); ++i) {
-        ret += elementTypes[i]->GetString();
-        ret += " ";
-        ret += elementNames[i];
-        ret += "; ";
+    if (name[0] == '$') {
+        // Print the whole anonymous struct declaration
+        ret += std::string("struct { ") + name;
+        for (unsigned int i = 0; i < elementTypes.size(); ++i) {
+            ret += elementTypes[i]->GetString();
+            ret += " ";
+            ret += elementNames[i];
+            ret += "; ";
+        }
+        ret += "}";
     }
-    ret += "}";
-#else
-    ret += "struct ";
-    ret += name;
-#endif
+    else {
+        ret += "struct ";
+        ret += name;
+    }
+
     return ret;
 }
 
 
-std::string
-StructType::Mangle() const {
+/** Mangle a struct name for use in function name mangling. */
+static std::string
+lMangleStruct(Variability variability, bool isConst, const std::string &name) {
     Assert(variability != Variability::Unbound);
 
     std::string ret;
@@ -1910,12 +1965,15 @@ StructType::Mangle() const {
         ret += "_c_";
     ret += variability.MangleString();
 
-    ret += name + std::string("]<");
-    for (unsigned int i = 0; i < elementTypes.size(); ++i)
-        ret += GetElementType(i)->Mangle();
-    ret += ">";
+    ret += name + std::string("]");
     return ret;
 }
+
+
+std::string
+StructType::Mangle() const {
+    return lMangleStruct(variability, isConst, name);
+}
     
 
 std::string
@@ -1923,31 +1981,31 @@ StructType::GetCDeclaration(const std::string &n) const {
     std::string ret;
     if (isConst) ret += "const ";
     ret += std::string("struct ") + name;
-    if (lShouldPrintName(n))
+    if (lShouldPrintName(n)) {
         ret += std::string(" ") + n;
 
-    if (variability.soaWidth > 0) {
-        char buf[32];
-        // This has to match the naming scheme used in lEmitStructDecls()
-        // in module.cpp
-        sprintf(buf, "_SOA%d", variability.soaWidth);
-        ret += buf;
+        if (variability.soaWidth > 0) {
+            char buf[32];
+            // This has to match the naming scheme used in lEmitStructDecls()
+            // in module.cpp
+            sprintf(buf, "_SOA%d", variability.soaWidth);
+            ret += buf;
+        }
     }
 
     return ret;
 }
 
 
-LLVM_TYPE_CONST llvm::Type *
+llvm::Type *
 StructType::LLVMType(llvm::LLVMContext *ctx) const {
-    std::vector<LLVM_TYPE_CONST llvm::Type *> llvmTypes;
-    for (int i = 0; i < GetElementCount(); ++i) {
-        const Type *type = GetElementType(i);
-        if (type == NULL)
-            return NULL;
-        llvmTypes.push_back(type->LLVMType(ctx));
+    Assert(variability != Variability::Unbound);
+    std::string mname = lMangleStructName(name, variability);
+    if (lStructTypeMap.find(mname) == lStructTypeMap.end()) {
+        Assert(m->errorCount > 0);
+        return NULL;
     }
-    return llvm::StructType::get(*ctx, llvmTypes);
+    return lStructTypeMap[mname];
 }
 
 
@@ -1963,6 +2021,7 @@ StructType::GetDIType(llvm::DIDescriptor scope) const {
         llvm::DIType eltType = GetElementType(i)->GetDIType(scope);
         uint64_t eltAlign = eltType.getAlignInBits();
         uint64_t eltSize = eltType.getSizeInBits();
+        Assert(eltAlign != 0);
 
         // The alignment for the entire structure is the maximum of the
         // required alignments of its elements
@@ -1976,17 +2035,10 @@ StructType::GetDIType(llvm::DIDescriptor scope) const {
 
         llvm::DIFile diFile = elementPositions[i].GetDIFile();
         int line = elementPositions[i].first_line;
-#ifdef LLVM_2_9
-        llvm::DIType fieldType = 
-            m->diBuilder->createMemberType(elementNames[i], diFile, line,
-                                           eltSize, eltAlign, currentSize, 0,
-                                           eltType);
-#else
         llvm::DIType fieldType = 
             m->diBuilder->createMemberType(scope, elementNames[i], diFile, 
                                            line, eltSize, eltAlign, 
                                            currentSize, 0, eltType);
-#endif // LLVM_2_9
         elementLLVMTypes.push_back(fieldType);
 
         currentSize += eltSize;
@@ -1997,12 +2049,7 @@ StructType::GetDIType(llvm::DIDescriptor scope) const {
     if (currentSize > 0 && (currentSize % align))
         currentSize += align - (currentSize % align);
 
-#ifdef LLVM_2_9
-    llvm::DIArray elements = m->diBuilder->getOrCreateArray(&elementLLVMTypes[0], 
-                                                            elementLLVMTypes.size());
-#else
     llvm::DIArray elements = m->diBuilder->getOrCreateArray(elementLLVMTypes);
-#endif
     llvm::DIFile diFile = pos.GetDIFile();
     return m->diBuilder->createStructType(scope, name, diFile, pos.first_line, currentSize, 
                                           align, 0, elements);
@@ -2014,6 +2061,10 @@ StructType::GetElementType(int i) const {
     Assert(variability != Variability::Unbound);
     Assert(i < (int)elementTypes.size());
     const Type *ret = elementTypes[i];
+    if (ret == NULL) {
+        Assert(m->errorCount > 0);
+        return NULL;
+    }
 
     // If the element has unbound variability, resolve its variability to
     // the struct type's variability
@@ -2070,6 +2121,170 @@ StructType::checkIfCanBeSOA(const StructType *st) {
 }
 
 
+///////////////////////////////////////////////////////////////////////////
+// UndefinedStructType
+
+UndefinedStructType::UndefinedStructType(const std::string &n, 
+                                         const Variability var, bool ic,
+                                         SourcePos p) 
+    : name(n), variability(var), isConst(ic), pos(p) {
+    Assert(name != "");
+    if (variability != Variability::Unbound) {
+        // Create an opaque LLVM struct type unless the name is already mapped
+        std::string mname = lMangleStructName(name, variability);
+        if (lStructTypeMap.find(mname) == lStructTypeMap.end())
+            lStructTypeMap[mname] = llvm::StructType::create(*g->ctx, mname);
+    }
+}
+
+
+Variability
+UndefinedStructType::GetVariability() const {
+    return variability;
+}
+
+
+bool
+UndefinedStructType::IsBoolType() const {
+    return false;
+}
+
+
+bool
+UndefinedStructType::IsFloatType() const {
+    return false;
+}
+
+
+bool
+UndefinedStructType::IsIntType() const {
+    return false;
+}
+
+
+bool
+UndefinedStructType::IsUnsignedType() const {
+    return false;
+}
+
+
+bool
+UndefinedStructType::IsConstType() const {
+    return isConst;
+}
+
+
+const Type *
+UndefinedStructType::GetBaseType() const {
+    return this;
+}
+
+
+const UndefinedStructType *
+UndefinedStructType::GetAsVaryingType() const {
+    if (variability == Variability::Varying)
+        return this;
+    return new UndefinedStructType(name, Variability::Varying, isConst, pos);
+}
+
+
+const UndefinedStructType *
+UndefinedStructType::GetAsUniformType() const {
+    if (variability == Variability::Uniform)
+        return this;
+    return new UndefinedStructType(name, Variability::Uniform, isConst, pos);
+}
+
+
+const UndefinedStructType *
+UndefinedStructType::GetAsUnboundVariabilityType() const {
+    if (variability == Variability::Unbound)
+        return this;
+    return new UndefinedStructType(name, Variability::Unbound, isConst, pos);
+}
+
+
+const UndefinedStructType *
+UndefinedStructType::GetAsSOAType(int width) const {
+    FATAL("UndefinedStructType::GetAsSOAType() shouldn't be called.");
+    return NULL;
+}
+
+
+const UndefinedStructType *
+UndefinedStructType::ResolveUnboundVariability(Variability v) const {
+    if (variability != Variability::Unbound)
+        return this;
+    return new UndefinedStructType(name, v, isConst, pos);
+}
+
+
+const UndefinedStructType *
+UndefinedStructType::GetAsConstType() const {
+    if (isConst)
+        return this;
+    return new UndefinedStructType(name, variability, true, pos);
+}
+
+
+const UndefinedStructType *
+UndefinedStructType::GetAsNonConstType() const {
+    if (isConst == false)
+        return this;
+    return new UndefinedStructType(name, variability, false, pos);
+}
+
+
+std::string
+UndefinedStructType::GetString() const {
+    std::string ret;
+    if (isConst)   ret += "const ";
+    ret += variability.GetString();
+    ret += " struct ";
+    ret += name;
+    return ret;
+}
+
+
+std::string
+UndefinedStructType::Mangle() const {
+    return lMangleStruct(variability, isConst, name);
+}
+
+
+std::string
+UndefinedStructType::GetCDeclaration(const std::string &n) const {
+    std::string ret;
+    if (isConst) ret += "const ";
+    ret += std::string("struct ") + name;
+    if (lShouldPrintName(n))
+        ret += std::string(" ") + n;
+    return ret;
+}
+
+
+llvm::Type *
+UndefinedStructType::LLVMType(llvm::LLVMContext *ctx) const {
+    Assert(variability != Variability::Unbound);
+    std::string mname = lMangleStructName(name, variability);
+    if (lStructTypeMap.find(mname) == lStructTypeMap.end()) {
+        Assert(m->errorCount > 0);
+        return NULL;
+    }
+    return lStructTypeMap[mname];
+}
+
+
+llvm::DIType
+UndefinedStructType::GetDIType(llvm::DIDescriptor scope) const {
+    llvm::DIFile diFile = pos.GetDIFile();
+    llvm::DIArray elements;
+    return m->diBuilder->createStructType(scope, name, diFile, pos.first_line, 
+                                          0 /* size */, 0 /* align */, 
+                                          0 /* flags */, elements); 
+}
+
+
 ///////////////////////////////////////////////////////////////////////////
 // ReferenceType
 
@@ -2290,14 +2505,14 @@ ReferenceType::GetCDeclaration(const std::string &name) const {
 }
 
 
-LLVM_TYPE_CONST llvm::Type *
+llvm::Type *
 ReferenceType::LLVMType(llvm::LLVMContext *ctx) const {
     if (targetType == NULL) {
         Assert(m->errorCount > 0);
         return NULL;
     }
 
-    LLVM_TYPE_CONST llvm::Type *t = targetType->LLVMType(ctx);
+    llvm::Type *t = targetType->LLVMType(ctx);
     if (t == NULL) {
         Assert(m->errorCount > 0);
         return NULL;
@@ -2326,7 +2541,7 @@ FunctionType::FunctionType(const Type *r, const std::vector<const Type *> &a,
                            SourcePos p)
     : isTask(false), isExported(false), isExternC(false), returnType(r), 
       paramTypes(a), paramNames(std::vector<std::string>(a.size(), "")),
-      paramDefaults(std::vector<ConstExpr *>(a.size(), NULL)),
+      paramDefaults(std::vector<Expr *>(a.size(), NULL)),
       paramPositions(std::vector<SourcePos>(a.size(), p)) {
     Assert(returnType != NULL);
     isSafe = false;
@@ -2336,7 +2551,7 @@ FunctionType::FunctionType(const Type *r, const std::vector<const Type *> &a,
 
 FunctionType::FunctionType(const Type *r, const std::vector<const Type *> &a, 
                            const std::vector<std::string> &an, 
-                           const std::vector<ConstExpr *> &ad,
+                           const std::vector<Expr *> &ad,
                            const std::vector<SourcePos> &ap,
                            bool it, bool is, bool ec) 
     : isTask(it), isExported(is), isExternC(ec), returnType(r), paramTypes(a), 
@@ -2450,32 +2665,19 @@ FunctionType::ResolveUnboundVariability(Variability v) const {
 
 const Type *
 FunctionType::GetAsConstType() const {
-    FATAL("FunctionType::GetAsConstType shouldn't be called");
-    return NULL;
+    return this;
 }
 
 
 const Type *
 FunctionType::GetAsNonConstType() const {
-    FATAL("FunctionType::GetAsNonConstType shouldn't be called");
-    return NULL;
+    return this;
 }
 
 
 std::string
 FunctionType::GetString() const {
-    std::string ret;
-    if (isTask) ret += "task ";
-    if (isSafe) ret += "/*safe*/ ";
-    if (costOverride > 0) {
-        char buf[32];
-        sprintf(buf, "/*cost=%d*/ ", costOverride);
-        ret += buf;
-    }
-    if (returnType != NULL)
-        ret += returnType->GetString();
-    else
-        ret += "/* ERROR */";
+    std::string ret = GetReturnTypeString();
     ret += "(";
     for (unsigned int i = 0; i < paramTypes.size(); ++i) {
         if (paramTypes[i] == NULL)
@@ -2535,7 +2737,7 @@ FunctionType::GetCDeclaration(const std::string &fname) const {
 }
 
 
-LLVM_TYPE_CONST llvm::Type *
+llvm::Type *
 FunctionType::LLVMType(llvm::LLVMContext *ctx) const {
     FATAL("FunctionType::LLVMType() shouldn't be called");
     return NULL;
@@ -2544,19 +2746,55 @@ FunctionType::LLVMType(llvm::LLVMContext *ctx) const {
 
 llvm::DIType
 FunctionType::GetDIType(llvm::DIDescriptor scope) const {
-    // @todo need to implement FunctionType::GetDIType()
-    FATAL("need to implement FunctionType::GetDIType()");
-    return llvm::DIType();
+    std::vector<llvm::Value *> retArgTypes;
+
+    retArgTypes.push_back(returnType->GetDIType(scope));
+    for (int i = 0; i < GetNumParameters(); ++i) {
+        const Type *t = GetParameterType(i);
+        if (t == NULL)
+            return llvm::DIType();
+        retArgTypes.push_back(t->GetDIType(scope));
+    }
+
+    llvm::DIArray retArgTypesArray = 
+        m->diBuilder->getOrCreateArray(llvm::ArrayRef<llvm::Value *>(retArgTypes));
+    llvm::DIType diType = 
+        // FIXME: DIFile 
+        m->diBuilder->createSubroutineType(llvm::DIFile(), retArgTypesArray);
+    return diType;
 }
 
 
-LLVM_TYPE_CONST llvm::FunctionType *
+const std::string
+FunctionType::GetReturnTypeString() const {
+    if (returnType == NULL)
+        return "/* ERROR */";
+
+    std::string ret;
+    if (isTask)
+        ret += "task ";
+    if (isExported)
+        ret += "export ";
+    if (isExternC)
+        ret += "extern \"C\" ";
+    if (isSafe) 
+        ret += "/*safe*/ ";
+    if (costOverride > 0) {
+        char buf[32];
+        sprintf(buf, "/*cost=%d*/ ", costOverride);
+        ret += buf;
+    }
+    return ret + returnType->GetString();
+}
+
+
+llvm::FunctionType *
 FunctionType::LLVMFunctionType(llvm::LLVMContext *ctx, bool includeMask) const {
     if (isTask == true) 
         Assert(includeMask == true);
 
     // Get the LLVM Type *s for the function arguments
-    std::vector<LLVM_TYPE_CONST llvm::Type *> llvmArgTypes;
+    std::vector<llvm::Type *> llvmArgTypes;
     for (unsigned int i = 0; i < paramTypes.size(); ++i) {
         if (paramTypes[i] == NULL) {
             Assert(m->errorCount > 0);
@@ -2564,7 +2802,7 @@ FunctionType::LLVMFunctionType(llvm::LLVMContext *ctx, bool includeMask) const {
         }
         Assert(Type::Equal(paramTypes[i], AtomicType::Void) == false);
 
-        LLVM_TYPE_CONST llvm::Type *t = paramTypes[i]->LLVMType(ctx);
+        llvm::Type *t = paramTypes[i]->LLVMType(ctx);
         if (t == NULL) {
             Assert(m->errorCount > 0);
             return NULL;
@@ -2576,7 +2814,7 @@ FunctionType::LLVMFunctionType(llvm::LLVMContext *ctx, bool includeMask) const {
     if (includeMask)
         llvmArgTypes.push_back(LLVMTypes::MaskType);
 
-    std::vector<LLVM_TYPE_CONST llvm::Type *> callTypes;
+    std::vector<llvm::Type *> callTypes;
     if (isTask) {
         // Tasks take three arguments: a pointer to a struct that holds the
         // actual task arguments, the thread index, and the total number of
@@ -2610,7 +2848,7 @@ FunctionType::GetParameterType(int i) const {
 }
 
 
-ConstExpr *
+Expr *
 FunctionType::GetParameterDefault(int i) const { 
     Assert(i < (int)paramDefaults.size());
     return paramDefaults[i]; 
@@ -2681,6 +2919,17 @@ Type::MoreGeneralType(const Type *t0, const Type *t1, SourcePos pos, const char
                       bool forceVarying, int vecSize) {
     Assert(reason != NULL);
 
+    // First, if one or both types are function types, convert them to
+    // pointer to function types and then try again.
+    if (dynamic_cast<const FunctionType *>(t0) ||
+        dynamic_cast<const FunctionType *>(t1)) {
+        if (dynamic_cast<const FunctionType *>(t0))
+            t0 = PointerType::GetUniform(t0);
+        if (dynamic_cast<const FunctionType *>(t1))
+            t1 = PointerType::GetUniform(t1);
+        return MoreGeneralType(t0, t1, pos, reason, forceVarying, vecSize);
+    }
+
     // First, if we need to go varying, promote both of the types to be
     // varying.
     if (t0->IsVaryingType() || t1->IsVaryingType() || forceVarying) {
@@ -2888,20 +3137,19 @@ lCheckTypeEquality(const Type *a, const Type *b, bool ignoreConst) {
 
     const StructType *sta = dynamic_cast<const StructType *>(a);
     const StructType *stb = dynamic_cast<const StructType *>(b);
-    if (sta != NULL && stb != NULL) {
-        if (sta->GetElementCount() != stb->GetElementCount())
+    const UndefinedStructType *usta =
+        dynamic_cast<const UndefinedStructType *>(a);
+    const UndefinedStructType *ustb =
+        dynamic_cast<const UndefinedStructType *>(b);
+    if ((sta != NULL || usta != NULL) && (stb != NULL || ustb != NULL)) {
+        // Report both defined and undefined structs as equal if their
+        // names (and variability) are the same.
+        if (a->GetVariability() != b->GetVariability())
             return false;
-        if (sta->GetStructName() != stb->GetStructName())
-            return false;
-        if (sta->GetVariability() != stb->GetVariability())
-            return false;
-        for (int i = 0; i < sta->GetElementCount(); ++i)
-            // FIXME: is this redundant now?
-            if (!lCheckTypeEquality(sta->GetElementType(i), stb->GetElementType(i),
-                                    ignoreConst))
-                return false;
 
-        return true;
+        std::string namea = sta ? sta->GetStructName() : usta->GetStructName();
+        std::string nameb = stb ? stb->GetStructName() : ustb->GetStructName();
+        return (namea == nameb);
     }
 
     const PointerType *pta = dynamic_cast<const PointerType *>(a);
diff --git a/type.h b/type.h
index 94c28f0b..e0560ce5 100644
--- a/type.h
+++ b/type.h
@@ -187,7 +187,7 @@ public:
     virtual std::string GetCDeclaration(const std::string &name) const = 0;
 
     /** Returns the LLVM type corresponding to this ispc type */
-    virtual LLVM_TYPE_CONST llvm::Type *LLVMType(llvm::LLVMContext *ctx) const = 0;
+    virtual llvm::Type *LLVMType(llvm::LLVMContext *ctx) const = 0;
 
     /** Returns the DIType (LLVM's debugging information structure),
         corresponding to this type. */
@@ -269,7 +269,7 @@ public:
     std::string Mangle() const;
     std::string GetCDeclaration(const std::string &name) const;
 
-    LLVM_TYPE_CONST llvm::Type *LLVMType(llvm::LLVMContext *ctx) const;
+    llvm::Type *LLVMType(llvm::LLVMContext *ctx) const;
     llvm::DIType GetDIType(llvm::DIDescriptor scope) const;
 
     /** This enumerator records the basic types that AtomicTypes can be 
@@ -343,7 +343,7 @@ public:
     std::string Mangle() const;
     std::string GetCDeclaration(const std::string &name) const;
 
-    LLVM_TYPE_CONST llvm::Type *LLVMType(llvm::LLVMContext *ctx) const;
+    llvm::Type *LLVMType(llvm::LLVMContext *ctx) const;
     llvm::DIType GetDIType(llvm::DIDescriptor scope) const;
 
     /** Provides the enumerators defined in the enum definition. */
@@ -409,7 +409,6 @@ public:
     const PointerType *GetAsSlice() const;
     const PointerType *GetAsNonSlice() const;
     const PointerType *GetAsFrozenSlice() const;
-    const StructType *GetSliceStructType() const;
 
     const Type *GetBaseType() const;
     const PointerType *GetAsVaryingType() const;
@@ -425,7 +424,7 @@ public:
     std::string Mangle() const;
     std::string GetCDeclaration(const std::string &name) const;
 
-    LLVM_TYPE_CONST llvm::Type *LLVMType(llvm::LLVMContext *ctx) const;
+    llvm::Type *LLVMType(llvm::LLVMContext *ctx) const;
     llvm::DIType GetDIType(llvm::DIDescriptor scope) const;
 
     static PointerType *Void;
@@ -523,7 +522,7 @@ public:
     std::string GetCDeclaration(const std::string &name) const;
 
     llvm::DIType GetDIType(llvm::DIDescriptor scope) const;
-    LLVM_TYPE_CONST llvm::ArrayType *LLVMType(llvm::LLVMContext *ctx) const;
+    llvm::ArrayType *LLVMType(llvm::LLVMContext *ctx) const;
 
     /** This method returns the total number of elements in the array,
         including all dimensions if this is a multidimensional array. */
@@ -589,7 +588,7 @@ public:
     std::string Mangle() const;
     std::string GetCDeclaration(const std::string &name) const;
 
-    LLVM_TYPE_CONST llvm::Type *LLVMType(llvm::LLVMContext *ctx) const;
+    llvm::Type *LLVMType(llvm::LLVMContext *ctx) const;
     llvm::DIType GetDIType(llvm::DIDescriptor scope) const;
 
     int GetElementCount() const;
@@ -639,7 +638,7 @@ public:
     std::string Mangle() const;
     std::string GetCDeclaration(const std::string &name) const;
 
-    LLVM_TYPE_CONST llvm::Type *LLVMType(llvm::LLVMContext *ctx) const;
+    llvm::Type *LLVMType(llvm::LLVMContext *ctx) const;
     llvm::DIType GetDIType(llvm::DIDescriptor scope) const;
 
     /** Returns the type of the structure element with the given name (if any).
@@ -668,7 +667,7 @@ public:
 private:
     static bool checkIfCanBeSOA(const StructType *st);
 
-    const std::string name;
+    /*const*/ std::string name;
     /** The types of the struct elements.  Note that we store these with
         uniform/varying exactly as they were declared in the source file.
         (In other words, even if this struct has a varying qualifier and
@@ -690,6 +689,52 @@ private:
 };
 
 
+/** Type implementation representing a struct name that has been declared
+    but where the struct members haven't been defined (i.e. "struct Foo;").
+    This class doesn't do much besides serve as a placeholder that other
+    code can use to detect the presence of such a struct.
+ */
+class UndefinedStructType : public Type {
+public:
+    UndefinedStructType(const std::string &name, const Variability variability,
+                        bool isConst, SourcePos pos);
+
+    Variability GetVariability() const;
+
+    bool IsBoolType() const;
+    bool IsFloatType() const;
+    bool IsIntType() const;
+    bool IsUnsignedType() const;
+    bool IsConstType() const;
+
+    const Type *GetBaseType() const;
+    const UndefinedStructType *GetAsVaryingType() const;
+    const UndefinedStructType *GetAsUniformType() const;
+    const UndefinedStructType *GetAsUnboundVariabilityType() const;
+    const UndefinedStructType *GetAsSOAType(int width) const;
+    const UndefinedStructType *ResolveUnboundVariability(Variability v) const;
+
+    const UndefinedStructType *GetAsConstType() const;
+    const UndefinedStructType *GetAsNonConstType() const;
+
+    std::string GetString() const;
+    std::string Mangle() const;
+    std::string GetCDeclaration(const std::string &name) const;
+
+    llvm::Type *LLVMType(llvm::LLVMContext *ctx) const;
+    llvm::DIType GetDIType(llvm::DIDescriptor scope) const;
+
+    /** Returns the name of the structure type.  (e.g. struct Foo -> "Foo".) */
+    const std::string &GetStructName() const { return name; }
+
+private:
+    const std::string name;
+    const Variability variability;
+    const bool isConst;
+    const SourcePos pos;
+};
+
+
 /** @brief Type representing a reference to another (non-reference) type.
  */
 class ReferenceType : public Type {
@@ -719,7 +764,7 @@ public:
     std::string Mangle() const;
     std::string GetCDeclaration(const std::string &name) const;
 
-    LLVM_TYPE_CONST llvm::Type *LLVMType(llvm::LLVMContext *ctx) const;
+    llvm::Type *LLVMType(llvm::LLVMContext *ctx) const;
     llvm::DIType GetDIType(llvm::DIDescriptor scope) const;
 
 private:
@@ -745,7 +790,7 @@ public:
     FunctionType(const Type *returnType, 
                  const std::vector<const Type *> &argTypes,
                  const std::vector<std::string> &argNames,
-                 const std::vector<ConstExpr *> &argDefaults,
+                 const std::vector<Expr *> &argDefaults,
                  const std::vector<SourcePos> &argPos,
                  bool isTask, bool isExported, bool isExternC);
 
@@ -771,21 +816,23 @@ public:
     std::string Mangle() const;
     std::string GetCDeclaration(const std::string &fname) const;
 
-    LLVM_TYPE_CONST llvm::Type *LLVMType(llvm::LLVMContext *ctx) const;
+    llvm::Type *LLVMType(llvm::LLVMContext *ctx) const;
     llvm::DIType GetDIType(llvm::DIDescriptor scope) const;
 
     const Type *GetReturnType() const { return returnType; }
 
+    const std::string GetReturnTypeString() const;
+
     /** This method returns the LLVM FunctionType that corresponds to this
         function type.  The \c includeMask parameter indicates whether the
         llvm::FunctionType should have a mask as the last argument in its
         function signature. */
-    LLVM_TYPE_CONST llvm::FunctionType *LLVMFunctionType(llvm::LLVMContext *ctx, 
+    llvm::FunctionType *LLVMFunctionType(llvm::LLVMContext *ctx, 
                                                          bool includeMask = false) const;
 
     int GetNumParameters() const { return (int)paramTypes.size(); }
     const Type *GetParameterType(int i) const;
-    ConstExpr * GetParameterDefault(int i) const;
+    Expr * GetParameterDefault(int i) const;
     const SourcePos &GetParameterSourcePos(int i) const;
     const std::string &GetParameterName(int i) const;
 
@@ -818,7 +865,7 @@ private:
     const std::vector<std::string> paramNames;
     /** Default values of the function's arguments.  For arguments without
         default values provided, NULL is stored. */
-    mutable std::vector<ConstExpr *> paramDefaults;
+    mutable std::vector<Expr *> paramDefaults;
     /** The names provided (if any) with the function arguments in the
         function's signature.  These should only be used for error messages
         and the like and so not affect testing function types for equality,
@@ -826,4 +873,8 @@ private:
     const std::vector<SourcePos> paramPositions;
 };
 
+inline bool IsReferenceType(const Type *t) {
+    return dynamic_cast<const ReferenceType *>(t) != NULL;
+}
+
 #endif // ISPC_TYPE_H