Release 1.4.3

License update (just dates)
Merge pull request #530 from dbabokin/llvm_fix
2013-06-25 18:38:21 +04:00 · 2013-06-25 17:02:42 +04:00 · 2013-06-25 05:22:09 -07:00 · 2013-06-25 16:21:14 +04:00 · 2013-06-25 03:08:02 -07:00 · 2013-06-25 02:14:24 -07:00
20 changed files with 304 additions and 116 deletions
--- a/LICENSE.txt
+++ b/LICENSE.txt
@@ -1,4 +1,4 @@
-Copyright (c) 2010-2011, Intel Corporation
+Copyright (c) 2010-2013, Intel Corporation
 All rights reserved.
 Redistribution and use in source and binary forms, with or without
@@ -77,7 +77,7 @@ covered by the following license:
 University of Illinois/NCSA
 Open Source License
-Copyright (c) 2003-2010 University of Illinois at Urbana-Champaign.
+Copyright (c) 2003-2013 University of Illinois at Urbana-Champaign.
 All rights reserved.
 Developed by:
--- a/16
+++ b/16
@@ -51,7 +51,17 @@ else
 endif
 ARCH_TYPE = $(shell arch)
-LLVM_LIBS=$(shell $(LLVM_CONFIG) --libs engine ipo bitreader bitwriter instrumentation linker)
+LLVM_CXXFLAGS=$(shell $(LLVM_CONFIG) --cppflags)
 LLVM_VERSION=LLVM_$(shell $(LLVM_CONFIG) --version | sed -e s/\\./_/ -e s/svn//)
 LLVM_VERSION_DEF=-D$(LLVM_VERSION)
 LLVM_COMPONENTS = engine ipo bitreader bitwriter instrumentation linker
 # Component "option" was introduced in 3.3 and starting with 3.4 it is required for the link step.
 # We check if it's available before adding it (to not break 3.2 and earlier).
 ifeq ($(shell $(LLVM_CONFIG) --components |grep -c option), 1)
    LLVM_COMPONENTS+=option
 endif
 LLVM_LIBS=$(shell $(LLVM_CONFIG) --libs $(LLVM_COMPONENTS))
 CLANG=clang
 CLANG_LIBS = -lclangFrontend -lclangDriver \
@@ -70,10 +80,6 @@ ifeq ($(ARCH_OS2),Msys)
 	ISPC_LIBS += -lshlwapi -limagehlp -lpsapi
 endif
 LLVM_CXXFLAGS=$(shell $(LLVM_CONFIG) --cppflags)
 LLVM_VERSION=LLVM_$(shell $(LLVM_CONFIG) --version | sed -e s/\\./_/ -e s/svn//)
 LLVM_VERSION_DEF=-D$(LLVM_VERSION)
 # Define build time stamp and revision.
 # For revision we use GIT or SVN info.
 BUILD_DATE=$(shell date +%Y%m%d)
--- a/bitcode2cpp.py
+++ b/bitcode2cpp.py
@@ -45,7 +45,7 @@ for i in range(0, len(data), 1):
            sys.stdout.write("\n")
 sys.stdout.write("0x00 };\n\n")
-sys.stdout.write("int builtins_bitcode_" + name + "_length = " + str(i+1) + ";\n")
+sys.stdout.write("int builtins_bitcode_" + name + "_length = " + str(len(data)) + ";\n")
 as_out.wait()
--- a/cbackend.cpp
+++ b/cbackend.cpp
@@ -2188,7 +2188,11 @@ bool CWriter::doInitialization(llvm::Module &M) {
 #endif
  TAsm = new CBEMCAsmInfo();
  MRI  = new llvm::MCRegisterInfo();
 #if defined(LLVM_3_4)
  TCtx = new llvm::MCContext(TAsm, MRI, NULL);
 #else
  TCtx = new llvm::MCContext(*TAsm, *MRI, NULL);
 #endif
  //Mang = new llvm::Mangler(*TCtx, *TD);
  // Keep track of which functions are static ctors/dtors so they can have
--- a/ctx.cpp
+++ b/ctx.cpp
@@ -1414,7 +1414,7 @@ FunctionEmitContext::ProgramIndexVector(bool is32bits) {
 llvm::Value *
 FunctionEmitContext::GetStringPtr(const std::string &str) {
-    llvm::Constant *lstr = llvm::ConstantDataArray::getString(*g->ctx, str);
+    llvm::Constant *lstr = llvm::ConstantDataArray::getString(*g->ctx, str, false);
    llvm::GlobalValue::LinkageTypes linkage = llvm::GlobalValue::InternalLinkage;
    llvm::Value *lstrPtr = new llvm::GlobalVariable(*m->module, lstr->getType(),
                                                    true /*isConst*/,
@@ -1464,11 +1464,13 @@ FunctionEmitContext::I1VecToBoolVec(llvm::Value *b) {
 static llvm::Value *
 lGetStringAsValue(llvm::BasicBlock *bblock, const char *s) {
-    llvm::Constant *sConstant = llvm::ConstantDataArray::getString(*g->ctx, s);
+    llvm::Constant *sConstant = llvm::ConstantDataArray::getString(*g->ctx, s, false);
    std::string var_name = "_";
    var_name = var_name + s;
    llvm::Value *sPtr = new llvm::GlobalVariable(*m->module, sConstant->getType(),
                                                 true /* const */,
                                                 llvm::GlobalValue::InternalLinkage,
-                                                 sConstant, s);
+                                                 sConstant, var_name.c_str());
    llvm::Value *indices[2] = { LLVMInt32(0), LLVMInt32(0) };
    llvm::ArrayRef<llvm::Value *> arrayRef(&indices[0], &indices[2]);
    return llvm::GetElementPtrInst::Create(sPtr, arrayRef, "sptr", bblock);
--- a/docs/ReleaseNotes.txt
+++ b/docs/ReleaseNotes.txt
@@ -1,3 +1,17 @@
 === v1.4.3 === (25 June 2013)
 A minor version update with several stability improvements:
 * Two bugs were fixed (including a bug in LLVM) to improve stability on 32-bit
  platforms.
 * A bug affecting several examples was fixed.
 * --instrument switch is fixed.
 All tests and examples now properly compile and execute on native targets on
 Unix platforms (Linux and MacOS).
 === v1.4.2 === (11 June 2013)
 A minor version update with a few important changes:
--- a/docs/news.rst
+++ b/docs/news.rst
@@ -2,6 +2,14 @@
 ispc News
 =========
 ispc 1.4.3 is Released
 ----------------------
 A minor update of ``ispc`` has been released with several stability improvements.
 All tests and examples now properly compile and execute on native targets on
 Unix platforms (Linux and MacOS).
 The released binaries are built with a patched version of LLVM 3.3.
 ispc 1.4.2 is Released
 ----------------------
--- a/doxygen.cfg
+++ b/doxygen.cfg
@@ -31,7 +31,7 @@ PROJECT_NAME           = "Intel SPMD Program Compiler"
 # This could be handy for archiving the generated documentation or
 # if some version control system is used.
-PROJECT_NUMBER         = 1.4.2
+PROJECT_NUMBER         = 1.4.3
 # The OUTPUT_DIRECTORY tag is used to specify the (relative or absolute)
 # base path where the generated documentation will be put.
--- a/examples/aobench_instrumented/instrument.cpp
+++ b/examples/aobench_instrumented/instrument.cpp
@@ -60,7 +60,7 @@ int countbits(int i) {
 // Callback function that ispc compiler emits calls to when --instrument
 // command-line flag is given while compiling.
 void
-ISPCInstrument(const char *fn, const char *note, int line, int mask) {
+ISPCInstrument(const char *fn, const char *note, int line, uint64_t mask) {
    char sline[16];
    sprintf(sline, "%04d", line);
    std::string s = std::string(fn) + std::string("(") + std::string(sline) +
--- a/examples/aobench_instrumented/instrument.h
+++ b/examples/aobench_instrumented/instrument.h
@@ -37,7 +37,7 @@
 #include <stdint.h>
 extern "C" {
-    void ISPCInstrument(const char *fn, const char *note, int line, int mask);
+    void ISPCInstrument(const char *fn, const char *note, int line, uint64_t mask);
 }
 void ISPCPrintInstrument();
--- a/expr.cpp
+++ b/expr.cpp
@@ -6802,7 +6802,23 @@ TypeCastExpr::GetValue(FunctionEmitContext *ctx) const {
        if (!conv)
            return NULL;
-        llvm::Value *cast = ctx->BroadcastValue(conv, toType->LLVMType(g->ctx));
+        llvm::Value *cast = NULL;
        llvm::Type* toTypeLLVM = toType->LLVMType(g->ctx);
        if (llvm::isa<llvm::VectorType>(toTypeLLVM)) {
            // Example uniform float => uniform float<3>
            cast = ctx->BroadcastValue(conv, toTypeLLVM);
        }
        else if (llvm::isa<llvm::ArrayType>(toTypeLLVM)) {
            // Example varying float => varying float<3>
            cast = llvm::UndefValue::get(toType->LLVMType(g->ctx));
            for (int i = 0; i < toVector->GetElementCount(); ++i) {
                // Here's InsertInst produces InsertValueInst.
                cast = ctx->InsertInst(cast, conv, i);
            }
        }
        else {
            FATAL("TypeCastExpr::GetValue: problem with cast");
        }
        return cast;
    }
--- a/func.cpp
+++ b/func.cpp
@@ -85,19 +85,19 @@ Function::Function(Symbol *s, Stmt *c) {
        code = TypeCheck(code);
        if (code != NULL && g->debugPrint) {
-            fprintf(stderr, "After typechecking function \"%s\":\n",
+            printf("After typechecking function \"%s\":\n",
                    sym->name.c_str());
            code->Print(0);
-            fprintf(stderr, "---------------------\n");
+            printf("---------------------\n");
        }
        if (code != NULL) {
            code = Optimize(code);
            if (g->debugPrint) {
-                fprintf(stderr, "After optimizing function \"%s\":\n",
+                printf("After optimizing function \"%s\":\n",
                        sym->name.c_str());
                code->Print(0);
-                fprintf(stderr, "---------------------\n");
+                printf("---------------------\n");
            }
        }
    }
--- a/ispc.h
+++ b/ispc.h
@@ -38,7 +38,7 @@
 #ifndef ISPC_H
 #define ISPC_H
-#define ISPC_VERSION "1.4.2"
+#define ISPC_VERSION "1.4.3"
 #if !defined(LLVM_3_1) && !defined(LLVM_3_2) && !defined(LLVM_3_3) && !defined(LLVM_3_4)
 #error "Only LLVM 3.1, 3.2, 3.3 and the 3.4 development branch are supported"
--- a/ispc.vcxproj
+++ b/ispc.vcxproj
@@ -464,6 +464,7 @@
      <GenerateDebugInformation>true</GenerateDebugInformation>
      <AdditionalLibraryDirectories>$(LLVM_INSTALL_DIR)\lib;%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
      <AdditionalDependencies>clangFrontend.lib;clangDriver.lib;clangSerialization.lib;clangParse.lib;clangSema.lib;clangAnalysis.lib;clangEdit.lib;clangAST.lib;clangLex.lib;clangBasic.lib;LLVMAnalysis.lib;LLVMAsmParser.lib;LLVMAsmPrinter.lib;LLVMBitReader.lib;LLVMBitWriter.lib;LLVMCodeGen.lib;LLVMCore.lib;LLVMExecutionEngine.lib;LLVMInstCombine.lib;LLVMInstrumentation.lib;LLVMLinker.lib;LLVMMC.lib;LLVMMCParser.lib;LLVMObject.lib;LLVMScalarOpts.lib;LLVMSelectionDAG.lib;LLVMSupport.lib;LLVMTarget.lib;LLVMTransformUtils.lib;LLVMX86ASMPrinter.lib;LLVMX86ASMParser.lib;LLVMX86Utils.lib;LLVMX86CodeGen.lib;LLVMX86Desc.lib;LLVMX86Disassembler.lib;LLVMX86Info.lib;LLVMipa.lib;LLVMipo.lib;shlwapi.lib;%(AdditionalDependencies)</AdditionalDependencies>
      <AdditionalDependencies Condition="'$(LLVM_VERSION)'!='LLVM_3_1'AND'$(LLVM_VERSION)'!='LLVM_3_2'AND'$(LLVM_VERSION)'!='LLVM_3_3'">LLVMOption.lib;LLVMSupport.lib;%(AdditionalDependencies)</AdditionalDependencies>
    </Link>
  </ItemDefinitionGroup>
  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
@@ -484,6 +485,7 @@
      <OptimizeReferences>true</OptimizeReferences>
      <AdditionalLibraryDirectories>$(LLVM_INSTALL_DIR)\lib;%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
      <AdditionalDependencies>clangFrontend.lib;clangDriver.lib;clangSerialization.lib;clangParse.lib;clangSema.lib;clangAnalysis.lib;clangEdit.lib;clangAST.lib;clangLex.lib;clangBasic.lib;LLVMAnalysis.lib;LLVMAsmParser.lib;LLVMAsmPrinter.lib;LLVMBitReader.lib;LLVMBitWriter.lib;LLVMCodeGen.lib;LLVMCore.lib;LLVMExecutionEngine.lib;LLVMInstCombine.lib;LLVMInstrumentation.lib;LLVMLinker.lib;LLVMMC.lib;LLVMMCParser.lib;LLVMObject.lib;LLVMScalarOpts.lib;LLVMSelectionDAG.lib;LLVMSupport.lib;LLVMTarget.lib;LLVMTransformUtils.lib;LLVMX86ASMPrinter.lib;LLVMX86ASMParser.lib;LLVMX86Utils.lib;LLVMX86CodeGen.lib;LLVMX86Desc.lib;LLVMX86Disassembler.lib;LLVMX86Info.lib;LLVMipa.lib;LLVMipo.lib;shlwapi.lib;%(AdditionalDependencies)</AdditionalDependencies>
      <AdditionalDependencies Condition="'$(LLVM_VERSION)'!='LLVM_3_1'AND'$(LLVM_VERSION)'!='LLVM_3_2'AND'$(LLVM_VERSION)'!='LLVM_3_3'">LLVMOption.lib;LLVMSupport.lib;%(AdditionalDependencies)</AdditionalDependencies>
    </Link>
  </ItemDefinitionGroup>
  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
--- a/llvm_patches/r184575-x86-shift.patch
+++ b/llvm_patches/r184575-x86-shift.patch
@@ -0,0 +1,52 @@
 This patch needs to be applied to LLVM 3.2/3.3 (but was verified with 3.3 only) to
 fix a problem with shift instructions on x86 (see PR16360 in LLVM bugzilla).
 This is a general LLVM problem, which is triggered by one of the x86 tests in our test suite.
 LLVM 3.4 contains this fix (r184575).
 Index: lib/CodeGen/SelectionDAG/DAGCombiner.cpp
 ===================================================================
 --- lib/CodeGen/SelectionDAG/DAGCombiner.cpp	(revision 183970)
 +++ lib/CodeGen/SelectionDAG/DAGCombiner.cpp	(working copy)
@@ -3901,8 +3901,7 @@
                        DAG.getConstant(~0ULL >> ShAmt, VT));
   }
 -
 -  // fold (srl (anyextend x), c) -> (anyextend (srl x, c))
 +  // fold (srl (anyextend x), c) -> (and (anyextend (srl x, c)), mask)
   if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
     // Shifting in all undef bits?
     EVT SmallVT = N0.getOperand(0).getValueType();
@@ -3915,7 +3914,10 @@
                                        N0.getOperand(0),
                           DAG.getConstant(ShiftAmt, getShiftAmountTy(SmallVT)));
       AddToWorkList(SmallShift.getNode());
 -      return DAG.getNode(ISD::ANY_EXTEND, N->getDebugLoc(), VT, SmallShift);
 +      APInt Mask = APInt::getAllOnesValue(VT.getSizeInBits()).lshr(ShiftAmt);
 +      return DAG.getNode(ISD::AND, SDLoc(N), VT,
 +                         DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), VT, SmallShift),
 +                         DAG.getConstant(Mask, VT));
     }
   }
 Index: test/CodeGen/X86/pr16360.ll
 ===================================================================
 --- test/CodeGen/X86/pr16360.ll	(revision 0)
 +++ test/CodeGen/X86/pr16360.ll	(revision 0)
@@ -0,0 +1,16 @@
 +; RUN: llc < %s -mtriple=i686-pc-linux | FileCheck %s
 +
 +define i64 @foo(i32 %sum) {
 +entry:
 +  %conv = sext i32 %sum to i64
 +  %shr = lshr i64 %conv, 2
 +  %or = or i64 4611686018360279040, %shr
 +  ret i64 %or
 +}
 +
 +; CHECK: foo
 +; CHECK: shrl $2
 +; CHECK: orl $-67108864
 +; CHECK-NOT: movl $-1
 +; CHECK: movl $1073741823
 +; CHECK: ret
--- a/llvmutil.cpp
+++ b/llvmutil.cpp
@@ -600,9 +600,10 @@ lGetIntValue(llvm::Value *offset) {
 }
-void
+llvm::Value *
 LLVMFlattenInsertChain(llvm::Value *inst, int vectorWidth,
-                       llvm::Value **elements) {
+                       bool compare, bool undef) {
    llvm::Value ** elements = new llvm::Value*[vectorWidth];
    for (int i = 0; i < vectorWidth; ++i) {
        elements[i] = NULL;
    }
@@ -610,43 +611,93 @@ LLVMFlattenInsertChain(llvm::Value *inst, int vectorWidth,
    // Catch a pattern of InsertElement chain.
    if (llvm::InsertElementInst *ie =
            llvm::dyn_cast<llvm::InsertElementInst>(inst)) {
        //Gather elements of vector
        while (ie != NULL) {
            int64_t iOffset = lGetIntValue(ie->getOperand(2));
            Assert(iOffset >= 0 && iOffset < vectorWidth);
            Assert(elements[iOffset] == NULL);
            // Get the scalar value from this insert
-            elements[iOffset] = ie->getOperand(1);
+            if (elements[iOffset] == NULL) {
                elements[iOffset] = ie->getOperand(1);
            }
            // Do we have another insert?
            llvm::Value *insertBase = ie->getOperand(0);
            ie = llvm::dyn_cast<llvm::InsertElementInst>(insertBase);
-            if (ie == NULL) {
+            if (ie != NULL) {
-                if (llvm::isa<llvm::UndefValue>(insertBase)) {
+                continue;
-                    return;
+            }
            if (llvm::isa<llvm::UndefValue>(insertBase)) {
                break;
            }
            if (llvm::isa<llvm::ConstantVector>(insertBase) ||
                llvm::isa<llvm::ConstantAggregateZero>(insertBase)) {
                llvm::Constant *cv = llvm::dyn_cast<llvm::Constant>(insertBase);
                Assert(vectorWidth == (int)(cv->getNumOperands()));
                for (int i=0; i<vectorWidth; i++) {
                    if (elements[i] == NULL) {
                        elements[i] = cv->getOperand(i);
                    }
                }
                break;
            }
            else {
                // Here chain ends in llvm::LoadInst or some other.
                // They are not equal to each other so we should return NULL if compare
                // and first element if we have it.
                Assert(compare == true ||  elements[0] != NULL);
                if (compare) {
                    return NULL;
                }
                else {
                    return elements[0];
                }
            }
            // TODO: Also, should we handle some other values like
            // ConstantDataVectors.
        }
        if (compare == false) {
            //We simply want first element
            return elements[0];
        }
-                // Get the value out of a constant vector if that's what we
+        int null_number = 0;
-                // have
+        int NonNull = 0;
-                llvm::ConstantVector *cv =
+        for(int i = 0; i < vectorWidth; i++) {
-                    llvm::dyn_cast<llvm::ConstantVector>(insertBase);
+            if (elements[i] == NULL) {
-
+                null_number++;
-                // FIXME: this assert is a little questionable; we probably
+            }
-                // shouldn't fail in this case but should just return an
+            else {
-                // incomplete result.  But there aren't currently any known
+                NonNull = i;
                // cases where we have anything other than an undef value or a
                // constant vector at the base, so if that ever does happen,
                // it'd be nice to know what happend so that perhaps we can
                // handle it.
                // FIXME: Also, should we handle ConstantDataVectors with
                // LLVM3.1?  What about ConstantAggregateZero values??
                Assert(cv != NULL);
                Assert(iOffset < (int)cv->getNumOperands());
                elements[iOffset] = cv->getOperand((int32_t)iOffset);
            }
        }
        if (null_number == vectorWidth) {
            //All of elements are NULLs
            return NULL;
        }
        if ((undef == false) && (null_number != 0)) {
            //We don't want NULLs in chain, but we have them
            return NULL;
        }
        // Compare elements of vector
        for (int i = 0; i < vectorWidth; i++) {
            if (elements[i] == NULL) {
                continue;
            }
            std::vector<llvm::PHINode *> seenPhi0;
            std::vector<llvm::PHINode *> seenPhi1;
            if (lValuesAreEqual(elements[NonNull], elements[i],
                seenPhi0, seenPhi1) == false) {
                return NULL;
            }
        }
        return elements[NonNull];
    }
    // Catch a pattern of broadcast implemented as InsertElement + Shuffle:
    //   %broadcast_init.0 = insertelement <4 x i32> undef, i32 %val, i32 0
    //   %broadcast.1 = shufflevector <4 x i32> %smear.0, <4 x i32> undef,
@@ -663,14 +714,12 @@ LLVMFlattenInsertChain(llvm::Value *inst, int vectorWidth,
                    llvm::dyn_cast<llvm::ConstantInt>(ie->getOperand(2));
                if (ci->isZero()) {
-                    for (int i = 0; i < vectorWidth; ++i) {
+                    return ie->getOperand(1);
                        elements[i] = ie->getOperand(1);
                    }
                    return;
                }
            }
        }
    }
    return NULL;
 }
@@ -726,12 +775,10 @@ lIsExactMultiple(llvm::Value *val, int baseValue, int vectorLength,
    if (llvm::isa<llvm::InsertElementInst>(val) ||
        llvm::isa<llvm::ShuffleVectorInst>(val)) {
-        llvm::Value *elts[ISPC_MAX_NVEC];
+        llvm::Value *element = LLVMFlattenInsertChain(val, g->target->getVectorWidth());
        LLVMFlattenInsertChain(val, g->target->getVectorWidth(), elts);
        // We just need to check the scalar first value, since we know that
        // all elements are equal
-        return lIsExactMultiple(elts[0], baseValue, vectorLength,
+        return lIsExactMultiple(element, baseValue, vectorLength, seenPhis);
                                     seenPhis);
    }
    llvm::PHINode *phi = llvm::dyn_cast<llvm::PHINode>(val);
@@ -995,32 +1042,7 @@ lVectorValuesAllEqual(llvm::Value *v, int vectorLength,
    llvm::InsertElementInst *ie = llvm::dyn_cast<llvm::InsertElementInst>(v);
    if (ie != NULL) {
-        llvm::Value *elements[ISPC_MAX_NVEC];
+        return (LLVMFlattenInsertChain(ie, vectorLength) != NULL);
        LLVMFlattenInsertChain(ie, vectorLength, elements);
        // We will ignore any values of elements[] that are NULL; as they
        // correspond to undefined values--we just want to see if all of
        // the defined values have the same value.
        int lastNonNull = 0;
        while (lastNonNull < vectorLength && elements[lastNonNull] == NULL)
            ++lastNonNull;
        if (lastNonNull == vectorLength)
            // all of them are undef!
            return true;
        for (int i = lastNonNull; i < vectorLength; ++i) {
            if (elements[i] == NULL)
                continue;
            std::vector<llvm::PHINode *> seenPhi0;
            std::vector<llvm::PHINode *> seenPhi1;
            if (lValuesAreEqual(elements[lastNonNull], elements[i], seenPhi0,
                                seenPhi1) == false)
                return false;
            lastNonNull = i;
        }
        return true;
    }
    llvm::PHINode *phi = llvm::dyn_cast<llvm::PHINode>(v);
@@ -1472,9 +1494,7 @@ lExtractFirstVectorElement(llvm::Value *v,
    // flatten them out and grab the value for the first one.
    if (llvm::isa<llvm::InsertElementInst>(v) ||
        llvm::isa<llvm::ShuffleVectorInst>(v)) {
-        llvm::Value *elements[ISPC_MAX_NVEC];
+        return LLVMFlattenInsertChain(v, vt->getNumElements(), false);
        LLVMFlattenInsertChain(v, vt->getNumElements(), elements);
        return elements[0];
    }
    // Worst case, for everything else, just do a regular extract element
--- a/llvmutil.h
+++ b/llvmutil.h
@@ -269,9 +269,13 @@ extern bool LLVMExtractVectorInts(llvm::Value *v, int64_t ret[], int *nElts);
       %broadcast_init.0 = insertelement <4 x i32> undef, i32 %val, i32 0
       %broadcast.1 = shufflevector <4 x i32> %smear.0, <4 x i32> undef,
                                                  <4 x i32> zeroinitializer
    Function returns:
    Compare all elements and return one of them if all are equal, otherwise NULL.
    If compare argument is false, don't do compare and return first element instead.
    If undef argument is true, ignore undef elements (but all undef yields NULL anyway).
 */
-extern void LLVMFlattenInsertChain(llvm::Value *inst, int vectorWidth,
+extern llvm::Value * LLVMFlattenInsertChain (llvm::Value *inst, int vectorWidth,
-                                   llvm::Value **elements);
+    bool compare = true, bool undef = true);
 /** This is a utility routine for debugging that dumps out the given LLVM
    value as well as (recursively) all of the other values that it depends
--- a/opt.cpp
+++ b/opt.cpp
@@ -505,7 +505,11 @@ Optimize(llvm::Module *module, int optLevel) {
        optPM.add(llvm::createCFGSimplificationPass());
        optPM.add(llvm::createArgumentPromotionPass());
 #if defined(LLVM_3_1) || defined(LLVM_3_2) || defined(LLVM_3_3)
        // Starting from 3.4 this functionality was moved to
        // InstructionCombiningPass. See r184459 for details.
        optPM.add(llvm::createSimplifyLibCallsPass());
 #endif
        optPM.add(llvm::createInstructionCombiningPass());
        optPM.add(llvm::createJumpThreadingPass());
        optPM.add(llvm::createCFGSimplificationPass());
@@ -1060,23 +1064,18 @@ static llvm::Value *
 lGetBasePointer(llvm::Value *v) {
    if (llvm::isa<llvm::InsertElementInst>(v) ||
        llvm::isa<llvm::ShuffleVectorInst>(v)) {
-        llvm::Value *elements[ISPC_MAX_NVEC];
+        llvm::Value *element = LLVMFlattenInsertChain
-        LLVMFlattenInsertChain(v, g->target->getVectorWidth(), elements);
+            (v, g->target->getVectorWidth(), true, false);
        // Make sure none of the elements is undefined.
        // TODO: it's probably ok to allow undefined elements and return
        // the base pointer if all of the other elements have the same
        // value.
-        for (int i = 0; i < g->target->getVectorWidth(); ++i)
+        if (element != NULL) {
-            if (elements[i] == NULL)
+            //all elements are the same and not NULLs
-                return NULL;
+            return lCheckForActualPointer(element);
-
+        }
-        // Do all of the elements have the same value?
+        else {
-        for (int i = 0; i < g->target->getVectorWidth()-1; ++i)
+            return NULL;
-            if (elements[i] != elements[i+1])
+        }
                return NULL;
        return lCheckForActualPointer(elements[0]);
    }
    // This case comes up with global/static arrays
--- a/run_tests.py
+++ b/run_tests.py
@@ -17,6 +17,7 @@ import shlex
 import platform
 import tempfile
 import os.path
 import time
 # disable fancy error/warning printing with ANSI colors, so grepping for error
 # messages doesn't get confused
@@ -52,6 +53,8 @@ parser.add_option('-v', '--verbose', dest='verbose', help='Enable verbose output
 parser.add_option('--wrap-exe', dest='wrapexe',
                  help='Executable to wrap test runs with (e.g. "valgrind")',
                  default="")
 parser.add_option('--time', dest='time', help='Enable time output',
                  default=False, action="store_true")
 (options, args) = parser.parse_args()
@@ -217,19 +220,60 @@ def run_cmds(compile_cmds, run_cmd, filename, expect_failure):
        return (0, 0)
-def run_test(testname):
+def add_prefix(path):
    global is_windows
    if is_windows:
-        # On Windows we run tests in tmp dir, so the root is one level up.
+    # On Windows we run tests in tmp dir, so the root is one level up.
        input_prefix = "..\\"
    else:
        input_prefix = ""
    path = input_prefix + path
    path = os.path.normpath(path)
    return path
 def check_test(filename):
    prev_arch = False
    prev_os = False
    done_arch = True
    done_os = True
    done = True
    global is_windows
    if is_windows:
        oss = "windows"
    else:
        oss = "linux"
    b = buffer(file(add_prefix(filename)).read());
    for run in re.finditer('// *rule: run on .*', b):
        arch = re.match('.* arch=.*', run.group())
        if arch != None:
            if re.search(' arch='+options.arch+'$', arch.group()) != None:
                prev_arch = True
            if re.search(' arch='+options.arch+' ', arch.group()) != None:
                prev_arch = True
            done_arch = prev_arch
        OS = re.match('.* OS=.*', run.group())
        if OS != None:
            if re.search(' OS='+oss, OS.group()) != None:
                prev_os = True
            done_os = prev_os
    done = done_arch and done_os
    for skip in re.finditer('// *rule: skip on .*', b):
        if re.search(' arch=' + options.arch + '$', skip.group())!=None:
            done = False
        if re.search(' arch=' + options.arch + ' ', skip.group())!=None:
            done = False
        if re.search(' OS=' + oss, skip.group())!=None:
            done = False
    return done
 def run_test(testname):
    # testname is a path to the test from the root of ispc dir
    # filename is a path to the test from the current dir
    # ispc_exe_rel is a relative path to ispc
-    filename = os.path.normpath(input_prefix + testname)
+    filename = add_prefix(testname)
-    ispc_exe_rel = os.path.normpath(input_prefix + ispc_exe)
+    ispc_exe_rel = add_prefix(ispc_exe)
    # is this a test to make sure an error is issued?
    want_error = (filename.find("tests_errors") != -1)
@@ -290,8 +334,8 @@ def run_test(testname):
                    obj_name = "%s.obj" % os.path.basename(filename)
                exe_name = "%s.exe" % os.path.basename(filename)
-                cc_cmd = "%s /I. /I../winstuff /Zi /nologo /DTEST_SIG=%d %stest_static.cpp %s /Fe%s" % \
+                cc_cmd = "%s /I. /I../winstuff /Zi /nologo /DTEST_SIG=%d %s %s /Fe%s" % \
-                         (options.compiler_exe, match, input_prefix, obj_name, exe_name)
+                         (options.compiler_exe, match, add_prefix("test_static.cpp"), obj_name, exe_name)
                if should_fail:
                    cc_cmd += " /DEXPECT_FAILURE"
            else:
@@ -327,7 +371,7 @@ def run_test(testname):
            if options.no_opt:
                ispc_cmd += " -O0" 
            if is_generic_target:
-                ispc_cmd += " --emit-c++ --c++-include-file=%s" % os.path.normpath(input_prefix + options.include_file)
+                ispc_cmd += " --emit-c++ --c++-include-file=%s" % add_prefix(options.include_file)
        # compile the ispc code, make the executable, and run it...
        (compile_error, run_error) = run_cmds([ispc_cmd, cc_cmd], 
@@ -351,7 +395,7 @@ def run_test(testname):
 # pull tests to run from the given queue and run them.  Multiple copies of
 # this function will be running in parallel across all of the CPU cores of
 # the system.
-def run_tasks_from_queue(queue, queue_ret, total_tests_arg, max_test_length_arg, counter, mutex):
+def run_tasks_from_queue(queue, queue_ret, queue_skip, total_tests_arg, max_test_length_arg, counter, mutex):
    if is_windows:
        tmpdir = "tmp%d" % os.getpid()
        os.mkdir(tmpdir)
@@ -380,14 +424,18 @@ def run_tasks_from_queue(queue, queue_ret, total_tests_arg, max_test_length_arg,
            sys.exit(0)
-        (compile_error, run_error) = run_test(filename)
+        if check_test(filename):
-        if compile_error != 0:
+            (compile_error, run_error) = run_test(filename)
-            compile_error_files += [ filename ]
+            if compile_error != 0:
-        if run_error != 0:
+                compile_error_files += [ filename ]
-            run_error_files += [ filename ]
+            if run_error != 0:
                run_error_files += [ filename ]
            with mutex:
                update_progress(filename, total_tests_arg, counter, max_test_length_arg)
        else:
            queue_skip.put(filename)
        with mutex:
            update_progress(filename, total_tests_arg, counter, max_test_length_arg)
 task_threads = []
@@ -413,6 +461,7 @@ if __name__ == '__main__':
    for x in range(nthreads):
        q.put('STOP')
    qret = multiprocessing.Queue()
    qskip = multiprocessing.Queue()
    # need to catch sigint so that we can terminate all of the tasks if
    # we're interrupted
@@ -421,9 +470,10 @@ if __name__ == '__main__':
    finished_tests_counter = multiprocessing.Value(c_int)
    finished_tests_counter_lock = multiprocessing.Lock()
    start_time = time.time()
    # launch jobs to run tests
    for x in range(nthreads):
-        t = multiprocessing.Process(target=run_tasks_from_queue, args=(q, qret, total_tests, max_test_length, finished_tests_counter, finished_tests_counter_lock))
+        t = multiprocessing.Process(target=run_tasks_from_queue, args=(q, qret, qskip, total_tests, max_test_length, finished_tests_counter, finished_tests_counter_lock))
        task_threads.append(t)
        t.start()
@@ -433,11 +483,21 @@ if __name__ == '__main__':
        t.join()
    sys.stdout.write("\n")
    elapsed_time = time.time() - start_time
    if options.time:
        sys.stdout.write("Elapsed time: %d s\n" % elapsed_time)
    while not qret.empty():
        (c, r) = qret.get()
        compile_error_files += c
        run_error_files += r
    skip = 0
    if qskip.qsize() > 0:
        sys.stdout.write("%d / %d tests SKIPPED:\n" % (qskip.qsize(), total_tests))
        while not qskip.empty():
            sys.stdout.write("\t%s\n" % qskip.get())
    if len(compile_error_files) > 0:
        compile_error_files.sort()
        sys.stdout.write("%d / %d tests FAILED compilation:\n" % (len(compile_error_files), total_tests))
--- a/tests_errors/ptrcast-lose-info.ispc
+++ b/tests_errors/ptrcast-lose-info.ispc
@@ -1,4 +1,5 @@
 // Pointer type cast of type "uniform int32 * uniform" to integer type "uniform int32" may lose information.
 //  rule: run on arch=x86-64
 int32 foo(int * uniform x)  {
    return (int32) x;
Author	SHA1	Message	Date
Dmitry Babokin	594485c38c	Release 1.4.3	2013-06-25 18:38:21 +04:00
Dmitry Babokin	d52e2d5a8d	License update (just dates)	2013-06-25 17:02:42 +04:00
Dmitry Babokin	fc66066d4d	Merge pull request #530 from dbabokin/llvm_fix Adding LLVM patch to fix #519 with LLVM 3.3	2013-06-25 05:22:09 -07:00
Dmitry Babokin	6169338815	Adding LLVM patch to fix #519 with LLVM 3.3	2013-06-25 16:21:14 +04:00
Dmitry Babokin	6bc8cb1ff1	Merge pull request #529 from ifilippov/instrument_fix correction of --instrument option support	2013-06-25 03:08:02 -07:00
Dmitry Babokin	0fc49b1c37	Merge pull request #528 from ifilippov/test3 Reapplying lost commits	2013-06-25 02:14:24 -07:00
Ilia Filippov	9fb981e9a0	correction of --instrument option support	2013-06-25 12:33:23 +04:00
Ilia Filippov	cba1b3cedd	additional libraries for LLVM_3_4 build	2013-06-25 12:22:53 +04:00
Ilia Filippov	12c4512932	adding two additional libraries for LLVM_3_4 build	2013-06-25 12:22:53 +04:00
Dmitry Babokin	0dd1dbb568	Merge pull request #526 from dbabokin/master Tracking LLVM trunk: removing llvm::createSimplifyLibCallsPass() call	2013-06-23 23:10:19 -07:00
Dmitry Babokin	fdcec5a219	Tracking LLVM trunk: removing llvm::createSimplifyLibCallsPass() call	2013-06-24 10:08:06 +04:00
Dmitry Babokin	bebab7ab0d	Merge pull request #525 from dbabokin/debug --debug output: stdout instead of stderr	2013-06-21 03:56:17 -07:00
Dmitry Babokin	fb771b6aa3	--debug output: stdout instead of stderr	2013-06-20 22:47:29 +04:00
jbrodman	8156559475	Merge pull request #522 from dbabokin/broadcast Fix for #520	2013-06-18 11:47:24 -07:00
jbrodman	9f5e51cd01	Merge pull request #523 from dbabokin/tot Tracking ToT changes	2013-06-18 11:47:16 -07:00
Dmitry Babokin	27daab2f1b	Fix for #520	2013-06-18 22:15:49 +04:00
Dmitry Babokin	c4d404b15f	Tracking ToT changes: changes in MCContext interface	2013-06-18 22:13:14 +04:00
Dmitry Babokin	95fcdc36ee	Tracking ToT changes, which now require to link option library. This is Unix only. Windows will be fixed separately	2013-06-18 22:12:33 +04:00
Dmitry Babokin	2fdaba53c1	Merge pull request #517 from ifilippov/bug_34 Fix for tests/soa-22 on x86/sse4 - cleanup in function LLVMFlattenInsertChain().	2013-06-14 08:40:01 -07:00
Ilia Filippov	5c89080469	changes in function LLVMFlattenInsertChain	2013-06-14 16:38:54 +04:00
Ilia Filippov	d92f9df17c	changes in function LLVMFlattenInsertChain	2013-06-14 15:21:45 +04:00
Dmitry Babokin	f551390420	Merge pull request #516 from ifilippov/master Changes to support skipping tests.	2013-06-13 08:48:29 -07:00
Ilia Filippov	8642b4d89f	changing run_tests to support skipping tests and time	2013-06-13 19:25:34 +04:00
Ilia Filippov	6fb70c307d	changing run_tests to support skipping tests and time	2013-06-13 19:00:02 +04:00
Ilia Filippov	d08346fbcf	changes to support skipping tests	2013-06-13 16:47:10 +04:00
jbrodman	141d240a91	Merge pull request #513 from dbabokin/release_142 Release 1.4.2, 11 June 2013	2013-06-11 07:47:37 -07:00