Merge branch 'master' of ssh://fmygit6001.fm.intel.com:29418/ssg_dpd_tpi_ispc-ispc_git

2012-11-13 12:47:13 -08:00
parent 810784da1f d517b37f3f
commit 32d44a5b9e
4 changed files with 34 additions and 13 deletions
--- a/examples/intrinsics/knc.h
+++ b/examples/intrinsics/knc.h
@@ -478,7 +478,7 @@ static FORCEINLINE bool __any(__vec16_i1 mask) {
 }

 static FORCEINLINE bool __all(__vec16_i1 mask) {
-    return (mask=0xFFFF);
+    return (mask==0xFFFF);
 }

 static FORCEINLINE bool __none(__vec16_i1 mask) {
--- a/examples/intrinsics/sse4.h
+++ b/examples/intrinsics/sse4.h
@@ -274,7 +274,7 @@ static FORCEINLINE bool __any(__vec4_i1 mask) {
 }

 static FORCEINLINE bool __all(__vec4_i1 mask) {
-    return (_mm_movemask_ps(mask.v)=0xF);
+    return (_mm_movemask_ps(mask.v)==0xF);
 }

 static FORCEINLINE bool __none(__vec4_i1 mask) {
--- a/expr.cpp
+++ b/expr.cpp
@@ -4318,15 +4318,36 @@ IndexExpr::TypeCheck() {
    bool isUniform = (index->GetType()->IsUniformType() && 
                      !g->opt.disableUniformMemoryOptimizations);

-    // Unless we have an explicit 64-bit index and are compiling to a
-    // 64-bit target with 64-bit addressing, convert the index to an int32
-    // type.
-    if (Type::EqualIgnoringConst(indexType->GetAsUniformType(),
-                                 AtomicType::UniformInt64) == false ||
-        g->target.is32Bit ||
-        g->opt.force32BitAddressing) {
-        const Type *indexType = isUniform ? AtomicType::UniformInt32 : 
-            AtomicType::VaryingInt32;
+    if (!isUniform) {
+        // Unless we have an explicit 64-bit index and are compiling to a
+        // 64-bit target with 64-bit addressing, convert the index to an int32
+        // type.
+        //    As a result, a varying index is limited to the range [0,2^31).
+        if (Type::EqualIgnoringConst(indexType->GetAsUniformType(),
+                                     AtomicType::UniformInt64) == false ||
+            g->target.is32Bit ||
+            g->opt.force32BitAddressing) {
+            const Type *indexType = AtomicType::VaryingInt32;
+            index = TypeConvertExpr(index, indexType, "array index");
+            if (index == NULL)
+                return NULL;
+        }
+    } else { // isUniform
+        // For 32-bit target:
+        //   force the index to 32 bit.
+        // For 64-bit target:
+        //   We don't want to limit the index range. 
+        //   We sign/zero-extend the index to 64 bits right here because
+        //   LLVM doesn't distinguish unsigned from signed (both are i32).
+        //
+        //   However, the index can still be truncated to signed int32 if
+        //   the index type is 64-bit and --addressing=32.
+        bool force_32bit = g->target.is32Bit || 
+            (g->opt.force32BitAddressing && 
+             Type::EqualIgnoringConst(indexType->GetAsUniformType(),
+                                      AtomicType::UniformInt64));
+        const Type *indexType = force_32bit ? 
+            AtomicType::UniformInt32 : AtomicType::UniformInt64;
        index = TypeConvertExpr(index, indexType, "array index");
        if (index == NULL)
            return NULL;
--- a/stmt.cpp
+++ b/stmt.cpp
@@ -2018,7 +2018,6 @@ ForeachActiveStmt::EmitCode(FunctionEmitContext *ctx) const {
    }

    ctx->SetCurrentBasicBlock(bbBody); {
-        ctx->RestoreContinuedLanes();
        ctx->SetBlockEntryMask(ctx->GetFullMask());

        // Run the code in the body of the loop.  This is easy now.
@@ -2030,6 +2029,7 @@ ForeachActiveStmt::EmitCode(FunctionEmitContext *ctx) const {
    }

    ctx->SetCurrentBasicBlock(bbCheckForMore); {
+        ctx->RestoreContinuedLanes();
        // At the end of the loop body (either due to running the
        // statements normally, or a continue statement in the middle of
        // the loop that jumps to the end, see if there are any lanes left
@@ -2233,7 +2233,6 @@ ForeachUniqueStmt::EmitCode(FunctionEmitContext *ctx) const {
    }

    ctx->SetCurrentBasicBlock(bbBody); {
-        ctx->RestoreContinuedLanes();
        ctx->SetBlockEntryMask(ctx->GetFullMask());
        // Run the code in the body of the loop.  This is easy now.
        if (stmts)
@@ -2248,6 +2247,7 @@ ForeachUniqueStmt::EmitCode(FunctionEmitContext *ctx) const {
        // statements normally, or a continue statement in the middle of
        // the loop that jumps to the end, see if there are any lanes left
        // to be processed.
+        ctx->RestoreContinuedLanes();
        llvm::Value *remainingBits = ctx->LoadInst(maskBitsPtr, "remaining_bits");
        llvm::Value *nonZero = 
            ctx->CmpInst(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_NE,