From 3c18c7a713595222ce12cbb43c8997240a33ec96 Mon Sep 17 00:00:00 2001 From: "james.brodman" Date: Fri, 26 Oct 2012 16:52:54 -0400 Subject: [PATCH 1/6] Fixed compile error: == instead of = --- examples/intrinsics/sse4.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/intrinsics/sse4.h b/examples/intrinsics/sse4.h index e3a4e277..b0365c86 100644 --- a/examples/intrinsics/sse4.h +++ b/examples/intrinsics/sse4.h @@ -274,7 +274,7 @@ static FORCEINLINE bool __any(__vec4_i1 mask) { } static FORCEINLINE bool __all(__vec4_i1 mask) { - return (_mm_movemask_ps(mask.v)=0xF); + return (_mm_movemask_ps(mask.v)==0xF); } static FORCEINLINE bool __none(__vec4_i1 mask) { From e323b1d0ade437388bcbfbce03e012c06a37c6a0 Mon Sep 17 00:00:00 2001 From: jbrodman Date: Fri, 26 Oct 2012 16:52:54 -0400 Subject: [PATCH 2/6] Fixed compile error: == instead of = --- examples/intrinsics/sse4.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/intrinsics/sse4.h b/examples/intrinsics/sse4.h index e3a4e277..b0365c86 100644 --- a/examples/intrinsics/sse4.h +++ b/examples/intrinsics/sse4.h @@ -274,7 +274,7 @@ static FORCEINLINE bool __any(__vec4_i1 mask) { } static FORCEINLINE bool __all(__vec4_i1 mask) { - return (_mm_movemask_ps(mask.v)=0xF); + return (_mm_movemask_ps(mask.v)==0xF); } static FORCEINLINE bool __none(__vec4_i1 mask) { From b80867d473924d130a53201a7bb4d97347517d5c Mon Sep 17 00:00:00 2001 From: Peng Tu Date: Mon, 29 Oct 2012 17:27:11 -0700 Subject: [PATCH 3/6] Move the call to RestoreContinuedLanes from bbBody to the correct place of bbCheckForMore for foreach_unique and foreach_active. 
--- stmt.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/stmt.cpp b/stmt.cpp index b719440a..830754d5 100644 --- a/stmt.cpp +++ b/stmt.cpp @@ -2018,7 +2018,6 @@ ForeachActiveStmt::EmitCode(FunctionEmitContext *ctx) const { } ctx->SetCurrentBasicBlock(bbBody); { - ctx->RestoreContinuedLanes(); ctx->SetBlockEntryMask(ctx->GetFullMask()); // Run the code in the body of the loop. This is easy now. @@ -2030,6 +2029,7 @@ ForeachActiveStmt::EmitCode(FunctionEmitContext *ctx) const { } ctx->SetCurrentBasicBlock(bbCheckForMore); { + ctx->RestoreContinuedLanes(); // At the end of the loop body (either due to running the // statements normally, or a continue statement in the middle of // the loop that jumps to the end, see if there are any lanes left @@ -2233,7 +2233,6 @@ ForeachUniqueStmt::EmitCode(FunctionEmitContext *ctx) const { } ctx->SetCurrentBasicBlock(bbBody); { - ctx->RestoreContinuedLanes(); ctx->SetBlockEntryMask(ctx->GetFullMask()); // Run the code in the body of the loop. This is easy now. if (stmts) @@ -2248,6 +2247,7 @@ ForeachUniqueStmt::EmitCode(FunctionEmitContext *ctx) const { // statements normally, or a continue statement in the middle of // the loop that jumps to the end, see if there are any lanes left // to be processed. 
+ ctx->RestoreContinuedLanes(); llvm::Value *remainingBits = ctx->LoadInst(maskBitsPtr, "remaining_bits"); llvm::Value *nonZero = ctx->CmpInst(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_NE, From e57801a5d16b5641bb0e801e2f34b478336201cb Mon Sep 17 00:00:00 2001 From: "james.brodman" Date: Wed, 31 Oct 2012 15:25:26 -0400 Subject: [PATCH 4/6] Typo Fix --- examples/intrinsics/sse4.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/intrinsics/sse4.h b/examples/intrinsics/sse4.h index e3a4e277..b0365c86 100644 --- a/examples/intrinsics/sse4.h +++ b/examples/intrinsics/sse4.h @@ -274,7 +274,7 @@ static FORCEINLINE bool __any(__vec4_i1 mask) { } static FORCEINLINE bool __all(__vec4_i1 mask) { - return (_mm_movemask_ps(mask.v)=0xF); + return (_mm_movemask_ps(mask.v)==0xF); } static FORCEINLINE bool __none(__vec4_i1 mask) { From 04d32ae3e6e9be52521fdd21d97b6d755a2fe718 Mon Sep 17 00:00:00 2001 From: Peng Tu Date: Mon, 5 Nov 2012 15:02:15 -0800 Subject: [PATCH 5/6] Inside LLVM, both signed and unsigned integers are represented with the same type - i32 - effectively a signed int32. On a 64-bit target, we must generate an explicit sxt/zxt during LLVM IR creation to promote the array index to 64 bits. Otherwise, an unsigned int index becomes a signed int index in the LLVM IR. I limit the fix to uniform indices to avoid widening a varying index vector to 64 bits. This means that the 32-bit values in varying indices must be positive and smaller than 2^31 at runtime for a program to behave correctly. 
--- expr.cpp | 39 ++++++++++++++++++++++++++++++--------- 1 file changed, 30 insertions(+), 9 deletions(-) diff --git a/expr.cpp b/expr.cpp index 5d72a839..ce4704e9 100644 --- a/expr.cpp +++ b/expr.cpp @@ -4318,15 +4318,36 @@ IndexExpr::TypeCheck() { bool isUniform = (index->GetType()->IsUniformType() && !g->opt.disableUniformMemoryOptimizations); - // Unless we have an explicit 64-bit index and are compiling to a - // 64-bit target with 64-bit addressing, convert the index to an int32 - // type. - if (Type::EqualIgnoringConst(indexType->GetAsUniformType(), - AtomicType::UniformInt64) == false || - g->target.is32Bit || - g->opt.force32BitAddressing) { - const Type *indexType = isUniform ? AtomicType::UniformInt32 : - AtomicType::VaryingInt32; + if (!isUniform) { + // Unless we have an explicit 64-bit index and are compiling to a + // 64-bit target with 64-bit addressing, convert the index to an int32 + // type. + // The range of varying index is limited to [0,2^31) as a result. + if (Type::EqualIgnoringConst(indexType->GetAsUniformType(), + AtomicType::UniformInt64) == false || + g->target.is32Bit || + g->opt.force32BitAddressing) { + const Type *indexType = AtomicType::VaryingInt32; + index = TypeConvertExpr(index, indexType, "array index"); + if (index == NULL) + return NULL; + } + } else { // isUniform + // For 32-bit target: + // force the index to 32 bit. + // For 64-bit target: + // We don't want to limit the index range. + // We sxt/zxt the index to 64 bit right here because + // LLVM doesn't distinguish unsigned from signed (both are i32) + // + // However, the index can be still truncated to signed int32 if + // the index type is 64 bit and --addressing=32. + bool force_32bit = g->target.is32Bit || + (g->opt.force32BitAddressing && + Type::EqualIgnoringConst(indexType->GetAsUniformType(), + AtomicType::UniformInt64)); + const Type *indexType = force_32bit ? 
+ AtomicType::UniformInt32 : AtomicType::UniformInt64; index = TypeConvertExpr(index, indexType, "array index"); if (index == NULL) return NULL; From 97ddc1ed1011025619e3e7ba84d86ddb113ba004 Mon Sep 17 00:00:00 2001 From: "james.brodman" Date: Thu, 8 Nov 2012 16:30:12 -0500 Subject: [PATCH 6/6] Fixed =/== error in __all() --- examples/intrinsics/knc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/intrinsics/knc.h b/examples/intrinsics/knc.h index 7cb68bc5..859a4bc7 100644 --- a/examples/intrinsics/knc.h +++ b/examples/intrinsics/knc.h @@ -478,7 +478,7 @@ static FORCEINLINE bool __any(__vec16_i1 mask) { } static FORCEINLINE bool __all(__vec16_i1 mask) { - return (mask=0xFFFF); + return (mask==0xFFFF); } static FORCEINLINE bool __none(__vec16_i1 mask) {