From d665e2e85b1c609c129af7669ead85ea4b98c3a7 Mon Sep 17 00:00:00 2001 From: Gerrit Code Review Date: Wed, 24 Oct 2012 09:53:29 -0700 Subject: [PATCH 1/3] Initial empty repository From e57801a5d16b5641bb0e801e2f34b478336201cb Mon Sep 17 00:00:00 2001 From: "james.brodman" Date: Wed, 31 Oct 2012 15:25:26 -0400 Subject: [PATCH 2/3] Typo Fix --- examples/intrinsics/sse4.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/intrinsics/sse4.h b/examples/intrinsics/sse4.h index e3a4e277..b0365c86 100644 --- a/examples/intrinsics/sse4.h +++ b/examples/intrinsics/sse4.h @@ -274,7 +274,7 @@ static FORCEINLINE bool __any(__vec4_i1 mask) { } static FORCEINLINE bool __all(__vec4_i1 mask) { - return (_mm_movemask_ps(mask.v)=0xF); + return (_mm_movemask_ps(mask.v)==0xF); } static FORCEINLINE bool __none(__vec4_i1 mask) { From 810784da1f2e797410847cfe5a404b74297c4d92 Mon Sep 17 00:00:00 2001 From: ptu1 Date: Tue, 13 Nov 2012 12:35:45 -0800 Subject: [PATCH 3/3] Set the ScalarReplAggregate maximum structure size based on target vector width. --- opt.cpp | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/opt.cpp b/opt.cpp index bb5ba955..8c6f7b06 100644 --- a/opt.cpp +++ b/opt.cpp @@ -471,8 +471,14 @@ Optimize(llvm::Module *module, int optLevel) { } optPM.add(llvm::createDeadInstEliminationPass()); + // Max struct size threshold for scalar replacement is + // 1) 4 fields (r,g,b,w) + // 2) field size: vectorWidth * sizeof(float) + const int field_limit = 4; + int sr_threshold = g->target.vectorWidth * sizeof(float) * field_limit; + // On to more serious optimizations - optPM.add(llvm::createScalarReplAggregatesPass()); + optPM.add(llvm::createScalarReplAggregatesPass(sr_threshold)); optPM.add(llvm::createInstructionCombiningPass()); optPM.add(llvm::createCFGSimplificationPass()); optPM.add(llvm::createPromoteMemoryToRegisterPass()); @@ -494,7 +500,7 @@ Optimize(llvm::Module *module, int optLevel) { optPM.add(llvm::createInstructionCombiningPass()); optPM.add(llvm::createJumpThreadingPass()); optPM.add(llvm::createCFGSimplificationPass()); - optPM.add(llvm::createScalarReplAggregatesPass()); + optPM.add(llvm::createScalarReplAggregatesPass(sr_threshold)); optPM.add(llvm::createInstructionCombiningPass()); optPM.add(llvm::createTailCallEliminationPass()); @@ -540,7 +546,7 @@ Optimize(llvm::Module *module, int optLevel) { optPM.add(llvm::createFunctionInliningPass()); optPM.add(llvm::createArgumentPromotionPass()); - optPM.add(llvm::createScalarReplAggregatesPass(-1, false)); + optPM.add(llvm::createScalarReplAggregatesPass(sr_threshold, false)); optPM.add(llvm::createInstructionCombiningPass()); optPM.add(llvm::createCFGSimplificationPass()); optPM.add(llvm::createReassociatePass());