From 4388338dadf84114186a4e3d86a1c13602631740 Mon Sep 17 00:00:00 2001 From: Matt Pharr Date: Thu, 19 Jan 2012 17:54:21 -0800 Subject: [PATCH] Fix performance regression introduced in be0c77d5562 Effectively, the patterns that detected when the offsets of a gather or scatter in base+offsets form were actually a multiple of 2/4/8 were no longer working. This change not only fixes that, but also expands the set of offset patterns that are matched. For example, given offsets of the form 4*v1 + 16*v2, it identifies a scale of 4 and new offsets of v1 + 4*v2. This fix makes the volume renderer run 1.19x faster, and noise 1.54x faster. --- opt.cpp | 46 ++++++++++++++++++++++++++++++++++++---------- 1 file changed, 36 insertions(+), 10 deletions(-) diff --git a/opt.cpp b/opt.cpp index cd62d342..b9c6708f 100644 --- a/opt.cpp +++ b/opt.cpp @@ -1174,21 +1174,47 @@ lExtractOffsetVector248Scale(llvm::Value **vec) { return scale; } - // If we don't have a multiply, then just return + + // If we don't have a binary operator, then just give up llvm::BinaryOperator *bop = llvm::dyn_cast(*vec); - if (bop == NULL || bop->getOpcode() != llvm::Instruction::Mul) + if (bop == NULL) return LLVMInt32(1); - // Check each operand for being one of the scale factors we care about. 
llvm::Value *op0 = bop->getOperand(0), *op1 = bop->getOperand(1); - int splat; - if (lIs248Splat(op0, &splat)) { - *vec = op1; - return LLVMInt32(splat); + if (bop->getOpcode() == llvm::Instruction::Add) { + if (llvm::isa(op0)) { + *vec = op1; + return lExtractOffsetVector248Scale(vec); + } + else if (llvm::isa(op1)) { + *vec = op0; + return lExtractOffsetVector248Scale(vec); + } + else { + llvm::Value *s0 = lExtractOffsetVector248Scale(&op0); + llvm::Value *s1 = lExtractOffsetVector248Scale(&op1); + if (s0 == s1) { + *vec = llvm::BinaryOperator::Create(llvm::Instruction::Add, + op0, op1, "new_add", bop); + return s0; + } + else + return LLVMInt32(1); + } } - else if (lIs248Splat(op1, &splat)) { - *vec = op0; - return LLVMInt32(splat); + else if (bop->getOpcode() == llvm::Instruction::Mul) { + // Check each operand for being one of the scale factors we care about. + int splat; + if (lIs248Splat(op0, &splat)) { + *vec = op1; + return LLVMInt32(splat); + } + else if (lIs248Splat(op1, &splat)) { + *vec = op0; + return LLVMInt32(splat); + } + else + return LLVMInt32(1); } else return LLVMInt32(1);