Fix performance regression introduced in be0c77d556

The patterns that detect when the offsets of a gather or scatter in base+offsets form are actually a multiple of 2/4/8 were no longer working. This change not only fixes that, but also expands the set of patterns that are matched. For example, given offsets of the form 4*v1 + 16*v2, it identifies a scale of 4 and new offsets of v1 + 4*v2. This fix makes the volume renderer run 1.19x faster, and noise 1.54x faster.
2012-01-19 17:54:21 -08:00
parent 2fb59c90cf
commit 4388338dad
1 changed files with 36 additions and 10 deletions
--- a/opt.cpp
+++ b/opt.cpp
@@ -1174,21 +1174,47 @@ lExtractOffsetVector248Scale(llvm::Value **vec) {
        return scale;
    }

-    // If we don't have a multiply, then just return
+
+    // If we don't have a binary operator, then just give up
    llvm::BinaryOperator *bop = llvm::dyn_cast<llvm::BinaryOperator>(*vec);
-    if (bop == NULL || bop->getOpcode() != llvm::Instruction::Mul)
+    if (bop == NULL)
        return LLVMInt32(1);

-    // Check each operand for being one of the scale factors we care about.
    llvm::Value *op0 = bop->getOperand(0), *op1 = bop->getOperand(1);
-    int splat;
-    if (lIs248Splat(op0, &splat)) {
-        *vec = op1;
-        return LLVMInt32(splat);
+    if (bop->getOpcode() == llvm::Instruction::Add) {
+        if (llvm::isa<llvm::ConstantAggregateZero>(op0)) {
+            *vec = op1;
+            return lExtractOffsetVector248Scale(vec);
+        }
+        else if (llvm::isa<llvm::ConstantAggregateZero>(op1)) {
+            *vec = op0;
+            return lExtractOffsetVector248Scale(vec);
+        }
+        else {
+            llvm::Value *s0 = lExtractOffsetVector248Scale(&op0);
+            llvm::Value *s1 = lExtractOffsetVector248Scale(&op1);
+            if (s0 == s1) {
+                *vec = llvm::BinaryOperator::Create(llvm::Instruction::Add,
+                                                    op0, op1, "new_add", bop);
+                return s0;
+            }
+            else
+                return LLVMInt32(1);
+        }
    }
-    else if (lIs248Splat(op1, &splat)) {
-        *vec = op0;
-        return LLVMInt32(splat);
+    else if (bop->getOpcode() == llvm::Instruction::Mul) {
+        // Check each operand for being one of the scale factors we care about.
+        int splat;
+        if (lIs248Splat(op0, &splat)) {
+            *vec = op1;
+            return LLVMInt32(splat);
+        }
+        else if (lIs248Splat(op1, &splat)) {
+            *vec = op0;
+            return LLVMInt32(splat);
+        }
+        else
+            return LLVMInt32(1);
    }
    else
        return LLVMInt32(1);