Improve gather->vector load optimization to detect <linear sequence>-<uniform> case.

Previously, we didn't handle subtraction ops when deciphering offsets in order to try to change gathers to vector loads.
2011-10-11 13:24:40 -07:00
parent 06d70376ea
commit 1198520029
2 changed files with 67 additions and 23 deletions
--- a/opt.cpp
+++ b/opt.cpp
@@ -1906,7 +1906,8 @@ lVectorIsLinear(llvm::Value *v[ISPC_MAX_NVEC], int stride) {
        bool anyAdds = false, allAdds = true;
        for (int i = 0; i < g->target.vectorWidth; ++i) {
            llvm::ConstantExpr *ce = llvm::dyn_cast<llvm::ConstantExpr>(v[i]);
-            if (ce->getOpcode() == llvm::Instruction::Add)
+            if (ce->getOpcode() == llvm::Instruction::Add ||
                ce->getOpcode() == llvm::Instruction::Sub)
                anyAdds = true;
            else 
                allAdds = false;
@@ -1938,9 +1939,20 @@ lVectorIsLinear(llvm::Value *v[ISPC_MAX_NVEC], int stride) {
                        otherBit[i] = ce->getOperand(0);
                    }
                }
                else if (ce->getOpcode() == llvm::Instruction::Sub) {
                    // Treat subtraction as an add with a negative value..
                    if (llvm::isa<llvm::ConstantInt>(ce->getOperand(0))) {
                        intBit[i] = ce->getOperand(0);
                        otherBit[i] = llvm::ConstantExpr::getNeg(ce->getOperand(1));
                    }
                    else {
                        intBit[i] = ce->getOperand(1);
                        otherBit[i] = llvm::ConstantExpr::getNeg(ce->getOperand(0));
                    }
                }
                else {
-                    // We don't have an Add, so pretend we have an add with
+                    // We don't have an Add or a Sub, so pretend we have an
-                    // zero.
+                    // add with zero.
                    intBit[i] = LLVMInt32(0);
                    otherBit[i] = v[i];
                }
@@ -1975,14 +1987,20 @@ lVectorIsLinear(llvm::Value *v[ISPC_MAX_NVEC], int stride) {
        // FIXME: here, too, what about cases with v[0] being a load or something
        // and then everything after element 0 being a binary operator with an add.
        // That won't get caught by this case??
-        bool anyAdd = false;
+        bool anyAdd = false, anySub = false;
        for (int i = 0; i < g->target.vectorWidth; ++i) {
            llvm::BinaryOperator *bopi = llvm::dyn_cast<llvm::BinaryOperator>(v[i]);
-            if (bopi && bopi->getOpcode() == llvm::Instruction::Add)
+            if (bopi) {
-                anyAdd = true;
+                if (bopi->getOpcode() == llvm::Instruction::Add)
                    anyAdd = true;
                else if (bopi->getOpcode() == llvm::Instruction::Sub)
                    anySub = true;
            }
        }
-        if (anyAdd) {
+        if (anyAdd && anySub)
            return false;
        if (anyAdd || anySub) {
            // is one of the operands the same for all elements?  if so, then just
            // need to check this case for the other operand...
@@ -1993,38 +2011,51 @@ lVectorIsLinear(llvm::Value *v[ISPC_MAX_NVEC], int stride) {
            // more robust to switching the ordering of operands, in case
            // that ever happens...
            for (int operand = 0; operand <= 1; ++operand) {
-                llvm::Value *addOperandValues[ISPC_MAX_NVEC];
+                llvm::Value *addSubOperandValues[ISPC_MAX_NVEC];
                // Go through the vector elements and grab the operand'th
                // one if this is an add or a subtract, or the value v[i]
                // itself otherwise.
                for (int i = 0; i < g->target.vectorWidth; ++i) {
                    llvm::BinaryOperator *bop = llvm::dyn_cast<llvm::BinaryOperator>(v[i]);
-                    if (bop->getOpcode() == llvm::Instruction::Add)
+                    if (bop->getOpcode() == llvm::Instruction::Add ||
-                        addOperandValues[i] = bop->getOperand(operand);
+                        bop->getOpcode() == llvm::Instruction::Sub)
                        addSubOperandValues[i] = bop->getOperand(operand);
                    else
-                        // The other guys are adds, so we'll treat this as
+                        // The other guys are adds or subtracts, so we'll
-                        // an "add 0" in the below, so just grab the value
+                        // treat this as an "add 0" in the below, so just
-                        // v[i] itself
+                        // grab the value v[i] itself
-                        addOperandValues[i] = v[i];
+                        addSubOperandValues[i] = v[i];
                }
-                if (lVectorValuesAllEqual(addOperandValues)) {
+                if (lVectorValuesAllEqual(addSubOperandValues) && 
                    (anyAdd || operand == 1)) {
                    // If this operand's values are all equal, then the
-                    // overall result is a linear sequence if the second
+                    // overall result is an ascending linear sequence if
-                    // operand's values are themselves a linear sequence...
+                    // the other operand's values are themselves a linear
                    // sequence and if either this is an add or we're
                    // looking at the 2nd operand.  i.e.:
                    //
                    // unif + programIndex -> ascending linear sequence
                    // programIndex + unif -> ascending linear sequence
                    // programIndex - unif -> ascending linear sequence
                    // unif - programIndex -> *descending* linear sequence
                    //
                    // We don't match the descending case for now; at some
                    // future point we could generate code for that as a
                    // vector load + shuffle.
                    int otherOperand = operand ^ 1;
                    for (int i = 0; i < g->target.vectorWidth; ++i) {
                        llvm::BinaryOperator *bop = llvm::dyn_cast<llvm::BinaryOperator>(v[i]);
-                        if (bop->getOpcode() == llvm::Instruction::Add)
+                        if (bop->getOpcode() == llvm::Instruction::Add ||
-                            addOperandValues[i] = bop->getOperand(otherOperand);
+                            bop->getOpcode() == llvm::Instruction::Sub)
                            addSubOperandValues[i] = bop->getOperand(otherOperand);
                        else
-                            addOperandValues[i] = LLVMInt32(0);
+                            addSubOperandValues[i] = LLVMInt32(0);
                    }
-                    return lVectorIsLinear(addOperandValues, stride);
+                    return lVectorIsLinear(addSubOperandValues, stride);
                }
            }
        }
-
+        else if (bop->getOpcode() == llvm::Instruction::Mul) {
        if (bop->getOpcode() == llvm::Instruction::Mul) {
            // Finally, if we have a multiply, then if one of the operands
            // has the same value for all elements and if the other operand
            // is a linear sequence such that the scale times the sequence
--- a/tests/gather-to-vload-neg-offset.ispc
+++ b/tests/gather-to-vload-neg-offset.ispc
@@ -0,0 +1,13 @@
 // Returns programCount to the caller.
 export uniform int width() { return programCount; }
 // Test kernel for the <linear sequence> - <uniform> indexing case: each
 // program instance reads aFOO[programIndex] and stores it to RET[index-b],
 // where index is programIndex + 5 and b is a uniform value.
 // NOTE(review): presumably the test harness passes b == 5, so the store
 // lands on RET[programIndex] -- confirm against the harness.
 export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) {
    // Per-instance value to be written back out.
    float a = aFOO[programIndex]; 
    // Linear sequence with a constant offset of 5.
    int index = programIndex + 5;
    // Subtracting the uniform b from the linear index is the pattern under test.
    RET[index-b] = a; 
 }
 // Writes the reference output: RET[programIndex] = 1 + programIndex.
 // NOTE(review): presumably the harness compares this against what f_fu
 // wrote -- confirm against the test runner.
 export void result(uniform float RET[]) {
    RET[programIndex] = 1 + programIndex;
 }