Broadcast implementation as InsertElement+Shuffle and related improvements

2013-04-10 02:18:24 +04:00
parent 603abf70dc
commit 5898532605
6 changed files with 153 additions and 56 deletions
--- a/llvmutil.cpp
+++ b/llvmutil.cpp
@@ -601,44 +601,74 @@ lGetIntValue(llvm::Value *offset) {


 void
-LLVMFlattenInsertChain(llvm::InsertElementInst *ie, int vectorWidth,
+LLVMFlattenInsertChain(llvm::Value *inst, int vectorWidth,
                       llvm::Value **elements) {
-    for (int i = 0; i < vectorWidth; ++i)
+    for (int i = 0; i < vectorWidth; ++i) {
        elements[i] = NULL;
+    }

-    while (ie != NULL) {
-        int64_t iOffset = lGetIntValue(ie->getOperand(2));
-        Assert(iOffset >= 0 && iOffset < vectorWidth);
-        Assert(elements[iOffset] == NULL);
+    // Catch a pattern of InsertElement chain.
+    if (llvm::InsertElementInst *ie =
+            llvm::dyn_cast<llvm::InsertElementInst>(inst)) {
+        while (ie != NULL) {
+            int64_t iOffset = lGetIntValue(ie->getOperand(2));
+            Assert(iOffset >= 0 && iOffset < vectorWidth);
+            Assert(elements[iOffset] == NULL);

-        // Get the scalar value from this insert
-        elements[iOffset] = ie->getOperand(1);
+            // Get the scalar value from this insert
+            elements[iOffset] = ie->getOperand(1);

-        // Do we have another insert?
-        llvm::Value *insertBase = ie->getOperand(0);
-        ie = llvm::dyn_cast<llvm::InsertElementInst>(insertBase);
-        if (ie == NULL) {
-            if (llvm::isa<llvm::UndefValue>(insertBase))
-                return;
+            // Do we have another insert?
+            llvm::Value *insertBase = ie->getOperand(0);
+            ie = llvm::dyn_cast<llvm::InsertElementInst>(insertBase);
+            if (ie == NULL) {
+                if (llvm::isa<llvm::UndefValue>(insertBase)) {
+                    return;
+                }

-            // Get the value out of a constant vector if that's what we
-            // have
-            llvm::ConstantVector *cv =
-                llvm::dyn_cast<llvm::ConstantVector>(insertBase);
+                // Get the value out of a constant vector if that's what we
+                // have
+                llvm::ConstantVector *cv =
+                    llvm::dyn_cast<llvm::ConstantVector>(insertBase);

-            // FIXME: this assert is a little questionable; we probably
-            // shouldn't fail in this case but should just return an
-            // incomplete result.  But there aren't currently any known
-            // cases where we have anything other than an undef value or a
-            // constant vector at the base, so if that ever does happen,
-            // it'd be nice to know what happend so that perhaps we can
-            // handle it.
-            // FIXME: Also, should we handle ConstantDataVectors with
-            // LLVM3.1?  What about ConstantAggregateZero values??
-            Assert(cv != NULL);
+                // FIXME: this assert is a little questionable; we probably
+                // shouldn't fail in this case but should just return an
+                // incomplete result.  But there aren't currently any known
+                // cases where we have anything other than an undef value or a
+                // constant vector at the base, so if that ever does happen,
+                // it'd be nice to know what happend so that perhaps we can
+                // handle it.
+                // FIXME: Also, should we handle ConstantDataVectors with
+                // LLVM3.1?  What about ConstantAggregateZero values??
+                Assert(cv != NULL);

-            Assert(iOffset < (int)cv->getNumOperands());
-            elements[iOffset] = cv->getOperand((int32_t)iOffset);
+                Assert(iOffset < (int)cv->getNumOperands());
+                elements[iOffset] = cv->getOperand((int32_t)iOffset);
+            }
+        }
+    }
+    // Catch a pattern of broadcast implemented as InsertElement + Shuffle:
+    //   %broadcast_init.0 = insertelement <4 x i32> undef, i32 %val, i32 0
+    //   %broadcast.1 = shufflevector <4 x i32> %smear.0, <4 x i32> undef,
+    //                                              <4 x i32> zeroinitializer
+    else if (llvm::ShuffleVectorInst *shuf =
+        llvm::dyn_cast<llvm::ShuffleVectorInst>(inst)) {
+        llvm::Value *indices = shuf->getOperand(2);
+        if (llvm::isa<llvm::ConstantAggregateZero>(indices)) {
+            llvm::Value *op = shuf->getOperand(0);
+            llvm::InsertElementInst *ie = llvm::dyn_cast<llvm::InsertElementInst>(op);
+            if (ie != NULL &&
+                llvm::isa<llvm::UndefValue>(ie->getOperand(0))) {
+                llvm::ConstantInt *ci =
+                    llvm::dyn_cast<llvm::ConstantInt>(ie->getOperand(2));
+
+                if (ci->isZero()) {
+                    for (int i = 0; i < vectorWidth; ++i) {
+                        elements[i] = ie->getOperand(1);
+                    }
+                    return;
+                }
+            }
        }
    }
 }
@@ -694,10 +724,10 @@ lIsExactMultiple(llvm::Value *val, int baseValue, int vectorLength,
    else
        Assert(LLVMVectorValuesAllEqual(val));

-    llvm::InsertElementInst *ie = llvm::dyn_cast<llvm::InsertElementInst>(val);
-    if (ie != NULL) {
+    if (llvm::isa<llvm::InsertElementInst>(val) ||
+        llvm::isa<llvm::ShuffleVectorInst>(val)) {
        llvm::Value *elts[ISPC_MAX_NVEC];
-        LLVMFlattenInsertChain(ie, g->target->getVectorWidth(), elts);
+        LLVMFlattenInsertChain(val, g->target->getVectorWidth(), elts);
        // We just need to check the scalar first value, since we know that
        // all elements are equal
        return lIsExactMultiple(elts[0], baseValue, vectorLength,
@@ -1440,10 +1470,10 @@ lExtractFirstVectorElement(llvm::Value *v,

    // If we have a chain of insertelement instructions, then we can just
    // flatten them out and grab the value for the first one.
-    llvm::InsertElementInst *ie = llvm::dyn_cast<llvm::InsertElementInst>(v);
-    if (ie != NULL) {
+    if (llvm::isa<llvm::InsertElementInst>(v) ||
+        llvm::isa<llvm::ShuffleVectorInst>(v)) {
        llvm::Value *elements[ISPC_MAX_NVEC];
-        LLVMFlattenInsertChain(ie, vt->getNumElements(), elements);
+        LLVMFlattenInsertChain(v, vt->getNumElements(), elements);
        return elements[0];
    }