Handle more forms of constant vectors in lGetMask().

Various optimization passes depend on turning a compile-time constant mask into a bit vector. It turns out that in LLVM 3.1, constant vectors of ints/floats are represented with llvm::ConstantDataVector, but constant vectors of bools still use llvm::ConstantVector (which is what LLVM 3.0 uses for all constant vectors). Now lGetMask() always tries the llvm::ConstantVector path as well, to cover this case. This improves generated C++ code by eliminating things like select with an all on/off mask, turning movmask calls with constants into constant values, etc.
2012-04-18 11:34:28 -07:00
parent c202e9e106
commit 7c91b01125
1 changed files with 55 additions and 32 deletions
--- a/opt.cpp
+++ b/opt.cpp
@@ -586,40 +586,22 @@ IntrinsicsOpt::IntrinsicsOpt()
 }


-/** Given an llvm::Value represinting a vector mask, see if the value is a
-    constant.  If so, return the integer mask found by taking the high bits
-    of the mask values in turn and concatenating them into a single integer.
-    In other words, given the 4-wide mask: < 0xffffffff, 0, 0, 0xffffffff >, 
-    we have 0b1001 = 9.
- */
+/** Given a vector of constant values (int, float, or bool) representing an
+    execution mask, convert it to a bitvector where the 0th bit corresponds
+    to the first vector value and so forth.
+*/
 static int
-lGetMask(llvm::Value *factor) {
-    /* FIXME: This will break if we ever do 32-wide compilation, in which case
-       it don't be possible to distinguish between -1 for "don't know" and
-       "known and all bits on". */
-    Assert(g->target.vectorWidth < 32);
+lConstElementsToMask(const llvm::SmallVector<llvm::Constant *, 
+                                             ISPC_MAX_NVEC> &elements) {
+    Assert(elements.size() <= 32);

-#ifdef LLVM_3_1svn
-    llvm::ConstantDataVector *cv = llvm::dyn_cast<llvm::ConstantDataVector>(factor);
-#else
-    llvm::ConstantVector *cv = llvm::dyn_cast<llvm::ConstantVector>(factor);
-#endif
-    if (cv) {
    int mask = 0;
-        llvm::SmallVector<llvm::Constant *, ISPC_MAX_NVEC> elements;
-#ifdef LLVM_3_1svn
-        for (int i = 0; i < (int)cv->getNumElements(); ++i)
-            elements.push_back(cv->getElementAsConstant(i));
-#else
-        cv->getVectorElements(elements);
-#endif
-
    for (unsigned int i = 0; i < elements.size(); ++i) {
        llvm::APInt intMaskValue;
        // SSE has the "interesting" approach of encoding blending
        // masks as <n x float>.
        llvm::ConstantFP *cf = llvm::dyn_cast<llvm::ConstantFP>(elements[i]);
-            if (cf) {
+        if (cf != NULL) {
            llvm::APFloat apf = cf->getValueAPF();
            intMaskValue = apf.bitcastToAPInt();
        }
@@ -635,6 +617,47 @@ lGetMask(llvm::Value *factor) {
            mask |= (1 << i);
    }
    return mask;
+}
+
+
+/** Given an llvm::Value representing a vector mask, see if the value is a
+    constant.  If so, return the integer mask found by taking the high bits
+    of the mask values in turn and concatenating them into a single integer.
+    In other words, given the 4-wide mask: < 0xffffffff, 0, 0, 0xffffffff >, 
+    we have 0b1001 = 9.
+ */
+static int
+lGetMask(llvm::Value *factor) {
+    /* FIXME: This will break if we ever do 32-wide compilation, in which case
+       it won't be possible to distinguish between -1 for "don't know" and
+       "known and all bits on". */
+    Assert(g->target.vectorWidth < 32);
+
+#ifdef LLVM_3_1svn
+    llvm::ConstantDataVector *cdv = llvm::dyn_cast<llvm::ConstantDataVector>(factor);
+    if (cdv != NULL) {
+        llvm::SmallVector<llvm::Constant *, ISPC_MAX_NVEC> elements;
+        for (int i = 0; i < (int)cdv->getNumElements(); ++i)
+            elements.push_back(cdv->getElementAsConstant(i));
+        return lConstElementsToMask(elements);
+    }
+#endif
+
+    llvm::ConstantVector *cv = llvm::dyn_cast<llvm::ConstantVector>(factor);
+    if (cv != NULL) {
+        llvm::SmallVector<llvm::Constant *, ISPC_MAX_NVEC> elements;
+ #ifdef LLVM_3_1svn
+        for (int i = 0; i < (int)cv->getNumOperands(); ++i) {
+            llvm::Constant *c = 
+                llvm::dyn_cast<llvm::Constant>(cv->getOperand(i));
+            if (c == NULL)
+                return NULL;
+            elements.push_back(c);
+        }
+#else
+        cv->getVectorElements(elements);
+#endif
+        return lConstElementsToMask(elements);
    }
    else if (llvm::isa<llvm::ConstantAggregateZero>(factor))
        return 0;
@@ -1149,7 +1172,7 @@ lGetBasePtrAndOffsets(llvm::Value *ptrs, llvm::Value **offsets,
        // Indexing into global arrays can lead to this form, with
        // ConstantVectors..
        llvm::SmallVector<llvm::Constant *, ISPC_MAX_NVEC> elements;
-#ifdef LLVM_3_1svn
+ #ifdef LLVM_3_1svn
        for (int i = 0; i < (int)cv->getNumOperands(); ++i) {
            llvm::Constant *c = 
                llvm::dyn_cast<llvm::Constant>(cv->getOperand(i));