diff --git a/Makefile b/Makefile
index ca55a734..01746fa4 100644
--- a/Makefile
+++ b/Makefile
@@ -85,7 +85,7 @@ CXX_SRC=ast.cpp builtins.cpp cbackend.cpp ctx.cpp decl.cpp expr.cpp func.cpp \
HEADERS=ast.h builtins.h ctx.h decl.h expr.h func.h ispc.h llvmutil.h module.h \
opt.h stmt.h sym.h type.h util.h
TARGETS=avx1 avx1-x2 avx2 avx2-x2 sse2 sse2-x2 sse4 sse4-x2 generic-4 generic-8 \
- generic-16 generic-1
+ generic-16 generic-32 generic-1
BUILTINS_SRC=$(addprefix builtins/target-, $(addsuffix .ll, $(TARGETS))) \
builtins/dispatch.ll
BUILTINS_OBJS=$(addprefix builtins-, $(notdir $(BUILTINS_SRC:.ll=.o))) \
diff --git a/builtins.cpp b/builtins.cpp
index 1682db9a..b94fa04f 100644
--- a/builtins.cpp
+++ b/builtins.cpp
@@ -847,6 +847,13 @@ DefineStdlib(SymbolTable *symbolTable, llvm::LLVMContext *ctx, llvm::Module *mod
builtins_bitcode_generic_16_length,
module, symbolTable);
break;
+ case 32:
+ extern unsigned char builtins_bitcode_generic_32[];
+ extern int builtins_bitcode_generic_32_length;
+ AddBitcodeToModule(builtins_bitcode_generic_32,
+ builtins_bitcode_generic_32_length,
+ module, symbolTable);
+ break;
case 1:
extern unsigned char builtins_bitcode_generic_1[];
extern int builtins_bitcode_generic_1_length;
diff --git a/builtins/target-generic-32.ll b/builtins/target-generic-32.ll
new file mode 100644
index 00000000..5f89bcdf
--- /dev/null
+++ b/builtins/target-generic-32.ll
@@ -0,0 +1,33 @@
+;; Copyright (c) 2010-2012, Intel Corporation
+;; All rights reserved.
+;;
+;; Redistribution and use in source and binary forms, with or without
+;; modification, are permitted provided that the following conditions are
+;; met:
+;;
+;; * Redistributions of source code must retain the above copyright
+;; notice, this list of conditions and the following disclaimer.
+;;
+;; * Redistributions in binary form must reproduce the above copyright
+;; notice, this list of conditions and the following disclaimer in the
+;; documentation and/or other materials provided with the distribution.
+;;
+;; * Neither the name of Intel Corporation nor the names of its
+;; contributors may be used to endorse or promote products derived from
+;; this software without specific prior written permission.
+;;
+;;
+;; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
+;; IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+;; TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+;; PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+;; OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+;; EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+;; PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+;; PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+;; LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+;; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+;; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+define(`WIDTH',`32')
+include(`target-generic-common.ll')
diff --git a/ispc.cpp b/ispc.cpp
index bd832825..3a2134d1 100644
--- a/ispc.cpp
+++ b/ispc.cpp
@@ -257,6 +257,14 @@ Target::GetTarget(const char *arch, const char *cpu, const char *isa,
t->allOffMaskIsSafe = true;
t->maskBitCount = 1;
}
+ else if (!strcasecmp(isa, "generic-32")) {
+ t->isa = Target::GENERIC;
+ t->nativeVectorWidth = 32;
+ t->vectorWidth = 32;
+ t->maskingIsFree = true;
+ t->allOffMaskIsSafe = true;
+ t->maskBitCount = 1;
+ }
else if (!strcasecmp(isa, "generic-1")) {
t->isa = Target::GENERIC;
t->nativeVectorWidth = 1;
@@ -313,6 +321,7 @@ Target::GetTarget(const char *arch, const char *cpu, const char *isa,
llvm::TargetMachine *targetMachine = t->GetTargetMachine();
const llvm::TargetData *targetData = targetMachine->getTargetData();
t->is32Bit = (targetData->getPointerSize() == 4);
+ Assert(t->vectorWidth <= ISPC_MAX_NVEC);
}
return !error;
@@ -344,7 +353,7 @@ Target::SupportedTargetISAs() {
#ifndef LLVM_3_0
", avx2, avx2-x2"
#endif // !LLVM_3_0
- ", generic-4, generic-8, generic-16, generic-1";
+ ", generic-1, generic-4, generic-8, generic-16, generic-32";
}
diff --git a/ispc.h b/ispc.h
index d0d0c3f7..bb551a6d 100644
--- a/ispc.h
+++ b/ispc.h
@@ -71,7 +71,7 @@
/** @def ISPC_MAX_NVEC maximum vector size of any of the compliation
targets.
*/
-#define ISPC_MAX_NVEC 16
+#define ISPC_MAX_NVEC 32
// Forward declarations of a number of widely-used LLVM types
namespace llvm {
diff --git a/ispc.vcxproj b/ispc.vcxproj
index 6971ce9a..34ef9373 100755
--- a/ispc.vcxproj
+++ b/ispc.vcxproj
@@ -29,6 +29,7 @@
+
@@ -264,6 +265,19 @@
Building gen-bitcode-generic-16.cpp
+
+
+ Document
+ m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% builtins\target-generic-32.ll | python bitcode2cpp.py builtins\target-generic-32.ll > gen-bitcode-generic-32.cpp
+ gen-bitcode-generic-32.cpp
+ builtins\util.m4;builtins\target-generic-common.ll
+ m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% builtins\target-generic-32.ll | python bitcode2cpp.py builtins\target-generic-32.ll > gen-bitcode-generic-32.cpp
+ gen-bitcode-generic-32.cpp
+ builtins\util.m4;builtins\target-generic-common.ll
+ Building gen-bitcode-generic-32.cpp
+ Building gen-bitcode-generic-32.cpp
+
+
Document
diff --git a/opt.cpp b/opt.cpp
index 34cdab0f..063be681 100644
--- a/opt.cpp
+++ b/opt.cpp
@@ -265,6 +265,124 @@ lGEPInst(llvm::Value *ptr, llvm::Value *offset, const char *name,
}
+/** Given a vector of constant values (int, float, or bool) representing an
+ execution mask, convert it to a bitvector where the 0th bit corresponds
+ to the first vector value and so forth.
+*/
+static uint32_t
+lConstElementsToMask(const llvm::SmallVector<llvm::Constant *, ISPC_MAX_NVEC> &elements) {
+ Assert(elements.size() <= 32);
+
+ uint32_t mask = 0;
+ for (unsigned int i = 0; i < elements.size(); ++i) {
+ llvm::APInt intMaskValue;
+ // SSE has the "interesting" approach of encoding blending
+ // masks as <n x float>.
+ llvm::ConstantFP *cf = llvm::dyn_cast<llvm::ConstantFP>(elements[i]);
+ if (cf != NULL) {
+ llvm::APFloat apf = cf->getValueAPF();
+ intMaskValue = apf.bitcastToAPInt();
+ }
+ else {
+ // Otherwise get it as an int
+ llvm::ConstantInt *ci = llvm::dyn_cast<llvm::ConstantInt>(elements[i]);
+ Assert(ci != NULL); // vs return -1 if NULL?
+ intMaskValue = ci->getValue();
+ }
+ // Is the high-bit set? If so, OR in the appropriate bit in
+ // the result mask
+ if (intMaskValue.countLeadingOnes() > 0)
+ mask |= (1u << i);
+ }
+ return mask;
+}
+
+
+/** Given an llvm::Value representing a vector mask, see if the value is a
+ constant. If so, return true and set *bits to be the integer mask
+ found by taking the high bits of the mask values in turn and
+ concatenating them into a single integer. In other words, given the
+ 4-wide mask: < 0xffffffff, 0, 0, 0xffffffff >, we have 0b1001 = 9.
+ */
+static bool
+lGetMask(llvm::Value *factor, uint32_t *mask) {
+#ifndef LLVM_3_0
+ llvm::ConstantDataVector *cdv = llvm::dyn_cast<llvm::ConstantDataVector>(factor);
+ if (cdv != NULL) {
+ llvm::SmallVector<llvm::Constant *, ISPC_MAX_NVEC> elements;
+ for (int i = 0; i < (int)cdv->getNumElements(); ++i)
+ elements.push_back(cdv->getElementAsConstant(i));
+ *mask = lConstElementsToMask(elements);
+ return true;
+ }
+#endif
+
+ llvm::ConstantVector *cv = llvm::dyn_cast<llvm::ConstantVector>(factor);
+ if (cv != NULL) {
+ llvm::SmallVector<llvm::Constant *, ISPC_MAX_NVEC> elements;
+#ifndef LLVM_3_0
+ for (int i = 0; i < (int)cv->getNumOperands(); ++i) {
+ llvm::Constant *c =
+ llvm::dyn_cast<llvm::Constant>(cv->getOperand(i));
+ if (c == NULL)
+ return false;
+ elements.push_back(c);
+ }
+#else
+ cv->getVectorElements(elements);
+#endif
+ *mask = lConstElementsToMask(elements);
+ return true;
+ }
+ else if (llvm::isa<llvm::ConstantAggregateZero>(factor)) {
+ *mask = 0;
+ return true;
+ }
+ else {
+#if 0
+ llvm::ConstantExpr *ce = llvm::dyn_cast<llvm::ConstantExpr>(factor);
+ if (ce != NULL) {
+ llvm::TargetMachine *targetMachine = g->target.GetTargetMachine();
+ const llvm::TargetData *td = targetMachine->getTargetData();
+ llvm::Constant *c = llvm::ConstantFoldConstantExpression(ce, td);
+ c->dump();
+ factor = c;
+ }
+ // else we should be able to handle it above...
+ Assert(!llvm::isa<llvm::ConstantExpr>(factor));
+#endif
+ return false;
+ }
+}
+
+
+enum MaskStatus { ALL_ON, ALL_OFF, MIXED, UNKNOWN };
+
+/** Determines if the given mask value is all on, all off, mixed, or
+ unknown at compile time.
+*/
+static MaskStatus
+lGetMaskStatus(llvm::Value *mask, int vecWidth = -1) {
+ uint32_t bits;
+ if (lGetMask(mask, &bits) == false)
+ return UNKNOWN;
+
+ if (bits == 0)
+ return ALL_OFF;
+
+ if (vecWidth == -1)
+ vecWidth = g->target.vectorWidth;
+ Assert(vecWidth <= 32);
+
+ for (int i = 0; i < vecWidth; ++i) {
+ if ((bits & (1ull << i)) == 0)
+ return MIXED;
+ }
+ return ALL_ON;
+}
+
+
///////////////////////////////////////////////////////////////////////////
void
@@ -559,12 +677,12 @@ private:
instruction for this optimization pass.
*/
struct BlendInstruction {
- BlendInstruction(llvm::Function *f, int ao, int o0, int o1, int of)
+ BlendInstruction(llvm::Function *f, uint32_t ao, int o0, int o1, int of)
: function(f), allOnMask(ao), op0(o0), op1(o1), opFactor(of) { }
/** Function pointer for the blend instruction */
llvm::Function *function;
/** Mask value for an "all on" mask for this instruction */
- int allOnMask;
+ uint32_t allOnMask;
/** The operand number in the llvm CallInst corresponds to the
first operand to blend with. */
int op0;
@@ -609,99 +727,6 @@ IntrinsicsOpt::IntrinsicsOpt()
}
-/** Given a vector of constant values (int, float, or bool) representing an
- execution mask, convert it to a bitvector where the 0th bit corresponds
- to the first vector value and so forth.
-*/
-static int
-lConstElementsToMask(const llvm::SmallVector<llvm::Constant *, ISPC_MAX_NVEC> &elements) {
- Assert(elements.size() <= 32);
-
- int mask = 0;
- for (unsigned int i = 0; i < elements.size(); ++i) {
- llvm::APInt intMaskValue;
- // SSE has the "interesting" approach of encoding blending
- // masks as <n x float>.
- llvm::ConstantFP *cf = llvm::dyn_cast<llvm::ConstantFP>(elements[i]);
- if (cf != NULL) {
- llvm::APFloat apf = cf->getValueAPF();
- intMaskValue = apf.bitcastToAPInt();
- }
- else {
- // Otherwise get it as an int
- llvm::ConstantInt *ci = llvm::dyn_cast<llvm::ConstantInt>(elements[i]);
- Assert(ci != NULL); // vs return -1 if NULL?
- intMaskValue = ci->getValue();
- }
- // Is the high-bit set? If so, OR in the appropriate bit in
- // the result mask
- if (intMaskValue.countLeadingOnes() > 0)
- mask |= (1 << i);
- }
- return mask;
-}
-
-
-/** Given an llvm::Value represinting a vector mask, see if the value is a
- constant. If so, return the integer mask found by taking the high bits
- of the mask values in turn and concatenating them into a single integer.
- In other words, given the 4-wide mask: < 0xffffffff, 0, 0, 0xffffffff >,
- we have 0b1001 = 9.
- */
-static int
-lGetMask(llvm::Value *factor) {
- /* FIXME: This will break if we ever do 32-wide compilation, in which case
- it don't be possible to distinguish between -1 for "don't know" and
- "known and all bits on". */
- Assert(g->target.vectorWidth < 32);
-
-#ifndef LLVM_3_0
- llvm::ConstantDataVector *cdv = llvm::dyn_cast<llvm::ConstantDataVector>(factor);
- if (cdv != NULL) {
- llvm::SmallVector<llvm::Constant *, ISPC_MAX_NVEC> elements;
- for (int i = 0; i < (int)cdv->getNumElements(); ++i)
- elements.push_back(cdv->getElementAsConstant(i));
- return lConstElementsToMask(elements);
- }
-#endif
-
- llvm::ConstantVector *cv = llvm::dyn_cast<llvm::ConstantVector>(factor);
- if (cv != NULL) {
- llvm::SmallVector<llvm::Constant *, ISPC_MAX_NVEC> elements;
-#ifndef LLVM_3_0
- for (int i = 0; i < (int)cv->getNumOperands(); ++i) {
- llvm::Constant *c =
- llvm::dyn_cast<llvm::Constant>(cv->getOperand(i));
- if (c == NULL)
- return NULL;
- elements.push_back(c);
- }
-#else
- cv->getVectorElements(elements);
-#endif
- return lConstElementsToMask(elements);
- }
- else if (llvm::isa<llvm::ConstantAggregateZero>(factor))
- return 0;
- else {
-#if 0
- llvm::ConstantExpr *ce = llvm::dyn_cast<llvm::ConstantExpr>(factor);
- if (ce != NULL) {
- llvm::TargetMachine *targetMachine = g->target.GetTargetMachine();
- const llvm::TargetData *td = targetMachine->getTargetData();
- llvm::Constant *c = llvm::ConstantFoldConstantExpression(ce, td);
- c->dump();
- factor = c;
- }
- // else we should be able to handle it above...
- Assert(!llvm::isa<llvm::ConstantExpr>(factor));
-#endif
- return -1;
- }
-}
-
-
/** Given an llvm::Value, return true if we can determine that it's an
undefined value. This only makes a weak attempt at chasing this down,
only detecting flat-out undef values, and bitcasts of undef values.
@@ -779,26 +804,28 @@ IntrinsicsOpt::runOnBasicBlock(llvm::BasicBlock &bb) {
goto restart;
}
- int mask = lGetMask(factor);
- llvm::Value *value = NULL;
- if (mask == 0)
- // Mask all off -> replace with the first blend value
- value = v[0];
- else if (mask == blend->allOnMask)
- // Mask all on -> replace with the second blend value
- value = v[1];
+ uint32_t mask;
+ if (lGetMask(factor, &mask) == true) {
+ llvm::Value *value = NULL;
+ if (mask == 0)
+ // Mask all off -> replace with the first blend value
+ value = v[0];
+ else if (mask == blend->allOnMask)
+ // Mask all on -> replace with the second blend value
+ value = v[1];
- if (value != NULL) {
- llvm::ReplaceInstWithValue(iter->getParent()->getInstList(),
- iter, value);
- modifiedAny = true;
- goto restart;
+ if (value != NULL) {
+ llvm::ReplaceInstWithValue(iter->getParent()->getInstList(),
+ iter, value);
+ modifiedAny = true;
+ goto restart;
+ }
}
}
else if (matchesMaskInstruction(callInst->getCalledFunction())) {
llvm::Value *factor = callInst->getArgOperand(0);
- int mask = lGetMask(factor);
- if (mask != -1) {
+ uint32_t mask;
+ if (lGetMask(factor, &mask) == true) {
// If the vector-valued mask has a known value, replace it
// with the corresponding integer mask from its elements
// high bits.
@@ -812,71 +839,75 @@ IntrinsicsOpt::runOnBasicBlock(llvm::BasicBlock &bb) {
else if (callInst->getCalledFunction() == avxMaskedLoad32 ||
callInst->getCalledFunction() == avxMaskedLoad64) {
llvm::Value *factor = callInst->getArgOperand(1);
- int mask = lGetMask(factor);
- if (mask == 0) {
- // nothing being loaded, replace with undef value
- llvm::Type *returnType = callInst->getType();
- Assert(llvm::isa<llvm::VectorType>(returnType));
- llvm::Value *undefValue = llvm::UndefValue::get(returnType);
- llvm::ReplaceInstWithValue(iter->getParent()->getInstList(),
- iter, undefValue);
- modifiedAny = true;
- goto restart;
- }
- else if (mask == 0xff) {
- // all lanes active; replace with a regular load
- llvm::Type *returnType = callInst->getType();
- Assert(llvm::isa<llvm::VectorType>(returnType));
- // cast the i8 * to the appropriate type
- const char *name = LLVMGetName(callInst->getArgOperand(0), "_cast");
- llvm::Value *castPtr =
- new llvm::BitCastInst(callInst->getArgOperand(0),
- llvm::PointerType::get(returnType, 0),
- name, callInst);
- lCopyMetadata(castPtr, callInst);
- int align = callInst->getCalledFunction() == avxMaskedLoad32 ? 4 : 8;
- name = LLVMGetName(callInst->getArgOperand(0), "_load");
- llvm::Instruction *loadInst =
- new llvm::LoadInst(castPtr, name, false /* not volatile */,
- align, (llvm::Instruction *)NULL);
- lCopyMetadata(loadInst, callInst);
- llvm::ReplaceInstWithInst(callInst, loadInst);
- modifiedAny = true;
- goto restart;
+ uint32_t mask;
+ if (lGetMask(factor, &mask) == true) {
+ if (mask == 0) {
+ // nothing being loaded, replace with undef value
+ llvm::Type *returnType = callInst->getType();
+ Assert(llvm::isa<llvm::VectorType>(returnType));
+ llvm::Value *undefValue = llvm::UndefValue::get(returnType);
+ llvm::ReplaceInstWithValue(iter->getParent()->getInstList(),
+ iter, undefValue);
+ modifiedAny = true;
+ goto restart;
+ }
+ else if (mask == 0xff) {
+ // all lanes active; replace with a regular load
+ llvm::Type *returnType = callInst->getType();
+ Assert(llvm::isa<llvm::VectorType>(returnType));
+ // cast the i8 * to the appropriate type
+ const char *name = LLVMGetName(callInst->getArgOperand(0), "_cast");
+ llvm::Value *castPtr =
+ new llvm::BitCastInst(callInst->getArgOperand(0),
+ llvm::PointerType::get(returnType, 0),
+ name, callInst);
+ lCopyMetadata(castPtr, callInst);
+ int align = callInst->getCalledFunction() == avxMaskedLoad32 ? 4 : 8;
+ name = LLVMGetName(callInst->getArgOperand(0), "_load");
+ llvm::Instruction *loadInst =
+ new llvm::LoadInst(castPtr, name, false /* not volatile */,
+ align, (llvm::Instruction *)NULL);
+ lCopyMetadata(loadInst, callInst);
+ llvm::ReplaceInstWithInst(callInst, loadInst);
+ modifiedAny = true;
+ goto restart;
+ }
}
}
else if (callInst->getCalledFunction() == avxMaskedStore32 ||
callInst->getCalledFunction() == avxMaskedStore64) {
// NOTE: mask is the 2nd parameter, not the 3rd one!!
llvm::Value *factor = callInst->getArgOperand(1);
- int mask = lGetMask(factor);
- if (mask == 0) {
- // nothing actually being stored, just remove the inst
- callInst->eraseFromParent();
- modifiedAny = true;
- goto restart;
- }
- else if (mask == 0xff) {
- // all lanes storing, so replace with a regular store
- llvm::Value *rvalue = callInst->getArgOperand(2);
- llvm::Type *storeType = rvalue->getType();
- const char *name = LLVMGetName(callInst->getArgOperand(0),
- "_ptrcast");
- llvm::Value *castPtr =
- new llvm::BitCastInst(callInst->getArgOperand(0),
- llvm::PointerType::get(storeType, 0),
- name, callInst);
- lCopyMetadata(castPtr, callInst);
+ uint32_t mask;
+ if (lGetMask(factor, &mask) == true) {
+ if (mask == 0) {
+ // nothing actually being stored, just remove the inst
+ callInst->eraseFromParent();
+ modifiedAny = true;
+ goto restart;
+ }
+ else if (mask == 0xff) {
+ // all lanes storing, so replace with a regular store
+ llvm::Value *rvalue = callInst->getArgOperand(2);
+ llvm::Type *storeType = rvalue->getType();
+ const char *name = LLVMGetName(callInst->getArgOperand(0),
+ "_ptrcast");
+ llvm::Value *castPtr =
+ new llvm::BitCastInst(callInst->getArgOperand(0),
+ llvm::PointerType::get(storeType, 0),
+ name, callInst);
+ lCopyMetadata(castPtr, callInst);
- llvm::StoreInst *storeInst =
- new llvm::StoreInst(rvalue, castPtr, (llvm::Instruction *)NULL);
- int align = callInst->getCalledFunction() == avxMaskedStore32 ? 4 : 8;
- storeInst->setAlignment(align);
- lCopyMetadata(storeInst, callInst);
- llvm::ReplaceInstWithInst(callInst, storeInst);
+ llvm::StoreInst *storeInst =
+ new llvm::StoreInst(rvalue, castPtr, (llvm::Instruction *)NULL);
+ int align = callInst->getCalledFunction() == avxMaskedStore32 ? 4 : 8;
+ storeInst->setAlignment(align);
+ lCopyMetadata(storeInst, callInst);
+ llvm::ReplaceInstWithInst(callInst, storeInst);
- modifiedAny = true;
- goto restart;
+ modifiedAny = true;
+ goto restart;
+ }
}
}
}
@@ -949,13 +980,13 @@ VSelMovmskOpt::runOnBasicBlock(llvm::BasicBlock &bb) {
llvm::SelectInst *selectInst = llvm::dyn_cast<llvm::SelectInst>(&*iter);
if (selectInst != NULL && selectInst->getType()->isVectorTy()) {
llvm::Value *factor = selectInst->getOperand(0);
- int mask = lGetMask(factor);
- int allOnMask = (1 << g->target.vectorWidth) - 1;
+
+ MaskStatus maskStatus = lGetMaskStatus(factor);
llvm::Value *value = NULL;
- if (mask == allOnMask)
+ if (maskStatus == ALL_ON)
// Mask all on -> replace with the first select value
value = selectInst->getOperand(1);
- else if (mask == 0)
+ else if (maskStatus == ALL_OFF)
// Mask all off -> replace with the second select value
value = selectInst->getOperand(2);
@@ -976,8 +1007,8 @@ VSelMovmskOpt::runOnBasicBlock(llvm::BasicBlock &bb) {
if (calledFunc == NULL || calledFunc != m->module->getFunction("__movmsk"))
continue;
- int mask = lGetMask(callInst->getArgOperand(0));
- if (mask != -1) {
+ uint32_t mask;
+ if (lGetMask(callInst->getArgOperand(0), &mask) == true) {
#if 0
fprintf(stderr, "mask %d\n", mask);
callInst->getArgOperand(0)->dump();
@@ -1964,10 +1995,8 @@ MaskedStoreOptPass::runOnBasicBlock(llvm::BasicBlock &bb) {
llvm::Value *rvalue = callInst->getArgOperand(1);
llvm::Value *mask = callInst->getArgOperand(2);
- int allOnMask = (1 << g->target.vectorWidth) - 1;
-
- int maskAsInt = lGetMask(mask);
- if (maskAsInt == 0) {
+ MaskStatus maskStatus = lGetMaskStatus(mask);
+ if (maskStatus == ALL_OFF) {
// Zero mask - no-op, so remove the store completely. (This
// may in turn lead to being able to optimize out instructions
// that compute the rvalue...)
@@ -1975,11 +2004,10 @@ MaskedStoreOptPass::runOnBasicBlock(llvm::BasicBlock &bb) {
modifiedAny = true;
goto restart;
}
- else if (maskAsInt == allOnMask) {
+ else if (maskStatus == ALL_ON) {
// The mask is all on, so turn this into a regular store
llvm::Type *rvalueType = rvalue->getType();
- llvm::Type *ptrType =
- llvm::PointerType::get(rvalueType, 0);
+ llvm::Type *ptrType = llvm::PointerType::get(rvalueType, 0);
lvalue = new llvm::BitCastInst(lvalue, ptrType, "lvalue_to_ptr_type", callInst);
lCopyMetadata(lvalue, callInst);
@@ -2072,20 +2100,18 @@ MaskedLoadOptPass::runOnBasicBlock(llvm::BasicBlock &bb) {
// Got one; grab the operands
llvm::Value *ptr = callInst->getArgOperand(0);
llvm::Value *mask = callInst->getArgOperand(1);
- int allOnMask = (1 << g->target.vectorWidth) - 1;
- int maskAsInt = lGetMask(mask);
- if (maskAsInt == 0) {
+ MaskStatus maskStatus = lGetMaskStatus(mask);
+ if (maskStatus == ALL_OFF) {
// Zero mask - no-op, so replace the load with an undef value
llvm::ReplaceInstWithValue(iter->getParent()->getInstList(),
iter, llvm::UndefValue::get(callInst->getType()));
modifiedAny = true;
goto restart;
}
- else if (maskAsInt == allOnMask) {
+ else if (maskStatus == ALL_ON) {
// The mask is all on, so turn this into a regular load
- llvm::Type *ptrType =
- llvm::PointerType::get(callInst->getType(), 0);
+ llvm::Type *ptrType = llvm::PointerType::get(callInst->getType(), 0);
ptr = new llvm::BitCastInst(ptr, ptrType, "ptr_cast_for_load",
callInst);
llvm::Instruction *load =
@@ -2558,18 +2584,6 @@ public:
char GatherCoalescePass::ID = 0;
-/* Returns true if the mask is known at compile time to be "all on". */
-static bool
-lIsMaskAllOn(llvm::Value *mask) {
- int m = lGetMask(mask);
- if (m == -1)
- return false;
-
- int allOnMask = (1 << g->target.vectorWidth) - 1;
- return (m == allOnMask);
-}
-
-
/** Representation of a memory load that the gather coalescing code has
decided to generate.
*/
@@ -3497,7 +3511,7 @@ GatherCoalescePass::runOnBasicBlock(llvm::BasicBlock &bb) {
// Then and only then do we have a common base pointer with all
// offsets from that constants (in which case we can potentially
// coalesce).
- if (lIsMaskAllOn(mask) == false)
+ if (lGetMaskStatus(mask) != ALL_ON)
continue;
if (!LLVMVectorValuesAllEqual(variableOffsets))
diff --git a/run_tests.py b/run_tests.py
index ce5e98f1..79465267 100755
--- a/run_tests.py
+++ b/run_tests.py
@@ -33,7 +33,7 @@ parser.add_option("-r", "--random-shuffle", dest="random", help="Randomly order
parser.add_option("-g", "--generics-include", dest="include_file", help="Filename for header implementing functions for generics",
default=None)
parser.add_option('-t', '--target', dest='target',
- help='Set compilation target (sse2, sse2-x2, sse4, sse4-x2, avx, avx-x2, generic-4, generic-8, generic-16)',
+ help='Set compilation target (sse2, sse2-x2, sse4, sse4-x2, avx, avx-x2, generic-4, generic-8, generic-16, generic-32)',
default="sse4")
parser.add_option('-a', '--arch', dest='arch',
help='Set architecture (x86, x86-64)',
@@ -69,6 +69,9 @@ if is_generic_target and options.include_file == None:
elif options.target == "generic-16":
sys.stderr.write("No generics #include specified; using examples/intrinsics/generic-16.h\n")
options.include_file = "examples/intrinsics/generic-16.h"
+ elif options.target == "generic-32":
+ sys.stderr.write("No generics #include specified and no default available for \"generic-32\" target.\n")
+ sys.exit(1)
if options.compiler_exe == None:
if is_windows: