From d99bd279e8226add132a30157114fd7b35c7b8a2 Mon Sep 17 00:00:00 2001 From: Matt Pharr Date: Thu, 3 May 2012 11:11:06 -0700 Subject: [PATCH] Add generic-32 target. --- Makefile | 2 +- builtins.cpp | 7 + builtins/target-generic-32.ll | 33 +++ ispc.cpp | 11 +- ispc.h | 2 +- ispc.vcxproj | 14 ++ opt.cpp | 412 ++++++++++++++++++---------------- run_tests.py | 5 +- 8 files changed, 283 insertions(+), 203 deletions(-) create mode 100644 builtins/target-generic-32.ll diff --git a/Makefile b/Makefile index ca55a734..01746fa4 100644 --- a/Makefile +++ b/Makefile @@ -85,7 +85,7 @@ CXX_SRC=ast.cpp builtins.cpp cbackend.cpp ctx.cpp decl.cpp expr.cpp func.cpp \ HEADERS=ast.h builtins.h ctx.h decl.h expr.h func.h ispc.h llvmutil.h module.h \ opt.h stmt.h sym.h type.h util.h TARGETS=avx1 avx1-x2 avx2 avx2-x2 sse2 sse2-x2 sse4 sse4-x2 generic-4 generic-8 \ - generic-16 generic-1 + generic-16 generic-32 generic-1 BUILTINS_SRC=$(addprefix builtins/target-, $(addsuffix .ll, $(TARGETS))) \ builtins/dispatch.ll BUILTINS_OBJS=$(addprefix builtins-, $(notdir $(BUILTINS_SRC:.ll=.o))) \ diff --git a/builtins.cpp b/builtins.cpp index 1682db9a..b94fa04f 100644 --- a/builtins.cpp +++ b/builtins.cpp @@ -847,6 +847,13 @@ DefineStdlib(SymbolTable *symbolTable, llvm::LLVMContext *ctx, llvm::Module *mod builtins_bitcode_generic_16_length, module, symbolTable); break; + case 32: + extern unsigned char builtins_bitcode_generic_32[]; + extern int builtins_bitcode_generic_32_length; + AddBitcodeToModule(builtins_bitcode_generic_32, + builtins_bitcode_generic_32_length, + module, symbolTable); + break; case 1: extern unsigned char builtins_bitcode_generic_1[]; extern int builtins_bitcode_generic_1_length; diff --git a/builtins/target-generic-32.ll b/builtins/target-generic-32.ll new file mode 100644 index 00000000..5f89bcdf --- /dev/null +++ b/builtins/target-generic-32.ll @@ -0,0 +1,33 @@ +;; Copyright (c) 2010-2012, Intel Corporation +;; All rights reserved. 
+;; +;; Redistribution and use in source and binary forms, with or without +;; modification, are permitted provided that the following conditions are +;; met: +;; +;; * Redistributions of source code must retain the above copyright +;; notice, this list of conditions and the following disclaimer. +;; +;; * Redistributions in binary form must reproduce the above copyright +;; notice, this list of conditions and the following disclaimer in the +;; documentation and/or other materials provided with the distribution. +;; +;; * Neither the name of Intel Corporation nor the names of its +;; contributors may be used to endorse or promote products derived from +;; this software without specific prior written permission. +;; +;; +;; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +;; IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +;; TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +;; PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER +;; OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +;; EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +;; PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +;; PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +;; LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +;; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +;; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +define(`WIDTH',`32') +include(`target-generic-common.ll') diff --git a/ispc.cpp b/ispc.cpp index bd832825..3a2134d1 100644 --- a/ispc.cpp +++ b/ispc.cpp @@ -257,6 +257,14 @@ Target::GetTarget(const char *arch, const char *cpu, const char *isa, t->allOffMaskIsSafe = true; t->maskBitCount = 1; } + else if (!strcasecmp(isa, "generic-32")) { + t->isa = Target::GENERIC; + t->nativeVectorWidth = 32; + t->vectorWidth = 32; + t->maskingIsFree = true; + t->allOffMaskIsSafe = true; + t->maskBitCount = 1; + } else if (!strcasecmp(isa, "generic-1")) { t->isa = Target::GENERIC; t->nativeVectorWidth = 1; @@ -313,6 +321,7 @@ Target::GetTarget(const char *arch, const char *cpu, const char *isa, llvm::TargetMachine *targetMachine = t->GetTargetMachine(); const llvm::TargetData *targetData = targetMachine->getTargetData(); t->is32Bit = (targetData->getPointerSize() == 4); + Assert(t->vectorWidth <= ISPC_MAX_NVEC); } return !error; @@ -344,7 +353,7 @@ Target::SupportedTargetISAs() { #ifndef LLVM_3_0 ", avx2, avx2-x2" #endif // !LLVM_3_0 - ", generic-4, generic-8, generic-16, generic-1"; + ", generic-1, generic-4, generic-8, generic-16, generic-32"; } diff --git a/ispc.h b/ispc.h index d0d0c3f7..bb551a6d 100644 --- a/ispc.h +++ b/ispc.h @@ -71,7 +71,7 @@ /** @def ISPC_MAX_NVEC maximum vector size of any of the compliation targets. 
*/ -#define ISPC_MAX_NVEC 16 +#define ISPC_MAX_NVEC 32 // Forward declarations of a number of widely-used LLVM types namespace llvm { diff --git a/ispc.vcxproj b/ispc.vcxproj index 6971ce9a..34ef9373 100755 --- a/ispc.vcxproj +++ b/ispc.vcxproj @@ -29,6 +29,7 @@ + @@ -264,6 +265,19 @@ Building gen-bitcode-generic-16.cpp + + + Document + m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% builtins\target-generic-32.ll | python bitcode2cpp.py builtins\target-generic-32.ll > gen-bitcode-generic-32.cpp + gen-bitcode-generic-32.cpp + builtins\util.m4;builtins\target-generic-common.ll + m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% builtins\target-generic-32.ll | python bitcode2cpp.py builtins\target-generic-32.ll > gen-bitcode-generic-32.cpp + gen-bitcode-generic-32.cpp + builtins\util.m4;builtins\target-generic-common.ll + Building gen-bitcode-generic-32.cpp + Building gen-bitcode-generic-32.cpp + + Document diff --git a/opt.cpp b/opt.cpp index 34cdab0f..063be681 100644 --- a/opt.cpp +++ b/opt.cpp @@ -265,6 +265,124 @@ lGEPInst(llvm::Value *ptr, llvm::Value *offset, const char *name, } +/** Given a vector of constant values (int, float, or bool) representing an + execution mask, convert it to a bitvector where the 0th bit corresponds + to the first vector value and so forth. +*/ +static uint32_t +lConstElementsToMask(const llvm::SmallVector &elements) { + Assert(elements.size() <= 32); + + uint32_t mask = 0; + for (unsigned int i = 0; i < elements.size(); ++i) { + llvm::APInt intMaskValue; + // SSE has the "interesting" approach of encoding blending + // masks as . + llvm::ConstantFP *cf = llvm::dyn_cast(elements[i]); + if (cf != NULL) { + llvm::APFloat apf = cf->getValueAPF(); + intMaskValue = apf.bitcastToAPInt(); + } + else { + // Otherwise get it as an int + llvm::ConstantInt *ci = llvm::dyn_cast(elements[i]); + Assert(ci != NULL); // vs return -1 if NULL? + intMaskValue = ci->getValue(); + } + // Is the high-bit set? 
If so, OR in the appropriate bit in + // the result mask + if (intMaskValue.countLeadingOnes() > 0) + mask |= (1u << i); // unsigned shift: i may be 31 for 32-wide masks + } + return mask; +} + + +/** Given an llvm::Value representing a vector mask, see if the value is a + constant. If so, return true and set *mask to be the integer mask + found by taking the high bits of the mask values in turn and + concatenating them into a single integer. In other words, given the + 4-wide mask: < 0xffffffff, 0, 0, 0xffffffff >, we have 0b1001 = 9. + */ +static bool +lGetMask(llvm::Value *factor, uint32_t *mask) { +#ifndef LLVM_3_0 + llvm::ConstantDataVector *cdv = llvm::dyn_cast(factor); + if (cdv != NULL) { + llvm::SmallVector elements; + for (int i = 0; i < (int)cdv->getNumElements(); ++i) + elements.push_back(cdv->getElementAsConstant(i)); + *mask = lConstElementsToMask(elements); + return true; + } +#endif + + llvm::ConstantVector *cv = llvm::dyn_cast(factor); + if (cv != NULL) { + llvm::SmallVector elements; +#ifndef LLVM_3_0 + for (int i = 0; i < (int)cv->getNumOperands(); ++i) { + llvm::Constant *c = + llvm::dyn_cast(cv->getOperand(i)); + if (c == NULL) + return false; + elements.push_back(c); + } +#else + cv->getVectorElements(elements); +#endif + *mask = lConstElementsToMask(elements); + return true; + } + else if (llvm::isa(factor)) { + *mask = 0; + return true; + } + else { +#if 0 + llvm::ConstantExpr *ce = llvm::dyn_cast(factor); + if (ce != NULL) { + llvm::TargetMachine *targetMachine = g->target.GetTargetMachine(); + const llvm::TargetData *td = targetMachine->getTargetData(); + llvm::Constant *c = llvm::ConstantFoldConstantExpression(ce, td); + c->dump(); + factor = c; + } + // else we should be able to handle it above... + Assert(!llvm::isa(factor)); +#endif + return false; + } +} + + +enum MaskStatus { ALL_ON, ALL_OFF, MIXED, UNKNOWN }; + +/** Determines if the given mask value is all on, all off, mixed, or + unknown at compile time. 
+*/ +static MaskStatus +lGetMaskStatus(llvm::Value *mask, int vecWidth = -1) { + uint32_t bits; + if (lGetMask(mask, &bits) == false) + return UNKNOWN; + + if (bits == 0) + return ALL_OFF; + + if (vecWidth == -1) + vecWidth = g->target.vectorWidth; + Assert(vecWidth <= 32); + + for (int i = 0; i < vecWidth; ++i) { + if ((bits & (1ull << i)) == 0) + return MIXED; + } + return ALL_ON; +} + + /////////////////////////////////////////////////////////////////////////// void @@ -559,12 +677,12 @@ private: instruction for this optimization pass. */ struct BlendInstruction { - BlendInstruction(llvm::Function *f, int ao, int o0, int o1, int of) + BlendInstruction(llvm::Function *f, uint32_t ao, int o0, int o1, int of) : function(f), allOnMask(ao), op0(o0), op1(o1), opFactor(of) { } /** Function pointer for the blend instruction */ llvm::Function *function; /** Mask value for an "all on" mask for this instruction */ - int allOnMask; + uint32_t allOnMask; /** The operand number in the llvm CallInst corresponds to the first operand to blend with. */ int op0; @@ -609,99 +727,6 @@ IntrinsicsOpt::IntrinsicsOpt() } -/** Given a vector of constant values (int, float, or bool) representing an - execution mask, convert it to a bitvector where the 0th bit corresponds - to the first vector value and so forth. -*/ -static int -lConstElementsToMask(const llvm::SmallVector &elements) { - Assert(elements.size() <= 32); - - int mask = 0; - for (unsigned int i = 0; i < elements.size(); ++i) { - llvm::APInt intMaskValue; - // SSE has the "interesting" approach of encoding blending - // masks as . - llvm::ConstantFP *cf = llvm::dyn_cast(elements[i]); - if (cf != NULL) { - llvm::APFloat apf = cf->getValueAPF(); - intMaskValue = apf.bitcastToAPInt(); - } - else { - // Otherwise get it as an int - llvm::ConstantInt *ci = llvm::dyn_cast(elements[i]); - Assert(ci != NULL); // vs return -1 if NULL? - intMaskValue = ci->getValue(); - } - // Is the high-bit set? 
If so, OR in the appropriate bit in - // the result mask - if (intMaskValue.countLeadingOnes() > 0) - mask |= (1 << i); - } - return mask; -} - - -/** Given an llvm::Value represinting a vector mask, see if the value is a - constant. If so, return the integer mask found by taking the high bits - of the mask values in turn and concatenating them into a single integer. - In other words, given the 4-wide mask: < 0xffffffff, 0, 0, 0xffffffff >, - we have 0b1001 = 9. - */ -static int -lGetMask(llvm::Value *factor) { - /* FIXME: This will break if we ever do 32-wide compilation, in which case - it don't be possible to distinguish between -1 for "don't know" and - "known and all bits on". */ - Assert(g->target.vectorWidth < 32); - -#ifndef LLVM_3_0 - llvm::ConstantDataVector *cdv = llvm::dyn_cast(factor); - if (cdv != NULL) { - llvm::SmallVector elements; - for (int i = 0; i < (int)cdv->getNumElements(); ++i) - elements.push_back(cdv->getElementAsConstant(i)); - return lConstElementsToMask(elements); - } -#endif - - llvm::ConstantVector *cv = llvm::dyn_cast(factor); - if (cv != NULL) { - llvm::SmallVector elements; -#ifndef LLVM_3_0 - for (int i = 0; i < (int)cv->getNumOperands(); ++i) { - llvm::Constant *c = - llvm::dyn_cast(cv->getOperand(i)); - if (c == NULL) - return NULL; - elements.push_back(c); - } -#else - cv->getVectorElements(elements); -#endif - return lConstElementsToMask(elements); - } - else if (llvm::isa(factor)) - return 0; - else { -#if 0 - llvm::ConstantExpr *ce = llvm::dyn_cast(factor); - if (ce != NULL) { - llvm::TargetMachine *targetMachine = g->target.GetTargetMachine(); - const llvm::TargetData *td = targetMachine->getTargetData(); - llvm::Constant *c = llvm::ConstantFoldConstantExpression(ce, td); - c->dump(); - factor = c; - } - // else we should be able to handle it above... - Assert(!llvm::isa(factor)); -#endif - return -1; - } -} - - /** Given an llvm::Value, return true if we can determine that it's an undefined value. 
This only makes a weak attempt at chasing this down, only detecting flat-out undef values, and bitcasts of undef values. @@ -779,26 +804,28 @@ IntrinsicsOpt::runOnBasicBlock(llvm::BasicBlock &bb) { goto restart; } - int mask = lGetMask(factor); - llvm::Value *value = NULL; - if (mask == 0) - // Mask all off -> replace with the first blend value - value = v[0]; - else if (mask == blend->allOnMask) - // Mask all on -> replace with the second blend value - value = v[1]; + uint32_t mask; + if (lGetMask(factor, &mask) == true) { + llvm::Value *value = NULL; + if (mask == 0) + // Mask all off -> replace with the first blend value + value = v[0]; + else if (mask == blend->allOnMask) + // Mask all on -> replace with the second blend value + value = v[1]; - if (value != NULL) { - llvm::ReplaceInstWithValue(iter->getParent()->getInstList(), - iter, value); - modifiedAny = true; - goto restart; + if (value != NULL) { + llvm::ReplaceInstWithValue(iter->getParent()->getInstList(), + iter, value); + modifiedAny = true; + goto restart; + } } } else if (matchesMaskInstruction(callInst->getCalledFunction())) { llvm::Value *factor = callInst->getArgOperand(0); - int mask = lGetMask(factor); - if (mask != -1) { + uint32_t mask; + if (lGetMask(factor, &mask) == true) { // If the vector-valued mask has a known value, replace it // with the corresponding integer mask from its elements // high bits. 
@@ -812,71 +839,75 @@ IntrinsicsOpt::runOnBasicBlock(llvm::BasicBlock &bb) { else if (callInst->getCalledFunction() == avxMaskedLoad32 || callInst->getCalledFunction() == avxMaskedLoad64) { llvm::Value *factor = callInst->getArgOperand(1); - int mask = lGetMask(factor); - if (mask == 0) { - // nothing being loaded, replace with undef value - llvm::Type *returnType = callInst->getType(); - Assert(llvm::isa(returnType)); - llvm::Value *undefValue = llvm::UndefValue::get(returnType); - llvm::ReplaceInstWithValue(iter->getParent()->getInstList(), - iter, undefValue); - modifiedAny = true; - goto restart; - } - else if (mask == 0xff) { - // all lanes active; replace with a regular load - llvm::Type *returnType = callInst->getType(); - Assert(llvm::isa(returnType)); - // cast the i8 * to the appropriate type - const char *name = LLVMGetName(callInst->getArgOperand(0), "_cast"); - llvm::Value *castPtr = - new llvm::BitCastInst(callInst->getArgOperand(0), - llvm::PointerType::get(returnType, 0), - name, callInst); - lCopyMetadata(castPtr, callInst); - int align = callInst->getCalledFunction() == avxMaskedLoad32 ? 
4 : 8; - name = LLVMGetName(callInst->getArgOperand(0), "_load"); - llvm::Instruction *loadInst = - new llvm::LoadInst(castPtr, name, false /* not volatile */, - align, (llvm::Instruction *)NULL); - lCopyMetadata(loadInst, callInst); - llvm::ReplaceInstWithInst(callInst, loadInst); - modifiedAny = true; - goto restart; + uint32_t mask; + if (lGetMask(factor, &mask) == true) { + if (mask == 0) { + // nothing being loaded, replace with undef value + llvm::Type *returnType = callInst->getType(); + Assert(llvm::isa(returnType)); + llvm::Value *undefValue = llvm::UndefValue::get(returnType); + llvm::ReplaceInstWithValue(iter->getParent()->getInstList(), + iter, undefValue); + modifiedAny = true; + goto restart; + } + else if (mask == 0xff) { + // all lanes active; replace with a regular load + llvm::Type *returnType = callInst->getType(); + Assert(llvm::isa(returnType)); + // cast the i8 * to the appropriate type + const char *name = LLVMGetName(callInst->getArgOperand(0), "_cast"); + llvm::Value *castPtr = + new llvm::BitCastInst(callInst->getArgOperand(0), + llvm::PointerType::get(returnType, 0), + name, callInst); + lCopyMetadata(castPtr, callInst); + int align = callInst->getCalledFunction() == avxMaskedLoad32 ? 4 : 8; + name = LLVMGetName(callInst->getArgOperand(0), "_load"); + llvm::Instruction *loadInst = + new llvm::LoadInst(castPtr, name, false /* not volatile */, + align, (llvm::Instruction *)NULL); + lCopyMetadata(loadInst, callInst); + llvm::ReplaceInstWithInst(callInst, loadInst); + modifiedAny = true; + goto restart; + } } } else if (callInst->getCalledFunction() == avxMaskedStore32 || callInst->getCalledFunction() == avxMaskedStore64) { // NOTE: mask is the 2nd parameter, not the 3rd one!! 
llvm::Value *factor = callInst->getArgOperand(1); - int mask = lGetMask(factor); - if (mask == 0) { - // nothing actually being stored, just remove the inst - callInst->eraseFromParent(); - modifiedAny = true; - goto restart; - } - else if (mask == 0xff) { - // all lanes storing, so replace with a regular store - llvm::Value *rvalue = callInst->getArgOperand(2); - llvm::Type *storeType = rvalue->getType(); - const char *name = LLVMGetName(callInst->getArgOperand(0), - "_ptrcast"); - llvm::Value *castPtr = - new llvm::BitCastInst(callInst->getArgOperand(0), - llvm::PointerType::get(storeType, 0), - name, callInst); - lCopyMetadata(castPtr, callInst); + uint32_t mask; + if (lGetMask(factor, &mask) == true) { + if (mask == 0) { + // nothing actually being stored, just remove the inst + callInst->eraseFromParent(); + modifiedAny = true; + goto restart; + } + else if (mask == 0xff) { + // all lanes storing, so replace with a regular store + llvm::Value *rvalue = callInst->getArgOperand(2); + llvm::Type *storeType = rvalue->getType(); + const char *name = LLVMGetName(callInst->getArgOperand(0), + "_ptrcast"); + llvm::Value *castPtr = + new llvm::BitCastInst(callInst->getArgOperand(0), + llvm::PointerType::get(storeType, 0), + name, callInst); + lCopyMetadata(castPtr, callInst); - llvm::StoreInst *storeInst = - new llvm::StoreInst(rvalue, castPtr, (llvm::Instruction *)NULL); - int align = callInst->getCalledFunction() == avxMaskedStore32 ? 4 : 8; - storeInst->setAlignment(align); - lCopyMetadata(storeInst, callInst); - llvm::ReplaceInstWithInst(callInst, storeInst); + llvm::StoreInst *storeInst = + new llvm::StoreInst(rvalue, castPtr, (llvm::Instruction *)NULL); + int align = callInst->getCalledFunction() == avxMaskedStore32 ? 
4 : 8; + storeInst->setAlignment(align); + lCopyMetadata(storeInst, callInst); + llvm::ReplaceInstWithInst(callInst, storeInst); - modifiedAny = true; - goto restart; + modifiedAny = true; + goto restart; + } } } } @@ -949,13 +980,13 @@ VSelMovmskOpt::runOnBasicBlock(llvm::BasicBlock &bb) { llvm::SelectInst *selectInst = llvm::dyn_cast(&*iter); if (selectInst != NULL && selectInst->getType()->isVectorTy()) { llvm::Value *factor = selectInst->getOperand(0); - int mask = lGetMask(factor); - int allOnMask = (1 << g->target.vectorWidth) - 1; + + MaskStatus maskStatus = lGetMaskStatus(factor); llvm::Value *value = NULL; - if (mask == allOnMask) + if (maskStatus == ALL_ON) // Mask all on -> replace with the first select value value = selectInst->getOperand(1); - else if (mask == 0) + else if (maskStatus == ALL_OFF) // Mask all off -> replace with the second select value value = selectInst->getOperand(2); @@ -976,8 +1007,8 @@ VSelMovmskOpt::runOnBasicBlock(llvm::BasicBlock &bb) { if (calledFunc == NULL || calledFunc != m->module->getFunction("__movmsk")) continue; - int mask = lGetMask(callInst->getArgOperand(0)); - if (mask != -1) { + uint32_t mask; + if (lGetMask(callInst->getArgOperand(0), &mask) == true) { #if 0 fprintf(stderr, "mask %d\n", mask); callInst->getArgOperand(0)->dump(); @@ -1964,10 +1995,8 @@ MaskedStoreOptPass::runOnBasicBlock(llvm::BasicBlock &bb) { llvm::Value *rvalue = callInst->getArgOperand(1); llvm::Value *mask = callInst->getArgOperand(2); - int allOnMask = (1 << g->target.vectorWidth) - 1; - - int maskAsInt = lGetMask(mask); - if (maskAsInt == 0) { + MaskStatus maskStatus = lGetMaskStatus(mask); + if (maskStatus == ALL_OFF) { // Zero mask - no-op, so remove the store completely. (This // may in turn lead to being able to optimize out instructions // that compute the rvalue...) 
@@ -1975,11 +2004,10 @@ MaskedStoreOptPass::runOnBasicBlock(llvm::BasicBlock &bb) { modifiedAny = true; goto restart; } - else if (maskAsInt == allOnMask) { + else if (maskStatus == ALL_ON) { // The mask is all on, so turn this into a regular store llvm::Type *rvalueType = rvalue->getType(); - llvm::Type *ptrType = - llvm::PointerType::get(rvalueType, 0); + llvm::Type *ptrType = llvm::PointerType::get(rvalueType, 0); lvalue = new llvm::BitCastInst(lvalue, ptrType, "lvalue_to_ptr_type", callInst); lCopyMetadata(lvalue, callInst); @@ -2072,20 +2100,18 @@ MaskedLoadOptPass::runOnBasicBlock(llvm::BasicBlock &bb) { // Got one; grab the operands llvm::Value *ptr = callInst->getArgOperand(0); llvm::Value *mask = callInst->getArgOperand(1); - int allOnMask = (1 << g->target.vectorWidth) - 1; - int maskAsInt = lGetMask(mask); - if (maskAsInt == 0) { + MaskStatus maskStatus = lGetMaskStatus(mask); + if (maskStatus == ALL_OFF) { // Zero mask - no-op, so replace the load with an undef value llvm::ReplaceInstWithValue(iter->getParent()->getInstList(), iter, llvm::UndefValue::get(callInst->getType())); modifiedAny = true; goto restart; } - else if (maskAsInt == allOnMask) { + else if (maskStatus == ALL_ON) { // The mask is all on, so turn this into a regular load - llvm::Type *ptrType = - llvm::PointerType::get(callInst->getType(), 0); + llvm::Type *ptrType = llvm::PointerType::get(callInst->getType(), 0); ptr = new llvm::BitCastInst(ptr, ptrType, "ptr_cast_for_load", callInst); llvm::Instruction *load = @@ -2558,18 +2584,6 @@ public: char GatherCoalescePass::ID = 0; -/* Returns true if the mask is known at compile time to be "all on". */ -static bool -lIsMaskAllOn(llvm::Value *mask) { - int m = lGetMask(mask); - if (m == -1) - return false; - - int allOnMask = (1 << g->target.vectorWidth) - 1; - return (m == allOnMask); -} - - /** Representation of a memory load that the gather coalescing code has decided to generate. 
*/ @@ -3497,7 +3511,7 @@ GatherCoalescePass::runOnBasicBlock(llvm::BasicBlock &bb) { // Then and only then do we have a common base pointer with all // offsets from that constants (in which case we can potentially // coalesce). - if (lIsMaskAllOn(mask) == false) + if (lGetMaskStatus(mask) != ALL_ON) continue; if (!LLVMVectorValuesAllEqual(variableOffsets)) diff --git a/run_tests.py b/run_tests.py index ce5e98f1..79465267 100755 --- a/run_tests.py +++ b/run_tests.py @@ -33,7 +33,7 @@ parser.add_option("-r", "--random-shuffle", dest="random", help="Randomly order parser.add_option("-g", "--generics-include", dest="include_file", help="Filename for header implementing functions for generics", default=None) parser.add_option('-t', '--target', dest='target', - help='Set compilation target (sse2, sse2-x2, sse4, sse4-x2, avx, avx-x2, generic-4, generic-8, generic-16)', + help='Set compilation target (sse2, sse2-x2, sse4, sse4-x2, avx, avx-x2, generic-4, generic-8, generic-16, generic-32)', default="sse4") parser.add_option('-a', '--arch', dest='arch', help='Set architecture (x86, x86-64)', @@ -69,6 +69,9 @@ if is_generic_target and options.include_file == None: elif options.target == "generic-16": sys.stderr.write("No generics #include specified; using examples/intrinsics/generic-16.h\n") options.include_file = "examples/intrinsics/generic-16.h" + elif options.target == "generic-32": + sys.stderr.write("No generics #include specified and no default available for \"generic-32\" target.\n") + sys.exit(1) if options.compiler_exe == None: if is_windows: