Add generic-32 target.
This commit is contained in:
2
Makefile
2
Makefile
@@ -85,7 +85,7 @@ CXX_SRC=ast.cpp builtins.cpp cbackend.cpp ctx.cpp decl.cpp expr.cpp func.cpp \
|
|||||||
HEADERS=ast.h builtins.h ctx.h decl.h expr.h func.h ispc.h llvmutil.h module.h \
|
HEADERS=ast.h builtins.h ctx.h decl.h expr.h func.h ispc.h llvmutil.h module.h \
|
||||||
opt.h stmt.h sym.h type.h util.h
|
opt.h stmt.h sym.h type.h util.h
|
||||||
TARGETS=avx1 avx1-x2 avx2 avx2-x2 sse2 sse2-x2 sse4 sse4-x2 generic-4 generic-8 \
|
TARGETS=avx1 avx1-x2 avx2 avx2-x2 sse2 sse2-x2 sse4 sse4-x2 generic-4 generic-8 \
|
||||||
generic-16 generic-1
|
generic-16 generic-32 generic-1
|
||||||
BUILTINS_SRC=$(addprefix builtins/target-, $(addsuffix .ll, $(TARGETS))) \
|
BUILTINS_SRC=$(addprefix builtins/target-, $(addsuffix .ll, $(TARGETS))) \
|
||||||
builtins/dispatch.ll
|
builtins/dispatch.ll
|
||||||
BUILTINS_OBJS=$(addprefix builtins-, $(notdir $(BUILTINS_SRC:.ll=.o))) \
|
BUILTINS_OBJS=$(addprefix builtins-, $(notdir $(BUILTINS_SRC:.ll=.o))) \
|
||||||
|
|||||||
@@ -847,6 +847,13 @@ DefineStdlib(SymbolTable *symbolTable, llvm::LLVMContext *ctx, llvm::Module *mod
|
|||||||
builtins_bitcode_generic_16_length,
|
builtins_bitcode_generic_16_length,
|
||||||
module, symbolTable);
|
module, symbolTable);
|
||||||
break;
|
break;
|
||||||
|
case 32:
|
||||||
|
extern unsigned char builtins_bitcode_generic_32[];
|
||||||
|
extern int builtins_bitcode_generic_32_length;
|
||||||
|
AddBitcodeToModule(builtins_bitcode_generic_32,
|
||||||
|
builtins_bitcode_generic_32_length,
|
||||||
|
module, symbolTable);
|
||||||
|
break;
|
||||||
case 1:
|
case 1:
|
||||||
extern unsigned char builtins_bitcode_generic_1[];
|
extern unsigned char builtins_bitcode_generic_1[];
|
||||||
extern int builtins_bitcode_generic_1_length;
|
extern int builtins_bitcode_generic_1_length;
|
||||||
|
|||||||
33
builtins/target-generic-32.ll
Normal file
33
builtins/target-generic-32.ll
Normal file
@@ -0,0 +1,33 @@
|
|||||||
|
;; Copyright (c) 2010-2012, Intel Corporation
|
||||||
|
;; All rights reserved.
|
||||||
|
;;
|
||||||
|
;; Redistribution and use in source and binary forms, with or without
|
||||||
|
;; modification, are permitted provided that the following conditions are
|
||||||
|
;; met:
|
||||||
|
;;
|
||||||
|
;; * Redistributions of source code must retain the above copyright
|
||||||
|
;; notice, this list of conditions and the following disclaimer.
|
||||||
|
;;
|
||||||
|
;; * Redistributions in binary form must reproduce the above copyright
|
||||||
|
;; notice, this list of conditions and the following disclaimer in the
|
||||||
|
;; documentation and/or other materials provided with the distribution.
|
||||||
|
;;
|
||||||
|
;; * Neither the name of Intel Corporation nor the names of its
|
||||||
|
;; contributors may be used to endorse or promote products derived from
|
||||||
|
;; this software without specific prior written permission.
|
||||||
|
;;
|
||||||
|
;;
|
||||||
|
;; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
|
||||||
|
;; IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
||||||
|
;; TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
|
||||||
|
;; PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
|
||||||
|
;; OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||||
|
;; EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||||
|
;; PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||||
|
;; PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||||
|
;; LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||||
|
;; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||||
|
;; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
|
define(`WIDTH',`32')
|
||||||
|
include(`target-generic-common.ll')
|
||||||
11
ispc.cpp
11
ispc.cpp
@@ -257,6 +257,14 @@ Target::GetTarget(const char *arch, const char *cpu, const char *isa,
|
|||||||
t->allOffMaskIsSafe = true;
|
t->allOffMaskIsSafe = true;
|
||||||
t->maskBitCount = 1;
|
t->maskBitCount = 1;
|
||||||
}
|
}
|
||||||
|
else if (!strcasecmp(isa, "generic-32")) {
|
||||||
|
t->isa = Target::GENERIC;
|
||||||
|
t->nativeVectorWidth = 32;
|
||||||
|
t->vectorWidth = 32;
|
||||||
|
t->maskingIsFree = true;
|
||||||
|
t->allOffMaskIsSafe = true;
|
||||||
|
t->maskBitCount = 1;
|
||||||
|
}
|
||||||
else if (!strcasecmp(isa, "generic-1")) {
|
else if (!strcasecmp(isa, "generic-1")) {
|
||||||
t->isa = Target::GENERIC;
|
t->isa = Target::GENERIC;
|
||||||
t->nativeVectorWidth = 1;
|
t->nativeVectorWidth = 1;
|
||||||
@@ -313,6 +321,7 @@ Target::GetTarget(const char *arch, const char *cpu, const char *isa,
|
|||||||
llvm::TargetMachine *targetMachine = t->GetTargetMachine();
|
llvm::TargetMachine *targetMachine = t->GetTargetMachine();
|
||||||
const llvm::TargetData *targetData = targetMachine->getTargetData();
|
const llvm::TargetData *targetData = targetMachine->getTargetData();
|
||||||
t->is32Bit = (targetData->getPointerSize() == 4);
|
t->is32Bit = (targetData->getPointerSize() == 4);
|
||||||
|
Assert(t->vectorWidth <= ISPC_MAX_NVEC);
|
||||||
}
|
}
|
||||||
|
|
||||||
return !error;
|
return !error;
|
||||||
@@ -344,7 +353,7 @@ Target::SupportedTargetISAs() {
|
|||||||
#ifndef LLVM_3_0
|
#ifndef LLVM_3_0
|
||||||
", avx2, avx2-x2"
|
", avx2, avx2-x2"
|
||||||
#endif // !LLVM_3_0
|
#endif // !LLVM_3_0
|
||||||
", generic-4, generic-8, generic-16, generic-1";
|
", generic-1, generic-4, generic-8, generic-16, generic-32";
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
2
ispc.h
2
ispc.h
@@ -71,7 +71,7 @@
|
|||||||
/** @def ISPC_MAX_NVEC maximum vector size of any of the compilation
|
/** @def ISPC_MAX_NVEC maximum vector size of any of the compilation
|
||||||
targets.
|
targets.
|
||||||
*/
|
*/
|
||||||
#define ISPC_MAX_NVEC 16
|
#define ISPC_MAX_NVEC 32
|
||||||
|
|
||||||
// Forward declarations of a number of widely-used LLVM types
|
// Forward declarations of a number of widely-used LLVM types
|
||||||
namespace llvm {
|
namespace llvm {
|
||||||
|
|||||||
14
ispc.vcxproj
14
ispc.vcxproj
@@ -29,6 +29,7 @@
|
|||||||
<ClCompile Include="gen-bitcode-generic-4.cpp" />
|
<ClCompile Include="gen-bitcode-generic-4.cpp" />
|
||||||
<ClCompile Include="gen-bitcode-generic-8.cpp" />
|
<ClCompile Include="gen-bitcode-generic-8.cpp" />
|
||||||
<ClCompile Include="gen-bitcode-generic-16.cpp" />
|
<ClCompile Include="gen-bitcode-generic-16.cpp" />
|
||||||
|
<ClCompile Include="gen-bitcode-generic-32.cpp" />
|
||||||
<ClCompile Include="gen-bitcode-sse2.cpp" />
|
<ClCompile Include="gen-bitcode-sse2.cpp" />
|
||||||
<ClCompile Include="gen-bitcode-sse2-x2.cpp" />
|
<ClCompile Include="gen-bitcode-sse2-x2.cpp" />
|
||||||
<ClCompile Include="gen-bitcode-sse4.cpp" />
|
<ClCompile Include="gen-bitcode-sse4.cpp" />
|
||||||
@@ -264,6 +265,19 @@
|
|||||||
<Message Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">Building gen-bitcode-generic-16.cpp</Message>
|
<Message Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">Building gen-bitcode-generic-16.cpp</Message>
|
||||||
</CustomBuild>
|
</CustomBuild>
|
||||||
</ItemGroup>
|
</ItemGroup>
|
||||||
|
<ItemGroup>
|
||||||
|
<CustomBuild Include="builtins\target-generic-32.ll">
|
||||||
|
<FileType>Document</FileType>
|
||||||
|
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% builtins\target-generic-32.ll | python bitcode2cpp.py builtins\target-generic-32.ll > gen-bitcode-generic-32.cpp</Command>
|
||||||
|
<Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">gen-bitcode-generic-32.cpp</Outputs>
|
||||||
|
<AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">builtins\util.m4;builtins\target-generic-common.ll</AdditionalInputs>
|
||||||
|
<Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% builtins\target-generic-32.ll | python bitcode2cpp.py builtins\target-generic-32.ll > gen-bitcode-generic-32.cpp</Command>
|
||||||
|
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">gen-bitcode-generic-32.cpp</Outputs>
|
||||||
|
<AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">builtins\util.m4;builtins\target-generic-common.ll</AdditionalInputs>
|
||||||
|
<Message Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Building gen-bitcode-generic-32.cpp</Message>
|
||||||
|
<Message Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">Building gen-bitcode-generic-32.cpp</Message>
|
||||||
|
</CustomBuild>
|
||||||
|
</ItemGroup>
|
||||||
<ItemGroup>
|
<ItemGroup>
|
||||||
<CustomBuild Include="lex.ll">
|
<CustomBuild Include="lex.ll">
|
||||||
<FileType>Document</FileType>
|
<FileType>Document</FileType>
|
||||||
|
|||||||
412
opt.cpp
412
opt.cpp
@@ -265,6 +265,124 @@ lGEPInst(llvm::Value *ptr, llvm::Value *offset, const char *name,
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/** Given a vector of constant values (int, float, or bool) representing an
|
||||||
|
execution mask, convert it to a bitvector where the 0th bit corresponds
|
||||||
|
to the first vector value and so forth.
|
||||||
|
*/
|
||||||
|
static uint32_t
|
||||||
|
lConstElementsToMask(const llvm::SmallVector<llvm::Constant *,
|
||||||
|
ISPC_MAX_NVEC> &elements) {
|
||||||
|
Assert(elements.size() <= 32);
|
||||||
|
|
||||||
|
uint32_t mask = 0;
|
||||||
|
for (unsigned int i = 0; i < elements.size(); ++i) {
|
||||||
|
llvm::APInt intMaskValue;
|
||||||
|
// SSE has the "interesting" approach of encoding blending
|
||||||
|
// masks as <n x float>.
|
||||||
|
llvm::ConstantFP *cf = llvm::dyn_cast<llvm::ConstantFP>(elements[i]);
|
||||||
|
if (cf != NULL) {
|
||||||
|
llvm::APFloat apf = cf->getValueAPF();
|
||||||
|
intMaskValue = apf.bitcastToAPInt();
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
// Otherwise get it as an int
|
||||||
|
llvm::ConstantInt *ci = llvm::dyn_cast<llvm::ConstantInt>(elements[i]);
|
||||||
|
Assert(ci != NULL); // vs return -1 if NULL?
|
||||||
|
intMaskValue = ci->getValue();
|
||||||
|
}
|
||||||
|
// Is the high-bit set? If so, OR in the appropriate bit in
|
||||||
|
// the result mask
|
||||||
|
if (intMaskValue.countLeadingOnes() > 0)
|
||||||
|
mask |= (1 << i);
|
||||||
|
}
|
||||||
|
return mask;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/** Given an llvm::Value represinting a vector mask, see if the value is a
|
||||||
|
constant. If so, return true and set *bits to be the integer mask
|
||||||
|
found by taking the high bits of the mask values in turn and
|
||||||
|
concatenating them into a single integer. In other words, given the
|
||||||
|
4-wide mask: < 0xffffffff, 0, 0, 0xffffffff >, we have 0b1001 = 9.
|
||||||
|
*/
|
||||||
|
static bool
|
||||||
|
lGetMask(llvm::Value *factor, uint32_t *mask) {
|
||||||
|
#ifndef LLVM_3_0
|
||||||
|
llvm::ConstantDataVector *cdv = llvm::dyn_cast<llvm::ConstantDataVector>(factor);
|
||||||
|
if (cdv != NULL) {
|
||||||
|
llvm::SmallVector<llvm::Constant *, ISPC_MAX_NVEC> elements;
|
||||||
|
for (int i = 0; i < (int)cdv->getNumElements(); ++i)
|
||||||
|
elements.push_back(cdv->getElementAsConstant(i));
|
||||||
|
*mask = lConstElementsToMask(elements);
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
llvm::ConstantVector *cv = llvm::dyn_cast<llvm::ConstantVector>(factor);
|
||||||
|
if (cv != NULL) {
|
||||||
|
llvm::SmallVector<llvm::Constant *, ISPC_MAX_NVEC> elements;
|
||||||
|
#ifndef LLVM_3_0
|
||||||
|
for (int i = 0; i < (int)cv->getNumOperands(); ++i) {
|
||||||
|
llvm::Constant *c =
|
||||||
|
llvm::dyn_cast<llvm::Constant>(cv->getOperand(i));
|
||||||
|
if (c == NULL)
|
||||||
|
return NULL;
|
||||||
|
elements.push_back(c);
|
||||||
|
}
|
||||||
|
#else
|
||||||
|
cv->getVectorElements(elements);
|
||||||
|
#endif
|
||||||
|
*mask = lConstElementsToMask(elements);
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
else if (llvm::isa<llvm::ConstantAggregateZero>(factor)) {
|
||||||
|
*mask = 0;
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
#if 0
|
||||||
|
llvm::ConstantExpr *ce = llvm::dyn_cast<llvm::ConstantExpr>(factor);
|
||||||
|
if (ce != NULL) {
|
||||||
|
llvm::TargetMachine *targetMachine = g->target.GetTargetMachine();
|
||||||
|
const llvm::TargetData *td = targetMachine->getTargetData();
|
||||||
|
llvm::Constant *c = llvm::ConstantFoldConstantExpression(ce, td);
|
||||||
|
c->dump();
|
||||||
|
factor = c;
|
||||||
|
}
|
||||||
|
// else we should be able to handle it above...
|
||||||
|
Assert(!llvm::isa<llvm::Constant>(factor));
|
||||||
|
#endif
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
enum MaskStatus { ALL_ON, ALL_OFF, MIXED, UNKNOWN };
|
||||||
|
|
||||||
|
/** Determines if the given mask value is all on, all off, mixed, or
|
||||||
|
unknown at compile time.
|
||||||
|
*/
|
||||||
|
static MaskStatus
|
||||||
|
lGetMaskStatus(llvm::Value *mask, int vecWidth = -1) {
|
||||||
|
uint32_t bits;
|
||||||
|
if (lGetMask(mask, &bits) == false)
|
||||||
|
return UNKNOWN;
|
||||||
|
|
||||||
|
if (bits == 0)
|
||||||
|
return ALL_OFF;
|
||||||
|
|
||||||
|
if (vecWidth == -1)
|
||||||
|
vecWidth = g->target.vectorWidth;
|
||||||
|
Assert(vecWidth <= 32);
|
||||||
|
|
||||||
|
for (int i = 0; i < vecWidth; ++i) {
|
||||||
|
if ((bits & (1ull << i)) == 0)
|
||||||
|
return MIXED;
|
||||||
|
}
|
||||||
|
return ALL_ON;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
///////////////////////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
void
|
void
|
||||||
@@ -559,12 +677,12 @@ private:
|
|||||||
instruction for this optimization pass.
|
instruction for this optimization pass.
|
||||||
*/
|
*/
|
||||||
struct BlendInstruction {
|
struct BlendInstruction {
|
||||||
BlendInstruction(llvm::Function *f, int ao, int o0, int o1, int of)
|
BlendInstruction(llvm::Function *f, uint32_t ao, int o0, int o1, int of)
|
||||||
: function(f), allOnMask(ao), op0(o0), op1(o1), opFactor(of) { }
|
: function(f), allOnMask(ao), op0(o0), op1(o1), opFactor(of) { }
|
||||||
/** Function pointer for the blend instruction */
|
/** Function pointer for the blend instruction */
|
||||||
llvm::Function *function;
|
llvm::Function *function;
|
||||||
/** Mask value for an "all on" mask for this instruction */
|
/** Mask value for an "all on" mask for this instruction */
|
||||||
int allOnMask;
|
uint32_t allOnMask;
|
||||||
/** The operand number in the llvm CallInst corresponds to the
|
/** The operand number in the llvm CallInst corresponds to the
|
||||||
first operand to blend with. */
|
first operand to blend with. */
|
||||||
int op0;
|
int op0;
|
||||||
@@ -609,99 +727,6 @@ IntrinsicsOpt::IntrinsicsOpt()
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
/** Given a vector of constant values (int, float, or bool) representing an
|
|
||||||
execution mask, convert it to a bitvector where the 0th bit corresponds
|
|
||||||
to the first vector value and so forth.
|
|
||||||
*/
|
|
||||||
static int
|
|
||||||
lConstElementsToMask(const llvm::SmallVector<llvm::Constant *,
|
|
||||||
ISPC_MAX_NVEC> &elements) {
|
|
||||||
Assert(elements.size() <= 32);
|
|
||||||
|
|
||||||
int mask = 0;
|
|
||||||
for (unsigned int i = 0; i < elements.size(); ++i) {
|
|
||||||
llvm::APInt intMaskValue;
|
|
||||||
// SSE has the "interesting" approach of encoding blending
|
|
||||||
// masks as <n x float>.
|
|
||||||
llvm::ConstantFP *cf = llvm::dyn_cast<llvm::ConstantFP>(elements[i]);
|
|
||||||
if (cf != NULL) {
|
|
||||||
llvm::APFloat apf = cf->getValueAPF();
|
|
||||||
intMaskValue = apf.bitcastToAPInt();
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
// Otherwise get it as an int
|
|
||||||
llvm::ConstantInt *ci = llvm::dyn_cast<llvm::ConstantInt>(elements[i]);
|
|
||||||
Assert(ci != NULL); // vs return -1 if NULL?
|
|
||||||
intMaskValue = ci->getValue();
|
|
||||||
}
|
|
||||||
// Is the high-bit set? If so, OR in the appropriate bit in
|
|
||||||
// the result mask
|
|
||||||
if (intMaskValue.countLeadingOnes() > 0)
|
|
||||||
mask |= (1 << i);
|
|
||||||
}
|
|
||||||
return mask;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
/** Given an llvm::Value represinting a vector mask, see if the value is a
|
|
||||||
constant. If so, return the integer mask found by taking the high bits
|
|
||||||
of the mask values in turn and concatenating them into a single integer.
|
|
||||||
In other words, given the 4-wide mask: < 0xffffffff, 0, 0, 0xffffffff >,
|
|
||||||
we have 0b1001 = 9.
|
|
||||||
*/
|
|
||||||
static int
|
|
||||||
lGetMask(llvm::Value *factor) {
|
|
||||||
/* FIXME: This will break if we ever do 32-wide compilation, in which case
|
|
||||||
it don't be possible to distinguish between -1 for "don't know" and
|
|
||||||
"known and all bits on". */
|
|
||||||
Assert(g->target.vectorWidth < 32);
|
|
||||||
|
|
||||||
#ifndef LLVM_3_0
|
|
||||||
llvm::ConstantDataVector *cdv = llvm::dyn_cast<llvm::ConstantDataVector>(factor);
|
|
||||||
if (cdv != NULL) {
|
|
||||||
llvm::SmallVector<llvm::Constant *, ISPC_MAX_NVEC> elements;
|
|
||||||
for (int i = 0; i < (int)cdv->getNumElements(); ++i)
|
|
||||||
elements.push_back(cdv->getElementAsConstant(i));
|
|
||||||
return lConstElementsToMask(elements);
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
llvm::ConstantVector *cv = llvm::dyn_cast<llvm::ConstantVector>(factor);
|
|
||||||
if (cv != NULL) {
|
|
||||||
llvm::SmallVector<llvm::Constant *, ISPC_MAX_NVEC> elements;
|
|
||||||
#ifndef LLVM_3_0
|
|
||||||
for (int i = 0; i < (int)cv->getNumOperands(); ++i) {
|
|
||||||
llvm::Constant *c =
|
|
||||||
llvm::dyn_cast<llvm::Constant>(cv->getOperand(i));
|
|
||||||
if (c == NULL)
|
|
||||||
return NULL;
|
|
||||||
elements.push_back(c);
|
|
||||||
}
|
|
||||||
#else
|
|
||||||
cv->getVectorElements(elements);
|
|
||||||
#endif
|
|
||||||
return lConstElementsToMask(elements);
|
|
||||||
}
|
|
||||||
else if (llvm::isa<llvm::ConstantAggregateZero>(factor))
|
|
||||||
return 0;
|
|
||||||
else {
|
|
||||||
#if 0
|
|
||||||
llvm::ConstantExpr *ce = llvm::dyn_cast<llvm::ConstantExpr>(factor);
|
|
||||||
if (ce != NULL) {
|
|
||||||
llvm::TargetMachine *targetMachine = g->target.GetTargetMachine();
|
|
||||||
const llvm::TargetData *td = targetMachine->getTargetData();
|
|
||||||
llvm::Constant *c = llvm::ConstantFoldConstantExpression(ce, td);
|
|
||||||
c->dump();
|
|
||||||
factor = c;
|
|
||||||
}
|
|
||||||
// else we should be able to handle it above...
|
|
||||||
Assert(!llvm::isa<llvm::Constant>(factor));
|
|
||||||
#endif
|
|
||||||
return -1;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
/** Given an llvm::Value, return true if we can determine that it's an
|
/** Given an llvm::Value, return true if we can determine that it's an
|
||||||
undefined value. This only makes a weak attempt at chasing this down,
|
undefined value. This only makes a weak attempt at chasing this down,
|
||||||
only detecting flat-out undef values, and bitcasts of undef values.
|
only detecting flat-out undef values, and bitcasts of undef values.
|
||||||
@@ -779,26 +804,28 @@ IntrinsicsOpt::runOnBasicBlock(llvm::BasicBlock &bb) {
|
|||||||
goto restart;
|
goto restart;
|
||||||
}
|
}
|
||||||
|
|
||||||
int mask = lGetMask(factor);
|
uint32_t mask;
|
||||||
llvm::Value *value = NULL;
|
if (lGetMask(factor, &mask) == true) {
|
||||||
if (mask == 0)
|
llvm::Value *value = NULL;
|
||||||
// Mask all off -> replace with the first blend value
|
if (mask == 0)
|
||||||
value = v[0];
|
// Mask all off -> replace with the first blend value
|
||||||
else if (mask == blend->allOnMask)
|
value = v[0];
|
||||||
// Mask all on -> replace with the second blend value
|
else if (mask == blend->allOnMask)
|
||||||
value = v[1];
|
// Mask all on -> replace with the second blend value
|
||||||
|
value = v[1];
|
||||||
|
|
||||||
if (value != NULL) {
|
if (value != NULL) {
|
||||||
llvm::ReplaceInstWithValue(iter->getParent()->getInstList(),
|
llvm::ReplaceInstWithValue(iter->getParent()->getInstList(),
|
||||||
iter, value);
|
iter, value);
|
||||||
modifiedAny = true;
|
modifiedAny = true;
|
||||||
goto restart;
|
goto restart;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else if (matchesMaskInstruction(callInst->getCalledFunction())) {
|
else if (matchesMaskInstruction(callInst->getCalledFunction())) {
|
||||||
llvm::Value *factor = callInst->getArgOperand(0);
|
llvm::Value *factor = callInst->getArgOperand(0);
|
||||||
int mask = lGetMask(factor);
|
uint32_t mask;
|
||||||
if (mask != -1) {
|
if (lGetMask(factor, &mask) == true) {
|
||||||
// If the vector-valued mask has a known value, replace it
|
// If the vector-valued mask has a known value, replace it
|
||||||
// with the corresponding integer mask from its elements
|
// with the corresponding integer mask from its elements
|
||||||
// high bits.
|
// high bits.
|
||||||
@@ -812,71 +839,75 @@ IntrinsicsOpt::runOnBasicBlock(llvm::BasicBlock &bb) {
|
|||||||
else if (callInst->getCalledFunction() == avxMaskedLoad32 ||
|
else if (callInst->getCalledFunction() == avxMaskedLoad32 ||
|
||||||
callInst->getCalledFunction() == avxMaskedLoad64) {
|
callInst->getCalledFunction() == avxMaskedLoad64) {
|
||||||
llvm::Value *factor = callInst->getArgOperand(1);
|
llvm::Value *factor = callInst->getArgOperand(1);
|
||||||
int mask = lGetMask(factor);
|
uint32_t mask;
|
||||||
if (mask == 0) {
|
if (lGetMask(factor, &mask) == true) {
|
||||||
// nothing being loaded, replace with undef value
|
if (mask == 0) {
|
||||||
llvm::Type *returnType = callInst->getType();
|
// nothing being loaded, replace with undef value
|
||||||
Assert(llvm::isa<llvm::VectorType>(returnType));
|
llvm::Type *returnType = callInst->getType();
|
||||||
llvm::Value *undefValue = llvm::UndefValue::get(returnType);
|
Assert(llvm::isa<llvm::VectorType>(returnType));
|
||||||
llvm::ReplaceInstWithValue(iter->getParent()->getInstList(),
|
llvm::Value *undefValue = llvm::UndefValue::get(returnType);
|
||||||
iter, undefValue);
|
llvm::ReplaceInstWithValue(iter->getParent()->getInstList(),
|
||||||
modifiedAny = true;
|
iter, undefValue);
|
||||||
goto restart;
|
modifiedAny = true;
|
||||||
}
|
goto restart;
|
||||||
else if (mask == 0xff) {
|
}
|
||||||
// all lanes active; replace with a regular load
|
else if (mask == 0xff) {
|
||||||
llvm::Type *returnType = callInst->getType();
|
// all lanes active; replace with a regular load
|
||||||
Assert(llvm::isa<llvm::VectorType>(returnType));
|
llvm::Type *returnType = callInst->getType();
|
||||||
// cast the i8 * to the appropriate type
|
Assert(llvm::isa<llvm::VectorType>(returnType));
|
||||||
const char *name = LLVMGetName(callInst->getArgOperand(0), "_cast");
|
// cast the i8 * to the appropriate type
|
||||||
llvm::Value *castPtr =
|
const char *name = LLVMGetName(callInst->getArgOperand(0), "_cast");
|
||||||
new llvm::BitCastInst(callInst->getArgOperand(0),
|
llvm::Value *castPtr =
|
||||||
llvm::PointerType::get(returnType, 0),
|
new llvm::BitCastInst(callInst->getArgOperand(0),
|
||||||
name, callInst);
|
llvm::PointerType::get(returnType, 0),
|
||||||
lCopyMetadata(castPtr, callInst);
|
name, callInst);
|
||||||
int align = callInst->getCalledFunction() == avxMaskedLoad32 ? 4 : 8;
|
lCopyMetadata(castPtr, callInst);
|
||||||
name = LLVMGetName(callInst->getArgOperand(0), "_load");
|
int align = callInst->getCalledFunction() == avxMaskedLoad32 ? 4 : 8;
|
||||||
llvm::Instruction *loadInst =
|
name = LLVMGetName(callInst->getArgOperand(0), "_load");
|
||||||
new llvm::LoadInst(castPtr, name, false /* not volatile */,
|
llvm::Instruction *loadInst =
|
||||||
align, (llvm::Instruction *)NULL);
|
new llvm::LoadInst(castPtr, name, false /* not volatile */,
|
||||||
lCopyMetadata(loadInst, callInst);
|
align, (llvm::Instruction *)NULL);
|
||||||
llvm::ReplaceInstWithInst(callInst, loadInst);
|
lCopyMetadata(loadInst, callInst);
|
||||||
modifiedAny = true;
|
llvm::ReplaceInstWithInst(callInst, loadInst);
|
||||||
goto restart;
|
modifiedAny = true;
|
||||||
|
goto restart;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else if (callInst->getCalledFunction() == avxMaskedStore32 ||
|
else if (callInst->getCalledFunction() == avxMaskedStore32 ||
|
||||||
callInst->getCalledFunction() == avxMaskedStore64) {
|
callInst->getCalledFunction() == avxMaskedStore64) {
|
||||||
// NOTE: mask is the 2nd parameter, not the 3rd one!!
|
// NOTE: mask is the 2nd parameter, not the 3rd one!!
|
||||||
llvm::Value *factor = callInst->getArgOperand(1);
|
llvm::Value *factor = callInst->getArgOperand(1);
|
||||||
int mask = lGetMask(factor);
|
uint32_t mask;
|
||||||
if (mask == 0) {
|
if (lGetMask(factor, &mask) == true) {
|
||||||
// nothing actually being stored, just remove the inst
|
if (mask == 0) {
|
||||||
callInst->eraseFromParent();
|
// nothing actually being stored, just remove the inst
|
||||||
modifiedAny = true;
|
callInst->eraseFromParent();
|
||||||
goto restart;
|
modifiedAny = true;
|
||||||
}
|
goto restart;
|
||||||
else if (mask == 0xff) {
|
}
|
||||||
// all lanes storing, so replace with a regular store
|
else if (mask == 0xff) {
|
||||||
llvm::Value *rvalue = callInst->getArgOperand(2);
|
// all lanes storing, so replace with a regular store
|
||||||
llvm::Type *storeType = rvalue->getType();
|
llvm::Value *rvalue = callInst->getArgOperand(2);
|
||||||
const char *name = LLVMGetName(callInst->getArgOperand(0),
|
llvm::Type *storeType = rvalue->getType();
|
||||||
"_ptrcast");
|
const char *name = LLVMGetName(callInst->getArgOperand(0),
|
||||||
llvm::Value *castPtr =
|
"_ptrcast");
|
||||||
new llvm::BitCastInst(callInst->getArgOperand(0),
|
llvm::Value *castPtr =
|
||||||
llvm::PointerType::get(storeType, 0),
|
new llvm::BitCastInst(callInst->getArgOperand(0),
|
||||||
name, callInst);
|
llvm::PointerType::get(storeType, 0),
|
||||||
lCopyMetadata(castPtr, callInst);
|
name, callInst);
|
||||||
|
lCopyMetadata(castPtr, callInst);
|
||||||
|
|
||||||
llvm::StoreInst *storeInst =
|
llvm::StoreInst *storeInst =
|
||||||
new llvm::StoreInst(rvalue, castPtr, (llvm::Instruction *)NULL);
|
new llvm::StoreInst(rvalue, castPtr, (llvm::Instruction *)NULL);
|
||||||
int align = callInst->getCalledFunction() == avxMaskedStore32 ? 4 : 8;
|
int align = callInst->getCalledFunction() == avxMaskedStore32 ? 4 : 8;
|
||||||
storeInst->setAlignment(align);
|
storeInst->setAlignment(align);
|
||||||
lCopyMetadata(storeInst, callInst);
|
lCopyMetadata(storeInst, callInst);
|
||||||
llvm::ReplaceInstWithInst(callInst, storeInst);
|
llvm::ReplaceInstWithInst(callInst, storeInst);
|
||||||
|
|
||||||
modifiedAny = true;
|
modifiedAny = true;
|
||||||
goto restart;
|
goto restart;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -949,13 +980,13 @@ VSelMovmskOpt::runOnBasicBlock(llvm::BasicBlock &bb) {
|
|||||||
llvm::SelectInst *selectInst = llvm::dyn_cast<llvm::SelectInst>(&*iter);
|
llvm::SelectInst *selectInst = llvm::dyn_cast<llvm::SelectInst>(&*iter);
|
||||||
if (selectInst != NULL && selectInst->getType()->isVectorTy()) {
|
if (selectInst != NULL && selectInst->getType()->isVectorTy()) {
|
||||||
llvm::Value *factor = selectInst->getOperand(0);
|
llvm::Value *factor = selectInst->getOperand(0);
|
||||||
int mask = lGetMask(factor);
|
|
||||||
int allOnMask = (1 << g->target.vectorWidth) - 1;
|
MaskStatus maskStatus = lGetMaskStatus(factor);
|
||||||
llvm::Value *value = NULL;
|
llvm::Value *value = NULL;
|
||||||
if (mask == allOnMask)
|
if (maskStatus == ALL_ON)
|
||||||
// Mask all on -> replace with the first select value
|
// Mask all on -> replace with the first select value
|
||||||
value = selectInst->getOperand(1);
|
value = selectInst->getOperand(1);
|
||||||
else if (mask == 0)
|
else if (maskStatus == ALL_OFF)
|
||||||
// Mask all off -> replace with the second select value
|
// Mask all off -> replace with the second select value
|
||||||
value = selectInst->getOperand(2);
|
value = selectInst->getOperand(2);
|
||||||
|
|
||||||
@@ -976,8 +1007,8 @@ VSelMovmskOpt::runOnBasicBlock(llvm::BasicBlock &bb) {
|
|||||||
if (calledFunc == NULL || calledFunc != m->module->getFunction("__movmsk"))
|
if (calledFunc == NULL || calledFunc != m->module->getFunction("__movmsk"))
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
int mask = lGetMask(callInst->getArgOperand(0));
|
uint32_t mask;
|
||||||
if (mask != -1) {
|
if (lGetMask(callInst->getArgOperand(0), &mask) == true) {
|
||||||
#if 0
|
#if 0
|
||||||
fprintf(stderr, "mask %d\n", mask);
|
fprintf(stderr, "mask %d\n", mask);
|
||||||
callInst->getArgOperand(0)->dump();
|
callInst->getArgOperand(0)->dump();
|
||||||
@@ -1964,10 +1995,8 @@ MaskedStoreOptPass::runOnBasicBlock(llvm::BasicBlock &bb) {
|
|||||||
llvm::Value *rvalue = callInst->getArgOperand(1);
|
llvm::Value *rvalue = callInst->getArgOperand(1);
|
||||||
llvm::Value *mask = callInst->getArgOperand(2);
|
llvm::Value *mask = callInst->getArgOperand(2);
|
||||||
|
|
||||||
int allOnMask = (1 << g->target.vectorWidth) - 1;
|
MaskStatus maskStatus = lGetMaskStatus(mask);
|
||||||
|
if (maskStatus == ALL_OFF) {
|
||||||
int maskAsInt = lGetMask(mask);
|
|
||||||
if (maskAsInt == 0) {
|
|
||||||
// Zero mask - no-op, so remove the store completely. (This
|
// Zero mask - no-op, so remove the store completely. (This
|
||||||
// may in turn lead to being able to optimize out instructions
|
// may in turn lead to being able to optimize out instructions
|
||||||
// that compute the rvalue...)
|
// that compute the rvalue...)
|
||||||
@@ -1975,11 +2004,10 @@ MaskedStoreOptPass::runOnBasicBlock(llvm::BasicBlock &bb) {
|
|||||||
modifiedAny = true;
|
modifiedAny = true;
|
||||||
goto restart;
|
goto restart;
|
||||||
}
|
}
|
||||||
else if (maskAsInt == allOnMask) {
|
else if (maskStatus == ALL_ON) {
|
||||||
// The mask is all on, so turn this into a regular store
|
// The mask is all on, so turn this into a regular store
|
||||||
llvm::Type *rvalueType = rvalue->getType();
|
llvm::Type *rvalueType = rvalue->getType();
|
||||||
llvm::Type *ptrType =
|
llvm::Type *ptrType = llvm::PointerType::get(rvalueType, 0);
|
||||||
llvm::PointerType::get(rvalueType, 0);
|
|
||||||
|
|
||||||
lvalue = new llvm::BitCastInst(lvalue, ptrType, "lvalue_to_ptr_type", callInst);
|
lvalue = new llvm::BitCastInst(lvalue, ptrType, "lvalue_to_ptr_type", callInst);
|
||||||
lCopyMetadata(lvalue, callInst);
|
lCopyMetadata(lvalue, callInst);
|
||||||
@@ -2072,20 +2100,18 @@ MaskedLoadOptPass::runOnBasicBlock(llvm::BasicBlock &bb) {
|
|||||||
// Got one; grab the operands
|
// Got one; grab the operands
|
||||||
llvm::Value *ptr = callInst->getArgOperand(0);
|
llvm::Value *ptr = callInst->getArgOperand(0);
|
||||||
llvm::Value *mask = callInst->getArgOperand(1);
|
llvm::Value *mask = callInst->getArgOperand(1);
|
||||||
int allOnMask = (1 << g->target.vectorWidth) - 1;
|
|
||||||
|
|
||||||
int maskAsInt = lGetMask(mask);
|
MaskStatus maskStatus = lGetMaskStatus(mask);
|
||||||
if (maskAsInt == 0) {
|
if (maskStatus == ALL_OFF) {
|
||||||
// Zero mask - no-op, so replace the load with an undef value
|
// Zero mask - no-op, so replace the load with an undef value
|
||||||
llvm::ReplaceInstWithValue(iter->getParent()->getInstList(),
|
llvm::ReplaceInstWithValue(iter->getParent()->getInstList(),
|
||||||
iter, llvm::UndefValue::get(callInst->getType()));
|
iter, llvm::UndefValue::get(callInst->getType()));
|
||||||
modifiedAny = true;
|
modifiedAny = true;
|
||||||
goto restart;
|
goto restart;
|
||||||
}
|
}
|
||||||
else if (maskAsInt == allOnMask) {
|
else if (maskStatus == ALL_ON) {
|
||||||
// The mask is all on, so turn this into a regular load
|
// The mask is all on, so turn this into a regular load
|
||||||
llvm::Type *ptrType =
|
llvm::Type *ptrType = llvm::PointerType::get(callInst->getType(), 0);
|
||||||
llvm::PointerType::get(callInst->getType(), 0);
|
|
||||||
ptr = new llvm::BitCastInst(ptr, ptrType, "ptr_cast_for_load",
|
ptr = new llvm::BitCastInst(ptr, ptrType, "ptr_cast_for_load",
|
||||||
callInst);
|
callInst);
|
||||||
llvm::Instruction *load =
|
llvm::Instruction *load =
|
||||||
@@ -2558,18 +2584,6 @@ public:
|
|||||||
char GatherCoalescePass::ID = 0;
|
char GatherCoalescePass::ID = 0;
|
||||||
|
|
||||||
|
|
||||||
/* Returns true if the mask is known at compile time to be "all on". */
|
|
||||||
static bool
|
|
||||||
lIsMaskAllOn(llvm::Value *mask) {
|
|
||||||
int m = lGetMask(mask);
|
|
||||||
if (m == -1)
|
|
||||||
return false;
|
|
||||||
|
|
||||||
int allOnMask = (1 << g->target.vectorWidth) - 1;
|
|
||||||
return (m == allOnMask);
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
/** Representation of a memory load that the gather coalescing code has
|
/** Representation of a memory load that the gather coalescing code has
|
||||||
decided to generate.
|
decided to generate.
|
||||||
*/
|
*/
|
||||||
@@ -3497,7 +3511,7 @@ GatherCoalescePass::runOnBasicBlock(llvm::BasicBlock &bb) {
|
|||||||
// Then and only then do we have a common base pointer with all
|
// Then and only then do we have a common base pointer with all
|
||||||
// offsets from that constants (in which case we can potentially
|
// offsets from that constants (in which case we can potentially
|
||||||
// coalesce).
|
// coalesce).
|
||||||
if (lIsMaskAllOn(mask) == false)
|
if (lGetMaskStatus(mask) != ALL_ON)
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
if (!LLVMVectorValuesAllEqual(variableOffsets))
|
if (!LLVMVectorValuesAllEqual(variableOffsets))
|
||||||
|
|||||||
@@ -33,7 +33,7 @@ parser.add_option("-r", "--random-shuffle", dest="random", help="Randomly order
|
|||||||
parser.add_option("-g", "--generics-include", dest="include_file", help="Filename for header implementing functions for generics",
|
parser.add_option("-g", "--generics-include", dest="include_file", help="Filename for header implementing functions for generics",
|
||||||
default=None)
|
default=None)
|
||||||
parser.add_option('-t', '--target', dest='target',
|
parser.add_option('-t', '--target', dest='target',
|
||||||
help='Set compilation target (sse2, sse2-x2, sse4, sse4-x2, avx, avx-x2, generic-4, generic-8, generic-16)',
|
help='Set compilation target (sse2, sse2-x2, sse4, sse4-x2, avx, avx-x2, generic-4, generic-8, generic-16, generic-32)',
|
||||||
default="sse4")
|
default="sse4")
|
||||||
parser.add_option('-a', '--arch', dest='arch',
|
parser.add_option('-a', '--arch', dest='arch',
|
||||||
help='Set architecture (x86, x86-64)',
|
help='Set architecture (x86, x86-64)',
|
||||||
@@ -69,6 +69,9 @@ if is_generic_target and options.include_file == None:
|
|||||||
elif options.target == "generic-16":
|
elif options.target == "generic-16":
|
||||||
sys.stderr.write("No generics #include specified; using examples/intrinsics/generic-16.h\n")
|
sys.stderr.write("No generics #include specified; using examples/intrinsics/generic-16.h\n")
|
||||||
options.include_file = "examples/intrinsics/generic-16.h"
|
options.include_file = "examples/intrinsics/generic-16.h"
|
||||||
|
elif options.target == "generic-32":
|
||||||
|
sys.stderr.write("No generics #include specified and no default available for \"generic-32\" target.\n")
|
||||||
|
sys.exit(1)
|
||||||
|
|
||||||
if options.compiler_exe == None:
|
if options.compiler_exe == None:
|
||||||
if is_windows:
|
if is_windows:
|
||||||
|
|||||||
Reference in New Issue
Block a user