Handle more instruction types when flattening offset vectors.
Generalize the lScalarizeVector() utility routine (used in determining when we can change gathers/scatters into vector loads/stores, respectively) to handle vector shuffles and vector loads. This fixes issue #79, which provided a case where a gather was being performed even though a vector load was possible.
This commit is contained in:
208
opt.cpp
208
opt.cpp
@@ -78,6 +78,12 @@
|
|||||||
#endif
|
#endif
|
||||||
#include <llvm/Analysis/DebugInfo.h>
|
#include <llvm/Analysis/DebugInfo.h>
|
||||||
#include <llvm/Support/Dwarf.h>
|
#include <llvm/Support/Dwarf.h>
|
||||||
|
#ifdef ISPC_IS_LINUX
|
||||||
|
#include <alloca.h>
|
||||||
|
#elif defined(ISPC_IS_WINDOWS)
|
||||||
|
#include <malloc.h>
|
||||||
|
#define alloca _alloca
|
||||||
|
#endif // ISPC_IS_WINDOWS
|
||||||
|
|
||||||
static llvm::Pass *CreateIntrinsicsOptPass();
|
static llvm::Pass *CreateIntrinsicsOptPass();
|
||||||
static llvm::Pass *CreateGatherScatterFlattenPass();
|
static llvm::Pass *CreateGatherScatterFlattenPass();
|
||||||
@@ -1520,8 +1526,8 @@ static void lPrintVector(const char *info, llvm::Value *elements[ISPC_MAX_NVEC])
|
|||||||
|
|
||||||
|
|
||||||
/** Given an LLVM vector in vec, return a 'scalarized' version of the
|
/** Given an LLVM vector in vec, return a 'scalarized' version of the
|
||||||
vector in the provided offsets[] array. For example, if the vector
|
vector in the provided scalarizedVector[] array. For example, if the
|
||||||
value passed in is:
|
vector value passed in is:
|
||||||
|
|
||||||
add <4 x i32> %a_smear, <4 x i32> <4, 8, 12, 16>,
|
add <4 x i32> %a_smear, <4 x i32> <4, 8, 12, 16>,
|
||||||
|
|
||||||
@@ -1542,28 +1548,39 @@ static void lPrintVector(const char *info, llvm::Value *elements[ISPC_MAX_NVEC])
|
|||||||
@param vec Vector to be scalarized
|
@param vec Vector to be scalarized
|
||||||
@param scalarizedVector Array in which to store the individual vector
|
@param scalarizedVector Array in which to store the individual vector
|
||||||
elements
|
elements
|
||||||
|
@param vectorLength Number of elements in the given vector. (The
|
||||||
|
passed scalarizedVector array must also be at least
|
||||||
|
this length.)
|
||||||
@returns True if the vector was successfully scalarized and
|
@returns True if the vector was successfully scalarized and
|
||||||
the values in offsets[] are valid; false otherwise
|
the values in scalarizedVector[] are valid; false otherwise
|
||||||
*/
|
*/
|
||||||
static bool
|
static bool
|
||||||
lScalarizeVector(llvm::Value *vec, llvm::Value *scalarizedVector[ISPC_MAX_NVEC]) {
|
lScalarizeVector(llvm::Value *vec, llvm::Value **scalarizedVector,
|
||||||
|
int vectorLength) {
|
||||||
// First initialize the values of scalarizedVector[] to NULL.
|
// First initialize the values of scalarizedVector[] to NULL.
|
||||||
for (int i = 0; i < g->target.vectorWidth; ++i)
|
for (int i = 0; i < vectorLength; ++i)
|
||||||
scalarizedVector[i] = NULL;
|
scalarizedVector[i] = NULL;
|
||||||
|
|
||||||
|
// It may be ok for the vector to be an undef vector; these come up for
|
||||||
|
// example in shufflevector instructions. As long as elements of the
|
||||||
|
// undef vector aren't referenced by the shuffle indices, this is fine.
|
||||||
|
if (llvm::isa<llvm::UndefValue>(vec))
|
||||||
|
return true;
|
||||||
|
|
||||||
// ConstantVectors are easy; just pull out the individual constant
|
// ConstantVectors are easy; just pull out the individual constant
|
||||||
// element values
|
// element values
|
||||||
llvm::ConstantVector *cv = llvm::dyn_cast<llvm::ConstantVector>(vec);
|
llvm::ConstantVector *cv = llvm::dyn_cast<llvm::ConstantVector>(vec);
|
||||||
if (cv != NULL) {
|
if (cv != NULL) {
|
||||||
for (int i = 0; i < g->target.vectorWidth; ++i)
|
for (int i = 0; i < vectorLength; ++i)
|
||||||
scalarizedVector[i] = cv->getOperand(i);
|
scalarizedVector[i] = cv->getOperand(i);
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
// It's also easy if it's just a vector of all zeros
|
// It's also easy if it's just a vector of all zeros
|
||||||
llvm::ConstantAggregateZero *caz = llvm::dyn_cast<llvm::ConstantAggregateZero>(vec);
|
llvm::ConstantAggregateZero *caz =
|
||||||
if (caz) {
|
llvm::dyn_cast<llvm::ConstantAggregateZero>(vec);
|
||||||
for (int i = 0; i < g->target.vectorWidth; ++i)
|
if (caz != NULL) {
|
||||||
|
for (int i = 0; i < vectorLength; ++i)
|
||||||
scalarizedVector[i] = LLVMInt32(0);
|
scalarizedVector[i] = LLVMInt32(0);
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
@@ -1575,13 +1592,16 @@ lScalarizeVector(llvm::Value *vec, llvm::Value *scalarizedVector[ISPC_MAX_NVEC])
|
|||||||
// scalar values we return from here are synthesized with scalar
|
// scalar values we return from here are synthesized with scalar
|
||||||
// versions of the original vector binary operator
|
// versions of the original vector binary operator
|
||||||
llvm::Instruction::BinaryOps opcode = bo->getOpcode();
|
llvm::Instruction::BinaryOps opcode = bo->getOpcode();
|
||||||
llvm::Value *v0[ISPC_MAX_NVEC], *v1[ISPC_MAX_NVEC];
|
llvm::Value **v0 =
|
||||||
|
(llvm::Value **)alloca(vectorLength * sizeof(llvm::Value *));
|
||||||
|
llvm::Value **v1 =
|
||||||
|
(llvm::Value **)alloca(vectorLength * sizeof(llvm::Value *));
|
||||||
|
|
||||||
if (!lScalarizeVector(bo->getOperand(0), v0) ||
|
if (!lScalarizeVector(bo->getOperand(0), v0, vectorLength) ||
|
||||||
!lScalarizeVector(bo->getOperand(1), v1))
|
!lScalarizeVector(bo->getOperand(1), v1, vectorLength))
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
for (int i = 0; i < g->target.vectorWidth; ++i) {
|
for (int i = 0; i < vectorLength; ++i) {
|
||||||
scalarizedVector[i] =
|
scalarizedVector[i] =
|
||||||
llvm::BinaryOperator::Create(opcode, v0[i], v1[i], "flat_bop", bo);
|
llvm::BinaryOperator::Create(opcode, v0[i], v1[i], "flat_bop", bo);
|
||||||
lCopyMetadata(scalarizedVector[i], bo);
|
lCopyMetadata(scalarizedVector[i], bo);
|
||||||
@@ -1606,7 +1626,7 @@ lScalarizeVector(llvm::Value *vec, llvm::Value *scalarizedVector[ISPC_MAX_NVEC])
|
|||||||
// value in scalarizedVector[] based on the value being inserted.
|
// value in scalarizedVector[] based on the value being inserted.
|
||||||
while (ie != NULL) {
|
while (ie != NULL) {
|
||||||
uint64_t iOffset = lGetIntValue(ie->getOperand(2));
|
uint64_t iOffset = lGetIntValue(ie->getOperand(2));
|
||||||
assert((int)iOffset < g->target.vectorWidth);
|
assert((int)iOffset < vectorLength);
|
||||||
assert(scalarizedVector[iOffset] == NULL);
|
assert(scalarizedVector[iOffset] == NULL);
|
||||||
|
|
||||||
scalarizedVector[iOffset] = ie->getOperand(1);
|
scalarizedVector[iOffset] = ie->getOperand(1);
|
||||||
@@ -1620,15 +1640,17 @@ lScalarizeVector(llvm::Value *vec, llvm::Value *scalarizedVector[ISPC_MAX_NVEC])
|
|||||||
}
|
}
|
||||||
|
|
||||||
llvm::CastInst *ci = llvm::dyn_cast<llvm::CastInst>(vec);
|
llvm::CastInst *ci = llvm::dyn_cast<llvm::CastInst>(vec);
|
||||||
if (ci) {
|
if (ci != NULL) {
|
||||||
// Casts are similar to BinaryOperators in that we attempt to
|
// Casts are similar to BinaryOperators in that we attempt to
|
||||||
// scalarize the vector being cast and if successful, we apply
|
// scalarize the vector being cast and if successful, we apply
|
||||||
// equivalent scalar cast operators to each of the values in the
|
// equivalent scalar cast operators to each of the values in the
|
||||||
// scalarized vector.
|
// scalarized vector.
|
||||||
llvm::Instruction::CastOps op = ci->getOpcode();
|
llvm::Instruction::CastOps op = ci->getOpcode();
|
||||||
|
|
||||||
llvm::Value *scalarizedTarget[ISPC_MAX_NVEC];
|
llvm::Value **scalarizedTarget =
|
||||||
if (!lScalarizeVector(ci->getOperand(0), scalarizedTarget))
|
(llvm::Value **)alloca(vectorLength * sizeof(llvm::Value *));
|
||||||
|
if (!lScalarizeVector(ci->getOperand(0), scalarizedTarget,
|
||||||
|
vectorLength))
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
LLVM_TYPE_CONST llvm::Type *destType = ci->getDestTy();
|
LLVM_TYPE_CONST llvm::Type *destType = ci->getDestTy();
|
||||||
@@ -1637,7 +1659,7 @@ lScalarizeVector(llvm::Value *vec, llvm::Value *scalarizedVector[ISPC_MAX_NVEC])
|
|||||||
assert(vectorDestType != NULL);
|
assert(vectorDestType != NULL);
|
||||||
LLVM_TYPE_CONST llvm::Type *elementType = vectorDestType->getElementType();
|
LLVM_TYPE_CONST llvm::Type *elementType = vectorDestType->getElementType();
|
||||||
|
|
||||||
for (int i = 0; i < g->target.vectorWidth; ++i) {
|
for (int i = 0; i < vectorLength; ++i) {
|
||||||
scalarizedVector[i] =
|
scalarizedVector[i] =
|
||||||
llvm::CastInst::Create(op, scalarizedTarget[i], elementType,
|
llvm::CastInst::Create(op, scalarizedTarget[i], elementType,
|
||||||
"cast", ci);
|
"cast", ci);
|
||||||
@@ -1647,16 +1669,11 @@ lScalarizeVector(llvm::Value *vec, llvm::Value *scalarizedVector[ISPC_MAX_NVEC])
|
|||||||
}
|
}
|
||||||
|
|
||||||
llvm::ShuffleVectorInst *svi = llvm::dyn_cast<llvm::ShuffleVectorInst>(vec);
|
llvm::ShuffleVectorInst *svi = llvm::dyn_cast<llvm::ShuffleVectorInst>(vec);
|
||||||
if (svi) {
|
if (svi != NULL) {
|
||||||
// Note that the code for shufflevector instructions is untested.
|
|
||||||
// (We haven't yet had a case where it needs to run). Therefore,
|
|
||||||
// an assert at the bottom of this routien will hit the first time
|
|
||||||
// it runs as a reminder that this needs to be tested further.
|
|
||||||
|
|
||||||
LLVM_TYPE_CONST llvm::VectorType *svInstType =
|
LLVM_TYPE_CONST llvm::VectorType *svInstType =
|
||||||
llvm::dyn_cast<LLVM_TYPE_CONST llvm::VectorType>(svi->getType());
|
llvm::dyn_cast<LLVM_TYPE_CONST llvm::VectorType>(svi->getType());
|
||||||
assert(svInstType != NULL);
|
assert(svInstType != NULL);
|
||||||
assert((int)svInstType->getNumElements() == g->target.vectorWidth);
|
assert((int)svInstType->getNumElements() == vectorLength);
|
||||||
|
|
||||||
// Scalarize the two vectors being shuffled. First figure out how
|
// Scalarize the two vectors being shuffled. First figure out how
|
||||||
// big they are.
|
// big they are.
|
||||||
@@ -1671,58 +1688,90 @@ lScalarizeVector(llvm::Value *vec, llvm::Value *scalarizedVector[ISPC_MAX_NVEC])
|
|||||||
int n0 = vectorType0->getNumElements();
|
int n0 = vectorType0->getNumElements();
|
||||||
int n1 = vectorType1->getNumElements();
|
int n1 = vectorType1->getNumElements();
|
||||||
|
|
||||||
// FIXME: It's actually totally legitimate for these two to have
|
|
||||||
// different sizes; the final result just needs to have the native
|
|
||||||
// vector width. To handle this, not only do we need to
|
|
||||||
// potentially dynamically allocate space for the arrays passed
|
|
||||||
// into lScalarizeVector, but we need to change the rest of its
|
|
||||||
// implementation to not key off g->target.vectorWidth everywhere
|
|
||||||
// to get the sizes of the arrays to iterate over, etc.
|
|
||||||
assert(n0 == g->target.vectorWidth && n1 == g->target.vectorWidth);
|
|
||||||
|
|
||||||
// Go ahead and scalarize the two input vectors now.
|
// Go ahead and scalarize the two input vectors now.
|
||||||
// FIXME: it's ok if some or all of the values of these two vectors
|
llvm::Value **v0 = (llvm::Value **)alloca(n0 * sizeof(llvm::Value *));
|
||||||
// have undef values, so long as we don't try to access undef
|
llvm::Value **v1 = (llvm::Value **)alloca(n1 * sizeof(llvm::Value *));
|
||||||
// values with the vector indices provided to the instruction.
|
|
||||||
// Should fix lScalarizeVector so that it doesn't return false in
|
if (!lScalarizeVector(svi->getOperand(0), v0, n0) ||
|
||||||
// this case and just leaves the elements of the arrays with undef
|
!lScalarizeVector(svi->getOperand(1), v1, n1))
|
||||||
// values as NULL.
|
|
||||||
llvm::Value *v0[ISPC_MAX_NVEC], *v1[ISPC_MAX_NVEC];
|
|
||||||
if (!lScalarizeVector(svi->getOperand(0), v0) ||
|
|
||||||
!lScalarizeVector(svi->getOperand(1), v1))
|
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
llvm::ConstantVector *shuffleIndicesVector =
|
llvm::ConstantAggregateZero *caz =
|
||||||
llvm::dyn_cast<llvm::ConstantVector>(svi->getOperand(2));
|
llvm::dyn_cast<llvm::ConstantAggregateZero>(svi->getOperand(2));
|
||||||
// I think this has to be a ConstantVector. If this ever hits,
|
if (caz != NULL) {
|
||||||
// we'll dig into what we got instead and figure out how to handle
|
for (int i = 0; i < vectorLength; ++i)
|
||||||
// that...
|
scalarizedVector[i] = v0[0];
|
||||||
assert(shuffleIndicesVector != NULL);
|
}
|
||||||
|
else {
|
||||||
// Get the integer indices for each element of the returned vector
|
llvm::ConstantVector *shuffleIndicesVector =
|
||||||
llvm::SmallVector<llvm::Constant *, ISPC_MAX_NVEC> shuffleIndices;
|
llvm::dyn_cast<llvm::ConstantVector>(svi->getOperand(2));
|
||||||
shuffleIndicesVector->getVectorElements(shuffleIndices);
|
// I think this has to be a ConstantVector. If this ever hits,
|
||||||
assert((int)shuffleIndices.size() == g->target.vectorWidth);
|
// we'll dig into what we got instead and figure out how to handle
|
||||||
|
// that...
|
||||||
// And loop over the indices, setting the i'th element of the
|
assert(shuffleIndicesVector != NULL);
|
||||||
// result vector with the source vector element that corresponds to
|
|
||||||
// the i'th shuffle index value.
|
// Get the integer indices for each element of the returned vector
|
||||||
for (unsigned int i = 0; i < shuffleIndices.size(); ++i) {
|
llvm::SmallVector<llvm::Constant *, ISPC_MAX_NVEC> shuffleIndices;
|
||||||
if (!llvm::isa<llvm::ConstantInt>(shuffleIndices[i]))
|
shuffleIndicesVector->getVectorElements(shuffleIndices);
|
||||||
// I'm not sure when this case would ever happen, though..
|
assert((int)shuffleIndices.size() == vectorLength);
|
||||||
return false;
|
|
||||||
int offset = (int)lGetIntValue(shuffleIndices[i]);
|
// And loop over the indices, setting the i'th element of the
|
||||||
assert(offset >= 0 && offset < n0+n1);
|
// result vector with the source vector element that corresponds to
|
||||||
|
// the i'th shuffle index value.
|
||||||
if (offset < n0)
|
for (unsigned int i = 0; i < shuffleIndices.size(); ++i) {
|
||||||
// Offsets from 0 to n0-1 index into the first vector
|
// Every shuffle index should be a ConstantInt; I'm not sure when anything else would ever show up, though.
|
||||||
scalarizedVector[i] = v0[offset];
|
assert(llvm::isa<llvm::ConstantInt>(shuffleIndices[i]));
|
||||||
else
|
|
||||||
// And offsets from n0 to (n0+n1-1) index into the second
|
int offset = (int)lGetIntValue(shuffleIndices[i]);
|
||||||
// vector
|
assert(offset >= 0 && offset < n0+n1);
|
||||||
scalarizedVector[i] = v1[offset - n0];
|
|
||||||
|
if (offset < n0)
|
||||||
|
// Offsets from 0 to n0-1 index into the first vector
|
||||||
|
scalarizedVector[i] = v0[offset];
|
||||||
|
else
|
||||||
|
// And offsets from n0 to (n0+n1-1) index into the second
|
||||||
|
// vector
|
||||||
|
scalarizedVector[i] = v1[offset - n0];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
llvm::LoadInst *li = llvm::dyn_cast<llvm::LoadInst>(vec);
|
||||||
|
if (li != NULL) {
|
||||||
|
llvm::Value *baseAddr = li->getOperand(0);
|
||||||
|
llvm::Value *baseInt = new llvm::PtrToIntInst(baseAddr, LLVMTypes::Int64Type,
|
||||||
|
"base2int", li);
|
||||||
|
lCopyMetadata(baseInt, li);
|
||||||
|
|
||||||
|
LLVM_TYPE_CONST llvm::PointerType *ptrType =
|
||||||
|
llvm::dyn_cast<llvm::PointerType>(baseAddr->getType());
|
||||||
|
assert(ptrType != NULL);
|
||||||
|
LLVM_TYPE_CONST llvm::VectorType *vecType =
|
||||||
|
llvm::dyn_cast<llvm::VectorType>(ptrType->getElementType());
|
||||||
|
assert(vecType != NULL);
|
||||||
|
LLVM_TYPE_CONST llvm::Type *elementType = vecType->getElementType();
|
||||||
|
uint64_t elementSize;
|
||||||
|
bool sizeKnown = lSizeOfIfKnown(elementType, &elementSize);
|
||||||
|
assert(sizeKnown == true);
|
||||||
|
|
||||||
|
LLVM_TYPE_CONST llvm::Type *eltPtrType = llvm::PointerType::get(elementType, 0);
|
||||||
|
|
||||||
|
for (int i = 0; i < vectorLength; ++i) {
|
||||||
|
llvm::Value *intPtrOffset =
|
||||||
|
llvm::BinaryOperator::Create(llvm::Instruction::Add, baseInt,
|
||||||
|
LLVMInt64(i * elementSize), "baseoffset",
|
||||||
|
li);
|
||||||
|
lCopyMetadata(intPtrOffset, li);
|
||||||
|
llvm::Value *scalarLoadPtr =
|
||||||
|
new llvm::IntToPtrInst(intPtrOffset, eltPtrType, "int2ptr", li);
|
||||||
|
lCopyMetadata(scalarLoadPtr, li);
|
||||||
|
|
||||||
|
llvm::Instruction *scalarLoad =
|
||||||
|
new llvm::LoadInst(scalarLoadPtr, "loadelt", li);
|
||||||
|
lCopyMetadata(scalarLoad, li);
|
||||||
|
scalarizedVector[i] = scalarLoad;
|
||||||
}
|
}
|
||||||
FATAL("the above code is untested so far; check now that it's actually running");
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -2134,11 +2183,18 @@ GSImprovementsPass::runOnBasicBlock(llvm::BasicBlock &bb) {
|
|||||||
if (ce && ce->getOpcode() == llvm::Instruction::BitCast)
|
if (ce && ce->getOpcode() == llvm::Instruction::BitCast)
|
||||||
base = ce->getOperand(0);
|
base = ce->getOperand(0);
|
||||||
|
|
||||||
// Try to out the offsets; the i'th element of the offsetElements
|
// Try to find out the offsets; the i'th element of the
|
||||||
// array should be an i32 with the value of the offset for the i'th
|
// offsetElements array should be an i32 with the value of the
|
||||||
// vector lane. This may fail; if so, just give up.
|
// offset for the i'th vector lane. This may fail; if so, just
|
||||||
|
// give up.
|
||||||
|
llvm::Value *vecValue = callInst->getArgOperand(1);
|
||||||
|
LLVM_TYPE_CONST llvm::VectorType *vt =
|
||||||
|
llvm::dyn_cast<llvm::VectorType>(vecValue->getType());
|
||||||
|
assert(vt != NULL);
|
||||||
|
int vecLength = vt->getNumElements();
|
||||||
|
assert(vecLength == g->target.vectorWidth);
|
||||||
llvm::Value *offsetElements[ISPC_MAX_NVEC];
|
llvm::Value *offsetElements[ISPC_MAX_NVEC];
|
||||||
if (!lScalarizeVector(callInst->getArgOperand(1), offsetElements))
|
if (!lScalarizeVector(vecValue, offsetElements, vecLength))
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
llvm::Value *mask = callInst->getArgOperand((gatherInfo != NULL) ? 2 : 3);
|
llvm::Value *mask = callInst->getArgOperand((gatherInfo != NULL) ? 2 : 3);
|
||||||
|
|||||||
17
tests/shuffle-flatten.ispc
Normal file
17
tests/shuffle-flatten.ispc
Normal file
@@ -0,0 +1,17 @@
|
|||||||
|
|
||||||
|
export uniform int width() { return programCount; }
|
||||||
|
|
||||||
|
export void f_f(uniform float RET[], uniform float aFOO[]) {
|
||||||
|
int tmp1 = shuffle(programIndex, 0, programIndex);
|
||||||
|
|
||||||
|
RET[programIndex] = 10;
|
||||||
|
if (programIndex < 1) {
|
||||||
|
uniform int foo = extract(tmp1, 0);
|
||||||
|
RET[programIndex] = aFOO[foo + programIndex];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
export void result(uniform float RET[]) {
|
||||||
|
RET[programIndex] = 10;
|
||||||
|
RET[0] = 1;
|
||||||
|
}
|
||||||
Reference in New Issue
Block a user