Initial Support for new stdlib shift operator

This commit is contained in:
james.brodman
2013-10-22 18:06:54 -04:00
parent c18fa15db1
commit 899f85ce9c
5 changed files with 217 additions and 0 deletions

View File

@@ -536,6 +536,12 @@ lSetInternalFunctions(llvm::Module *module) {
"__set_system_isa",
"__sext_uniform_bool",
"__sext_varying_bool",
"__shift_double",
"__shift_float",
"__shift_i16",
"__shift_i32",
"__shift_i64",
"__shift_i8",
"__shuffle2_double",
"__shuffle2_float",
"__shuffle2_i16",

View File

@@ -80,6 +80,13 @@ declare <WIDTH x i32> @__rotate_i32(<WIDTH x i32>, i32) nounwind readnone
declare <WIDTH x double> @__rotate_double(<WIDTH x double>, i32) nounwind readnone
declare <WIDTH x i64> @__rotate_i64(<WIDTH x i64>, i32) nounwind readnone
; Lane shift builtins, one per element type.  Each takes a vector and an
; i32 amount and returns a vector of the same type.  Only declared here.
declare <WIDTH x i8> @__shift_i8(<WIDTH x i8>, i32) nounwind readnone
declare <WIDTH x i16> @__shift_i16(<WIDTH x i16>, i32) nounwind readnone
declare <WIDTH x float> @__shift_float(<WIDTH x float>, i32) nounwind readnone
declare <WIDTH x i32> @__shift_i32(<WIDTH x i32>, i32) nounwind readnone
declare <WIDTH x double> @__shift_double(<WIDTH x double>, i32) nounwind readnone
declare <WIDTH x i64> @__shift_i64(<WIDTH x i64>, i32) nounwind readnone
declare <WIDTH x i8> @__shuffle_i8(<WIDTH x i8>, <WIDTH x i32>) nounwind readnone
declare <WIDTH x i8> @__shuffle2_i8(<WIDTH x i8>, <WIDTH x i8>,
<WIDTH x i32>) nounwind readnone

View File

@@ -797,6 +797,43 @@ not_const:
ret <WIDTH x $1> %result
}
define <WIDTH x $1> @__shift_$1(<WIDTH x $1>, i32) nounwind readnone alwaysinline {
  ; Lane shift with zero fill: result lane k receives input lane k + %1,
  ; and zero when k + %1 lies outside the input vector.  Both paths below
  ; assume that %1 is in the range [-WIDTH, WIDTH].
  %isc = call i1 @__is_compile_time_constant_uniform_int32(i32 %1)
  ; the input vector followed by one vector of zero lanes
  %zeropaddedvec = shufflevector <WIDTH x $1> %0, <WIDTH x $1> zeroinitializer,
                                 <eval(2*WIDTH) x i32> < forloop(i, 0, eval(2*WIDTH-2), `i32 i, ')i32 eval(2*WIDTH-1) >
  br i1 %isc, label %is_const, label %not_const

is_const:
  ; though verbose, this turns into tight code if %1 is a constant
  ; a negative source lane wraps around into the zero padding
  forloop(i, 0, eval(WIDTH-1), `
  %delta_`'i = add i32 %1, i
  %delta_clamped_`'i = and i32 %delta_`'i, eval(2*WIDTH-1)
  %v_`'i = extractelement <eval(2*WIDTH) x $1> %zeropaddedvec, i32 %delta_clamped_`'i')

  %ret_0 = insertelement <WIDTH x $1> zeroinitializer, $1 %v_0, i32 0
  forloop(i, 1, eval(WIDTH-1), `  %ret_`'i = insertelement <WIDTH x $1> %ret_`'eval(i-1), $1 %v_`'i, i32 i
')
  ret <WIDTH x $1> %ret_`'eval(WIDTH-1)

not_const:
  ; store the vector into memory with a zero vector on each side of it
  %ptr = alloca <WIDTH x $1>, i32 3
  %ptr0 = getelementptr <WIDTH x $1> * %ptr, i32 0
  store <WIDTH x $1> zeroinitializer, <WIDTH x $1> * %ptr0
  %ptr1 = getelementptr <WIDTH x $1> * %ptr, i32 1
  store <WIDTH x $1> %0, <WIDTH x $1> * %ptr1
  %ptr2 = getelementptr <WIDTH x $1> * %ptr, i32 2
  store <WIDTH x $1> zeroinitializer, <WIDTH x $1> * %ptr2

  ; the input starts one full vector into the buffer, so the result begins
  ; at element %1 plus the vector width (this was hard-coded for a 16-wide
  ; target before and was wrong for every other width)
  %offset = add i32 %1, WIDTH
  %ptr_as_elt_array = bitcast <WIDTH x $1> * %ptr to [eval(3*WIDTH) x $1] *
  %load_ptr = getelementptr [eval(3*WIDTH) x $1] * %ptr_as_elt_array, i32 0, i32 %offset
  %load_ptr_vec = bitcast $1 * %load_ptr to <WIDTH x $1> *
  %result = load <WIDTH x $1> * %load_ptr_vec, align $2
  ret <WIDTH x $1> %result
}
define <WIDTH x $1> @__shuffle_$1(<WIDTH x $1>, <WIDTH x i32>) nounwind readnone alwaysinline {
forloop(i, 0, eval(WIDTH-1), `
%index_`'i = extractelement <WIDTH x i32> %1, i32 i')

137
opt.cpp
View File

@@ -72,6 +72,7 @@
#include <llvm/Analysis/ConstantFolding.h>
#include <llvm/Target/TargetLibraryInfo.h>
#include <llvm/ADT/Triple.h>
#include <llvm/ADT/SmallSet.h>
#include <llvm/Transforms/Scalar.h>
#include <llvm/Transforms/IPO.h>
#include <llvm/Transforms/Utils/BasicBlockUtils.h>
@@ -124,6 +125,8 @@ static llvm::Pass *CreateMakeInternalFuncsStaticPass();
static llvm::Pass *CreateDebugPass(char * output);
static llvm::Pass *CreateReplaceExtractInsertChainsPass();
#define DEBUG_START_PASS(NAME) \
if (g->debugPrint && \
(getenv("FUNC") == NULL || \
@@ -635,6 +638,7 @@ Optimize(llvm::Module *module, int optLevel) {
optPM.add(CreateIsCompileTimeConstantPass(true));
optPM.add(CreateIntrinsicsOptPass());
optPM.add(CreateInstructionSimplifyPass());
optPM.add(CreateReplaceExtractInsertChainsPass());
optPM.add(llvm::createMemCpyOptPass());
optPM.add(llvm::createSCCPPass());
@@ -4923,3 +4927,136 @@ static llvm::Pass *
CreatePeepholePass() {
return new PeepholePass;
}
///////////////////////////////////////////////////////////////////////////
// ReplaceExtractInsertChainsPass

/** Basic-block pass for chains of ExtractElementInsts feeding
    InsertElementInsts.

    Such chains show up occasionally and cannot be emitted as
    ShuffleVectorInsts up front, because the values involved are not yet
    known to be constant when they are generated.  This pass finds the
    chains and rewrites them as ShuffleVectorInsts when all of the
    relevant values turn out to be constant.
 */
class ReplaceExtractInsertChainsPass : public llvm::BasicBlockPass {
public:
    ReplaceExtractInsertChainsPass()
        : BasicBlockPass(ID) {
    }

    /** Processes one basic block; returns true if it was modified. */
    bool runOnBasicBlock(llvm::BasicBlock &BB);

    /** Human-readable name of this pass. */
    const char *getPassName() const {
        return "Resolve \"replace extract insert chains\"";
    }

    /** Pass identification; its address is handed to the base class. */
    static char ID;
};

char ReplaceExtractInsertChainsPass::ID = 0;
#include <iostream>
#include <vector>
/** Returns the value of an integer constant as a sign-extended int64_t.

    @param offset  Value to read.  It must be an llvm::ConstantInt that is
                   either 32 or 64 bits wide; anything else (including a
                   non-constant value) trips the Assert below.
    @return        The constant's value, as given by getSExtValue().
*/
static int64_t
lGetIntValue(llvm::Value *offset) {
// dyn_cast yields NULL for anything that is not a ConstantInt
llvm::ConstantInt *intOffset = llvm::dyn_cast<llvm::ConstantInt>(offset);
Assert(intOffset && (intOffset->getBitWidth() == 32 ||
intOffset->getBitWidth() == 64));
return intOffset->getSExtValue();
}
bool
ReplaceExtractInsertChainsPass::runOnBasicBlock(llvm::BasicBlock &bb) {
DEBUG_START_PASS("ReplaceExtractInsertChainsPass");
bool modifiedAny = false;
// Initialize our mapping to the first spot in the zero vector
int vectorWidth = g->target->getVectorWidth();
int shuffleMap[vectorWidth];
for (int i = 0; i < vectorWidth; i++) {
shuffleMap[i] = vectorWidth;
}
// Hack-y. 16 is likely the upper limit for now.
llvm::SmallSet<llvm::Value *, 16> inserts;
// save the last Insert in the chain
llvm::Value * lastInsert = NULL;
for (llvm::BasicBlock::iterator i = bb.begin(), e = bb.end(); i != e; ++i) {
// Iterate through the instructions looking for InsertElementInsts
llvm::InsertElementInst *ieInst = llvm::dyn_cast<llvm::InsertElementInst>(&*i);
if (ieInst == NULL) {
// These aren't the instructions you're looking for.
continue;
}
llvm::Value * base = ieInst->getOperand(0);
if ( (llvm::isa<llvm::UndefValue>(base))
|| (llvm::isa<llvm::ConstantAggregateZero>(base))
|| (base == lastInsert)) {
// if source for insert scalar is 0 or an EEInst, add insert
llvm::Value *scalar = ieInst->getOperand(1);
if (llvm::ExtractElementInst *eeInst = llvm::dyn_cast<llvm::ExtractElementInst>(scalar)) {
// We're only going to deal with Inserts into a Constant vector lane
if (llvm::isa<llvm::Constant>(eeInst->getOperand(1))) {
inserts.insert(ieInst);
lastInsert = ieInst;
}
}
else if (llvm::ConstantInt *ci = llvm::dyn_cast<llvm::ConstantInt>(scalar)) {
if (ci->isZero()) {
inserts.insert(ieInst);
lastInsert = ieInst;
}
}
else {
lastInsert = NULL;
}
}
}
// Look for chains, not insert/shuffle sequences
if (inserts.size() > 1) {
// The vector from which we're extracting elements
llvm::Value * baseVec = NULL;
llvm::Value *ee = llvm::cast<llvm::InsertElementInst>((*inserts.begin()))->getOperand(1);
if (llvm::ExtractElementInst *eeInst = llvm::dyn_cast<llvm::ExtractElementInst>(ee)) {
baseVec = eeInst->getOperand(0);
}
bool sameBase = true;
for (llvm::SmallSet<llvm::Value *,16>::iterator i = inserts.begin(); i != inserts.end(); i++) {
llvm::InsertElementInst *ie = llvm::cast<llvm::InsertElementInst>(*i);
if (llvm::ExtractElementInst *ee = llvm::dyn_cast<llvm::ExtractElementInst>(ie->getOperand(1))) {
if (ee->getOperand(0) != baseVec) {
sameBase = false;
break;
}
int64_t from = lGetIntValue(ee->getIndexOperand());
int64_t to = lGetIntValue(ie->getOperand(2));
shuffleMap[to] = from;
}
}
if (sameBase) {
llvm::Value *shuffleIdxs = LLVMInt32Vector(shuffleMap);
llvm::Value *zeroVec = llvm::ConstantAggregateZero::get(shuffleIdxs->getType());
llvm::Value *shuffle = new llvm::ShuffleVectorInst(baseVec, zeroVec, shuffleIdxs, "shiftInZero", llvm::cast<llvm::Instruction>(lastInsert));
// For now, be lazy and let DCE clean up the Extracts/Inserts.
lastInsert->replaceAllUsesWith(shuffle);
modifiedAny = true;
}
}
DEBUG_END_PASS("ReplaceExtractInsertChainsPass");
return modifiedAny;
}
/** Factory for the pass: allocates a fresh ReplaceExtractInsertChainsPass
    and returns it through the generic llvm::Pass interface.
 */
static llvm::Pass *
CreateReplaceExtractInsertChainsPass() {
    llvm::Pass *pass = new ReplaceExtractInsertChainsPass();
    return pass;
}

View File

@@ -170,6 +170,36 @@ static inline int64 rotate(int64 v, uniform int i) {
return __rotate_i64(v, i);
}
// shift(v, i): one overload per element type (float, int8, int16, int32,
// double, int64).  Each takes a varying value v and a uniform amount i and
// forwards both to the __shift_<type> builtin for that element type.
// NOTE(review): the exact lane semantics (direction, zero fill, legal range
// of i) are defined by the builtins, not here -- confirm against them before
// documenting for users.
__declspec(safe)
static inline float shift(float v, uniform int i) {
return __shift_float(v, i);
}
__declspec(safe)
static inline int8 shift(int8 v, uniform int i) {
return __shift_i8(v, i);
}
__declspec(safe)
static inline int16 shift(int16 v, uniform int i) {
return __shift_i16(v, i);
}
__declspec(safe)
static inline int32 shift(int32 v, uniform int i) {
return __shift_i32(v, i);
}
__declspec(safe)
static inline double shift(double v, uniform int i) {
return __shift_double(v, i);
}
__declspec(safe)
static inline int64 shift(int64 v, uniform int i) {
return __shift_i64(v, i);
}
__declspec(safe)
static inline float shuffle(float v, int i) {
return __shuffle_float(v, i);