Initial Support for new stdlib shift operator
This commit is contained in:
@@ -536,6 +536,12 @@ lSetInternalFunctions(llvm::Module *module) {
|
|||||||
"__set_system_isa",
|
"__set_system_isa",
|
||||||
"__sext_uniform_bool",
|
"__sext_uniform_bool",
|
||||||
"__sext_varying_bool",
|
"__sext_varying_bool",
|
||||||
|
"__shift_double",
|
||||||
|
"__shift_float",
|
||||||
|
"__shift_i16",
|
||||||
|
"__shift_i32",
|
||||||
|
"__shift_i64",
|
||||||
|
"__shift_i8",
|
||||||
"__shuffle2_double",
|
"__shuffle2_double",
|
||||||
"__shuffle2_float",
|
"__shuffle2_float",
|
||||||
"__shuffle2_i16",
|
"__shuffle2_i16",
|
||||||
|
|||||||
@@ -80,6 +80,13 @@ declare <WIDTH x i32> @__rotate_i32(<WIDTH x i32>, i32) nounwind readnone
|
|||||||
declare <WIDTH x double> @__rotate_double(<WIDTH x double>, i32) nounwind readnone
|
declare <WIDTH x double> @__rotate_double(<WIDTH x double>, i32) nounwind readnone
|
||||||
declare <WIDTH x i64> @__rotate_i64(<WIDTH x i64>, i32) nounwind readnone
|
declare <WIDTH x i64> @__rotate_i64(<WIDTH x i64>, i32) nounwind readnone
|
||||||
|
|
||||||
|
;; cross-lane element shifts by a uniform i32 amount, one per element type
declare <WIDTH x i8> @__shift_i8(<WIDTH x i8>, i32) nounwind readnone
declare <WIDTH x i16> @__shift_i16(<WIDTH x i16>, i32) nounwind readnone
declare <WIDTH x float> @__shift_float(<WIDTH x float>, i32) nounwind readnone
declare <WIDTH x i32> @__shift_i32(<WIDTH x i32>, i32) nounwind readnone
declare <WIDTH x double> @__shift_double(<WIDTH x double>, i32) nounwind readnone
declare <WIDTH x i64> @__shift_i64(<WIDTH x i64>, i32) nounwind readnone
|
||||||
|
|
||||||
declare <WIDTH x i8> @__shuffle_i8(<WIDTH x i8>, <WIDTH x i32>) nounwind readnone
|
declare <WIDTH x i8> @__shuffle_i8(<WIDTH x i8>, <WIDTH x i32>) nounwind readnone
|
||||||
declare <WIDTH x i8> @__shuffle2_i8(<WIDTH x i8>, <WIDTH x i8>,
|
declare <WIDTH x i8> @__shuffle2_i8(<WIDTH x i8>, <WIDTH x i8>,
|
||||||
<WIDTH x i32>) nounwind readnone
|
<WIDTH x i32>) nounwind readnone
|
||||||
|
|||||||
@@ -797,6 +797,43 @@ not_const:
|
|||||||
ret <WIDTH x $1> %result
|
ret <WIDTH x $1> %result
|
||||||
}
|
}
|
||||||
|
|
||||||
|
;; Shift the elements of a vector across lanes by a uniform amount.  Lane k
;; of the result holds lane k + %1 of the input and lanes whose source lies
;; outside the input vector are zero.
define <WIDTH x $1> @__shift_$1(<WIDTH x $1>, i32) nounwind readnone alwaysinline {
  %isc = call i1 @__is_compile_time_constant_uniform_int32(i32 %1)
  ; the input followed by one vector of zeros, as a single double-width vector
  %zeropaddedvec = shufflevector <WIDTH x $1> %0, <WIDTH x $1> zeroinitializer,
                       <eval(2*WIDTH) x i32> < forloop(i, 0, eval(2*WIDTH-2), `i32 i, ')i32 eval(2*WIDTH-1) >
  br i1 %isc, label %is_const, label %not_const

is_const:
  ; though verbose, this turns into tight code if %1 is a constant
forloop(i, 0, eval(WIDTH-1), `
  %delta_`'i = add i32 %1, i
  %delta_clamped_`'i = and i32 %delta_`'i, eval(2*WIDTH-1)
  %v_`'i = extractelement <eval(2*WIDTH) x $1> %zeropaddedvec, i32 %delta_clamped_`'i')
  %ret_0 = insertelement <WIDTH x $1> zeroinitializer, $1 %v_0, i32 0
forloop(i, 1, eval(WIDTH-1), `  %ret_`'i = insertelement <WIDTH x $1> %ret_`'eval(i-1), $1 %v_`'i, i32 i
')
  ret <WIDTH x $1> %ret_`'eval(WIDTH-1)

not_const:
  ; lay out zeros, then the input, then zeros again as three adjacent vectors
  %ptr = alloca <WIDTH x $1>, i32 3
  %ptr0 = getelementptr <WIDTH x $1> * %ptr, i32 0
  store <WIDTH x $1> zeroinitializer, <WIDTH x $1> * %ptr0
  %ptr1 = getelementptr <WIDTH x $1> * %ptr, i32 1
  store <WIDTH x $1> %0, <WIDTH x $1> * %ptr1
  %ptr2 = getelementptr <WIDTH x $1> * %ptr, i32 2
  store <WIDTH x $1> zeroinitializer, <WIDTH x $1> * %ptr2

  ; the input copy starts one full vector into the buffer, so the displacement
  ; is biased by the vector width of the target and not by a fixed lane count.
  ; NOTE: only displacements of at most one vector width in either direction
  ; keep the load inside the buffer.
  %offset = add i32 %1, WIDTH
  %ptr_as_elt_array = bitcast <WIDTH x $1> * %ptr to [eval(3*WIDTH) x $1] *
  %load_ptr = getelementptr [eval(3*WIDTH) x $1] * %ptr_as_elt_array, i32 0, i32 %offset
  %load_ptr_vec = bitcast $1 * %load_ptr to <WIDTH x $1> *
  %result = load <WIDTH x $1> * %load_ptr_vec, align $2
  ret <WIDTH x $1> %result
}
|
||||||
|
|
||||||
|
|
||||||
define <WIDTH x $1> @__shuffle_$1(<WIDTH x $1>, <WIDTH x i32>) nounwind readnone alwaysinline {
|
define <WIDTH x $1> @__shuffle_$1(<WIDTH x $1>, <WIDTH x i32>) nounwind readnone alwaysinline {
|
||||||
forloop(i, 0, eval(WIDTH-1), `
|
forloop(i, 0, eval(WIDTH-1), `
|
||||||
%index_`'i = extractelement <WIDTH x i32> %1, i32 i')
|
%index_`'i = extractelement <WIDTH x i32> %1, i32 i')
|
||||||
|
|||||||
137
opt.cpp
137
opt.cpp
@@ -72,6 +72,7 @@
|
|||||||
#include <llvm/Analysis/ConstantFolding.h>
|
#include <llvm/Analysis/ConstantFolding.h>
|
||||||
#include <llvm/Target/TargetLibraryInfo.h>
|
#include <llvm/Target/TargetLibraryInfo.h>
|
||||||
#include <llvm/ADT/Triple.h>
|
#include <llvm/ADT/Triple.h>
|
||||||
|
#include <llvm/ADT/SmallSet.h>
|
||||||
#include <llvm/Transforms/Scalar.h>
|
#include <llvm/Transforms/Scalar.h>
|
||||||
#include <llvm/Transforms/IPO.h>
|
#include <llvm/Transforms/IPO.h>
|
||||||
#include <llvm/Transforms/Utils/BasicBlockUtils.h>
|
#include <llvm/Transforms/Utils/BasicBlockUtils.h>
|
||||||
@@ -124,6 +125,8 @@ static llvm::Pass *CreateMakeInternalFuncsStaticPass();
|
|||||||
|
|
||||||
static llvm::Pass *CreateDebugPass(char * output);
|
static llvm::Pass *CreateDebugPass(char * output);
|
||||||
|
|
||||||
|
static llvm::Pass *CreateReplaceExtractInsertChainsPass();
|
||||||
|
|
||||||
#define DEBUG_START_PASS(NAME) \
|
#define DEBUG_START_PASS(NAME) \
|
||||||
if (g->debugPrint && \
|
if (g->debugPrint && \
|
||||||
(getenv("FUNC") == NULL || \
|
(getenv("FUNC") == NULL || \
|
||||||
@@ -635,6 +638,7 @@ Optimize(llvm::Module *module, int optLevel) {
|
|||||||
optPM.add(CreateIsCompileTimeConstantPass(true));
|
optPM.add(CreateIsCompileTimeConstantPass(true));
|
||||||
optPM.add(CreateIntrinsicsOptPass());
|
optPM.add(CreateIntrinsicsOptPass());
|
||||||
optPM.add(CreateInstructionSimplifyPass());
|
optPM.add(CreateInstructionSimplifyPass());
|
||||||
|
optPM.add(CreateReplaceExtractInsertChainsPass());
|
||||||
|
|
||||||
optPM.add(llvm::createMemCpyOptPass());
|
optPM.add(llvm::createMemCpyOptPass());
|
||||||
optPM.add(llvm::createSCCPPass());
|
optPM.add(llvm::createSCCPPass());
|
||||||
@@ -4923,3 +4927,136 @@ static llvm::Pass *
|
|||||||
CreatePeepholePass() {
|
CreatePeepholePass() {
|
||||||
return new PeepholePass;
|
return new PeepholePass;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
///////////////////////////////////////////////////////////////////////////
|
||||||
|
// ReplaceExtractInsertChainsPass
|
||||||
|
|
||||||
|
/**
|
||||||
|
We occasionally get chains of ExtractElementInsts followed by
|
||||||
|
InsertElementInsts. Unfortunately, all of these can't be replaced by
|
||||||
|
ShuffleVectorInsts as we don't know that things are constant at the time.
|
||||||
|
|
||||||
|
This Pass will detect such chains, and replace them with ShuffleVectorInsts
|
||||||
|
if all the appropriate values are constant.
|
||||||
|
*/
|
||||||
|
|
||||||
|
class ReplaceExtractInsertChainsPass : public llvm::BasicBlockPass {
|
||||||
|
public:
|
||||||
|
static char ID;
|
||||||
|
ReplaceExtractInsertChainsPass() : BasicBlockPass(ID) {
|
||||||
|
}
|
||||||
|
|
||||||
|
const char *getPassName() const { return "Resolve \"replace extract insert chains\""; }
|
||||||
|
bool runOnBasicBlock(llvm::BasicBlock &BB);
|
||||||
|
|
||||||
|
};
|
||||||
|
|
||||||
|
char ReplaceExtractInsertChainsPass::ID = 0;
|
||||||
|
|
||||||
|
#include <iostream>
|
||||||
|
|
||||||
|
/** Given an llvm::Value known to be an integer, return its value as
|
||||||
|
an int64_t.
|
||||||
|
*/
|
||||||
|
static int64_t
|
||||||
|
lGetIntValue(llvm::Value *offset) {
|
||||||
|
llvm::ConstantInt *intOffset = llvm::dyn_cast<llvm::ConstantInt>(offset);
|
||||||
|
Assert(intOffset && (intOffset->getBitWidth() == 32 ||
|
||||||
|
intOffset->getBitWidth() == 64));
|
||||||
|
return intOffset->getSExtValue();
|
||||||
|
}
|
||||||
|
|
||||||
|
bool
|
||||||
|
ReplaceExtractInsertChainsPass::runOnBasicBlock(llvm::BasicBlock &bb) {
|
||||||
|
DEBUG_START_PASS("ReplaceExtractInsertChainsPass");
|
||||||
|
bool modifiedAny = false;
|
||||||
|
|
||||||
|
// Initialize our mapping to the first spot in the zero vector
|
||||||
|
int vectorWidth = g->target->getVectorWidth();
|
||||||
|
int shuffleMap[vectorWidth];
|
||||||
|
for (int i = 0; i < vectorWidth; i++) {
|
||||||
|
shuffleMap[i] = vectorWidth;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Hack-y. 16 is likely the upper limit for now.
|
||||||
|
llvm::SmallSet<llvm::Value *, 16> inserts;
|
||||||
|
|
||||||
|
// save the last Insert in the chain
|
||||||
|
llvm::Value * lastInsert = NULL;
|
||||||
|
|
||||||
|
for (llvm::BasicBlock::iterator i = bb.begin(), e = bb.end(); i != e; ++i) {
|
||||||
|
// Iterate through the instructions looking for InsertElementInsts
|
||||||
|
llvm::InsertElementInst *ieInst = llvm::dyn_cast<llvm::InsertElementInst>(&*i);
|
||||||
|
if (ieInst == NULL) {
|
||||||
|
// These aren't the instructions you're looking for.
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
llvm::Value * base = ieInst->getOperand(0);
|
||||||
|
if ( (llvm::isa<llvm::UndefValue>(base))
|
||||||
|
|| (llvm::isa<llvm::ConstantAggregateZero>(base))
|
||||||
|
|| (base == lastInsert)) {
|
||||||
|
// if source for insert scalar is 0 or an EEInst, add insert
|
||||||
|
llvm::Value *scalar = ieInst->getOperand(1);
|
||||||
|
if (llvm::ExtractElementInst *eeInst = llvm::dyn_cast<llvm::ExtractElementInst>(scalar)) {
|
||||||
|
// We're only going to deal with Inserts into a Constant vector lane
|
||||||
|
if (llvm::isa<llvm::Constant>(eeInst->getOperand(1))) {
|
||||||
|
inserts.insert(ieInst);
|
||||||
|
lastInsert = ieInst;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else if (llvm::ConstantInt *ci = llvm::dyn_cast<llvm::ConstantInt>(scalar)) {
|
||||||
|
if (ci->isZero()) {
|
||||||
|
inserts.insert(ieInst);
|
||||||
|
lastInsert = ieInst;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
lastInsert = NULL;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Look for chains, not insert/shuffle sequences
|
||||||
|
if (inserts.size() > 1) {
|
||||||
|
// The vector from which we're extracting elements
|
||||||
|
llvm::Value * baseVec = NULL;
|
||||||
|
llvm::Value *ee = llvm::cast<llvm::InsertElementInst>((*inserts.begin()))->getOperand(1);
|
||||||
|
if (llvm::ExtractElementInst *eeInst = llvm::dyn_cast<llvm::ExtractElementInst>(ee)) {
|
||||||
|
baseVec = eeInst->getOperand(0);
|
||||||
|
}
|
||||||
|
|
||||||
|
bool sameBase = true;
|
||||||
|
for (llvm::SmallSet<llvm::Value *,16>::iterator i = inserts.begin(); i != inserts.end(); i++) {
|
||||||
|
llvm::InsertElementInst *ie = llvm::cast<llvm::InsertElementInst>(*i);
|
||||||
|
if (llvm::ExtractElementInst *ee = llvm::dyn_cast<llvm::ExtractElementInst>(ie->getOperand(1))) {
|
||||||
|
if (ee->getOperand(0) != baseVec) {
|
||||||
|
sameBase = false;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
int64_t from = lGetIntValue(ee->getIndexOperand());
|
||||||
|
int64_t to = lGetIntValue(ie->getOperand(2));
|
||||||
|
shuffleMap[to] = from;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (sameBase) {
|
||||||
|
llvm::Value *shuffleIdxs = LLVMInt32Vector(shuffleMap);
|
||||||
|
llvm::Value *zeroVec = llvm::ConstantAggregateZero::get(shuffleIdxs->getType());
|
||||||
|
llvm::Value *shuffle = new llvm::ShuffleVectorInst(baseVec, zeroVec, shuffleIdxs, "shiftInZero", llvm::cast<llvm::Instruction>(lastInsert));
|
||||||
|
// For now, be lazy and let DCE clean up the Extracts/Inserts.
|
||||||
|
lastInsert->replaceAllUsesWith(shuffle);
|
||||||
|
|
||||||
|
modifiedAny = true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
DEBUG_END_PASS("ReplaceExtractInsertChainsPass");
|
||||||
|
|
||||||
|
return modifiedAny;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/** Allocates a new ReplaceExtractInsertChainsPass and returns it through
    the generic llvm::Pass interface; Optimize() hands the result to
    optPM.add().
 */
static llvm::Pass *
CreateReplaceExtractInsertChainsPass() {
    llvm::Pass *chainsPass = new ReplaceExtractInsertChainsPass();
    return chainsPass;
}
|
||||||
|
|||||||
30
stdlib.ispc
30
stdlib.ispc
@@ -170,6 +170,36 @@ static inline int64 rotate(int64 v, uniform int i) {
|
|||||||
return __rotate_i64(v, i);
|
return __rotate_i64(v, i);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* shift(): moves the values of v across program instances by the uniform
   amount i.  As implemented by the __shift_* builtins, each program
   instance receives the value held by the instance i positions further
   along (further back for negative i), and zero if that position lies
   outside the gang.  One overload per supported element type; each simply
   forwards to the builtin of the matching type. */

__declspec(safe)
static inline float shift(float v, uniform int i) {
    return __shift_float(v, i);
}

__declspec(safe)
static inline int8 shift(int8 v, uniform int i) {
    return __shift_i8(v, i);
}

__declspec(safe)
static inline int16 shift(int16 v, uniform int i) {
    return __shift_i16(v, i);
}

__declspec(safe)
static inline int32 shift(int32 v, uniform int i) {
    return __shift_i32(v, i);
}

__declspec(safe)
static inline double shift(double v, uniform int i) {
    return __shift_double(v, i);
}

__declspec(safe)
static inline int64 shift(int64 v, uniform int i) {
    return __shift_i64(v, i);
}
|
||||||
|
|
||||||
__declspec(safe)
|
__declspec(safe)
|
||||||
static inline float shuffle(float v, int i) {
|
static inline float shuffle(float v, int i) {
|
||||||
return __shuffle_float(v, i);
|
return __shuffle_float(v, i);
|
||||||
|
|||||||
Reference in New Issue
Block a user