Initial Support for new stdlib shift operator

2013-10-22 18:06:54 -04:00
parent c18fa15db1
commit 899f85ce9c
5 changed files with 217 additions and 0 deletions
--- a/builtins.cpp
+++ b/builtins.cpp
@@ -536,6 +536,12 @@ lSetInternalFunctions(llvm::Module *module) {
        "__set_system_isa",
        "__sext_uniform_bool",
        "__sext_varying_bool",
        "__shift_double",
        "__shift_float",
        "__shift_i16",
        "__shift_i32",
        "__shift_i64",
        "__shift_i8",
        "__shuffle2_double",
        "__shuffle2_float",
        "__shuffle2_i16",
--- a/builtins/target-generic-common.ll
+++ b/builtins/target-generic-common.ll
@@ -80,6 +80,13 @@ declare <WIDTH x i32> @__rotate_i32(<WIDTH x i32>, i32) nounwind readnone
 declare <WIDTH x double> @__rotate_double(<WIDTH x double>, i32) nounwind readnone
 declare <WIDTH x i64> @__rotate_i64(<WIDTH x i64>, i32) nounwind readnone
 ; Vector shift builtins, one per element type.  Each takes a WIDTH-wide vector
 ; and a scalar i32 amount and returns a vector of the same type.  These are
 ; declarations only; no body is supplied here.
 declare <WIDTH x i8> @__shift_i8(<WIDTH x i8>, i32) nounwind readnone
 declare <WIDTH x i16> @__shift_i16(<WIDTH x i16>, i32) nounwind readnone
 declare <WIDTH x float> @__shift_float(<WIDTH x float>, i32) nounwind readnone
 declare <WIDTH x i32> @__shift_i32(<WIDTH x i32>, i32) nounwind readnone
 declare <WIDTH x double> @__shift_double(<WIDTH x double>, i32) nounwind readnone
 declare <WIDTH x i64> @__shift_i64(<WIDTH x i64>, i32) nounwind readnone
 declare <WIDTH x i8> @__shuffle_i8(<WIDTH x i8>, <WIDTH x i32>) nounwind readnone
 declare <WIDTH x i8> @__shuffle2_i8(<WIDTH x i8>, <WIDTH x i8>,
                                    <WIDTH x i32>) nounwind readnone
--- a/builtins/util.m4
+++ b/builtins/util.m4
@@ -797,6 +797,43 @@ not_const:
  ret <WIDTH x $1> %result
 }
 ; Shift the lanes of a <WIDTH x $1> vector by a uniform i32 amount and fill the
 ; vacated lanes with zero: result lane k holds input lane k + amount when that
 ; lane exists, and zero otherwise.  Amounts are expected to lie between -WIDTH
 ; and WIDTH; larger magnitudes are not handled by either path below.
 ; $1 is the element type and $2 is the alignment used for the final vector load.
 define <WIDTH x $1> @__shift_$1(<WIDTH x $1>, i32) nounwind readnone alwaysinline {
  %isc = call i1 @__is_compile_time_constant_uniform_int32(i32 %1)
  ; the input vector followed by WIDTH lanes of zero
  %zeropaddedvec = shufflevector <WIDTH x $1> %0, <WIDTH x $1> zeroinitializer,
                     <eval(2*WIDTH) x i32> < forloop(i, 0, eval(2*WIDTH-2), `i32 i, ')i32 eval(2*WIDTH-1) >
  br i1 %isc, label %is_const, label %not_const
 is_const:
  ; though verbose, this turns into tight code if %1 is a constant
  ; each lane reads the padded vector at its lane number plus the amount, wrapped
  ; into the padded vector so that negative amounts land in the zero half
 forloop(i, 0, eval(WIDTH-1), `  
  %delta_`'i = add i32 %1, i
  %delta_clamped_`'i = and i32 %delta_`'i, eval(2*WIDTH-1)
  %v_`'i = extractelement <eval(2*WIDTH) x $1> %zeropaddedvec, i32 %delta_clamped_`'i')
  %ret_0 = insertelement <WIDTH x $1> zeroinitializer, $1 %v_0, i32 0
 forloop(i, 1, eval(WIDTH-1), `  %ret_`'i = insertelement <WIDTH x $1> %ret_`'eval(i-1), $1 %v_`'i, i32 i
 ')
  ret <WIDTH x $1> %ret_`'eval(WIDTH-1)
 not_const:
  ; lay out three vectors back to back in memory: zeros, the input, zeros
  %ptr = alloca <WIDTH x $1>, i32 3
  %ptr0 = getelementptr <WIDTH x $1> * %ptr, i32 0
  store <WIDTH x $1> zeroinitializer, <WIDTH x $1> * %ptr0
  %ptr1 = getelementptr <WIDTH x $1> * %ptr, i32 1
  store <WIDTH x $1> %0, <WIDTH x $1> * %ptr1
  %ptr2 = getelementptr <WIDTH x $1> * %ptr, i32 2
  store <WIDTH x $1> zeroinitializer, <WIDTH x $1> * %ptr2
  ; the input copy starts WIDTH elements into the buffer, so bias the amount by
  ; WIDTH; a fixed bias of 16 is only right when the vector is 16 lanes wide
  %offset = add i32 %1, WIDTH
  %ptr_as_elt_array = bitcast <WIDTH x $1> * %ptr to [eval(3*WIDTH) x $1] *
  %load_ptr = getelementptr [eval(3*WIDTH) x $1] * %ptr_as_elt_array, i32 0, i32 %offset
  %load_ptr_vec = bitcast $1 * %load_ptr to <WIDTH x $1> *
  ; the load starts at an arbitrary element, so only element alignment holds
  %result = load <WIDTH x $1> * %load_ptr_vec, align $2
  ret <WIDTH x $1> %result
 }
 define <WIDTH x $1> @__shuffle_$1(<WIDTH x $1>, <WIDTH x i32>) nounwind readnone alwaysinline {
 forloop(i, 0, eval(WIDTH-1), `  
  %index_`'i = extractelement <WIDTH x i32> %1, i32 i')
--- a/opt.cpp
+++ b/opt.cpp
@@ -72,6 +72,7 @@
 #include <llvm/Analysis/ConstantFolding.h>
 #include <llvm/Target/TargetLibraryInfo.h>
 #include <llvm/ADT/Triple.h>
 #include <llvm/ADT/SmallSet.h>
 #include <llvm/ADT/SmallVector.h>
 #include <llvm/Transforms/Scalar.h>
 #include <llvm/Transforms/IPO.h>
 #include <llvm/Transforms/Utils/BasicBlockUtils.h>
@@ -124,6 +125,8 @@ static llvm::Pass *CreateMakeInternalFuncsStaticPass();
 static llvm::Pass *CreateDebugPass(char * output);
 static llvm::Pass *CreateReplaceExtractInsertChainsPass();
 #define DEBUG_START_PASS(NAME)                                 \
    if (g->debugPrint &&                                       \
        (getenv("FUNC") == NULL ||                             \
@@ -635,6 +638,7 @@ Optimize(llvm::Module *module, int optLevel) {
        optPM.add(CreateIsCompileTimeConstantPass(true));
        optPM.add(CreateIntrinsicsOptPass());
        optPM.add(CreateInstructionSimplifyPass());
        optPM.add(CreateReplaceExtractInsertChainsPass());
        optPM.add(llvm::createMemCpyOptPass());
        optPM.add(llvm::createSCCPPass());
@@ -4923,3 +4927,136 @@ static llvm::Pass *
 CreatePeepholePass() {
  return new PeepholePass;
 }
 ///////////////////////////////////////////////////////////////////////////
 // ReplaceExtractInsertChainsPass
 /**
    Occasionally we end up with chains of ExtractElementInsts whose results
    are fed straight into InsertElementInsts.  Not all of these can be
    written as ShuffleVectorInsts up front, because the lane indices are not
    known to be constant when the code is first generated.  This pass looks
    for such chains and replaces them with ShuffleVectorInsts once all of
    the relevant values are constant.
  */
 class ReplaceExtractInsertChainsPass : public llvm::BasicBlockPass {
 public:
     /** Pass identification member; it is handed to the BasicBlockPass
         constructor below. */
     static char ID;

     ReplaceExtractInsertChainsPass()
         : BasicBlockPass(ID) { }

     /** Processes one basic block; returns true if the block was changed. */
     bool runOnBasicBlock(llvm::BasicBlock &BB);

     /** Human-readable name of this pass. */
     const char *getPassName() const {
         return "Resolve \"replace extract insert chains\"";
     }
 };

 char ReplaceExtractInsertChainsPass::ID = 0;
 #include <iostream>
 /** Returns the sign-extended value of an llvm::Value that is known to be
     an integer constant.  Asserts that the value really is a
     llvm::ConstantInt and that it is 32 or 64 bits wide.
  */
 static int64_t
 lGetIntValue(llvm::Value *offset) {
     llvm::ConstantInt *intOffset;
     intOffset = llvm::dyn_cast<llvm::ConstantInt>(offset);

     // Anything other than a 32- or 64-bit integer constant is a caller bug.
     Assert(intOffset && (intOffset->getBitWidth() == 32 ||
                          intOffset->getBitWidth() == 64));

     const int64_t value = intOffset->getSExtValue();
     return value;
 }
 bool
 ReplaceExtractInsertChainsPass::runOnBasicBlock(llvm::BasicBlock &bb) {
    DEBUG_START_PASS("ReplaceExtractInsertChainsPass");
    bool modifiedAny = false;
    // Initialize our mapping to the first spot in the zero vector
    int vectorWidth = g->target->getVectorWidth();
    int shuffleMap[vectorWidth];
    for (int i = 0; i < vectorWidth; i++) {
      shuffleMap[i] = vectorWidth;
    }
    // Hack-y.  16 is likely the upper limit for now.
    llvm::SmallSet<llvm::Value *, 16> inserts;
    // save the last Insert in the chain
    llvm::Value * lastInsert = NULL;
    for (llvm::BasicBlock::iterator i = bb.begin(), e = bb.end(); i != e; ++i) {
      // Iterate through the instructions looking for InsertElementInsts
      llvm::InsertElementInst *ieInst = llvm::dyn_cast<llvm::InsertElementInst>(&*i);
      if (ieInst == NULL) {
        // These aren't the instructions you're looking for.
        continue;
      }
      llvm::Value * base = ieInst->getOperand(0);
      if ( (llvm::isa<llvm::UndefValue>(base))
           || (llvm::isa<llvm::ConstantAggregateZero>(base))
           || (base == lastInsert)) {
        // if source for insert scalar is 0 or an EEInst, add insert
        llvm::Value *scalar = ieInst->getOperand(1);
        if (llvm::ExtractElementInst *eeInst = llvm::dyn_cast<llvm::ExtractElementInst>(scalar)) {
          // We're only going to deal with Inserts into a Constant vector lane
          if (llvm::isa<llvm::Constant>(eeInst->getOperand(1))) {
            inserts.insert(ieInst);
            lastInsert = ieInst;
          }
        }
        else if (llvm::ConstantInt *ci = llvm::dyn_cast<llvm::ConstantInt>(scalar)) {
          if (ci->isZero()) {
            inserts.insert(ieInst);
            lastInsert = ieInst;
          }
        }
        else {
          lastInsert = NULL;
        }
      }
    }
    // Look for chains, not insert/shuffle sequences
    if (inserts.size() > 1) {
      // The vector from which we're extracting elements
      llvm::Value * baseVec = NULL;
      llvm::Value *ee = llvm::cast<llvm::InsertElementInst>((*inserts.begin()))->getOperand(1);
      if (llvm::ExtractElementInst *eeInst = llvm::dyn_cast<llvm::ExtractElementInst>(ee)) {
        baseVec = eeInst->getOperand(0);
      }
      bool sameBase = true;
      for (llvm::SmallSet<llvm::Value *,16>::iterator i = inserts.begin(); i != inserts.end(); i++) {
        llvm::InsertElementInst *ie = llvm::cast<llvm::InsertElementInst>(*i);
        if (llvm::ExtractElementInst *ee = llvm::dyn_cast<llvm::ExtractElementInst>(ie->getOperand(1))) {
          if (ee->getOperand(0) != baseVec) {
            sameBase = false;
            break;
          }
          int64_t from = lGetIntValue(ee->getIndexOperand());
          int64_t to = lGetIntValue(ie->getOperand(2)); 
          shuffleMap[to] = from;
        }
      }
      if (sameBase) {
        llvm::Value *shuffleIdxs = LLVMInt32Vector(shuffleMap);
        llvm::Value *zeroVec = llvm::ConstantAggregateZero::get(shuffleIdxs->getType());
        llvm::Value *shuffle = new llvm::ShuffleVectorInst(baseVec, zeroVec, shuffleIdxs, "shiftInZero", llvm::cast<llvm::Instruction>(lastInsert));
        // For now, be lazy and let DCE clean up the Extracts/Inserts.
        lastInsert->replaceAllUsesWith(shuffle);
        modifiedAny = true;
      }
    }    
    DEBUG_END_PASS("ReplaceExtractInsertChainsPass");
    return modifiedAny;
 }
 /** Allocates a new ReplaceExtractInsertChainsPass and returns it as a
     generic llvm::Pass pointer.
  */
 static llvm::Pass *
 CreateReplaceExtractInsertChainsPass() {
     llvm::Pass *pass = new ReplaceExtractInsertChainsPass();
     return pass;
 }
--- a/stdlib.ispc
+++ b/stdlib.ispc
@@ -170,6 +170,36 @@ static inline int64 rotate(int64 v, uniform int i) {
    return __rotate_i64(v, i);
 }
 // shift(): one overload per element type.  Each overload takes a varying
 // value v and a uniform int amount i and forwards both, unchanged, to the
 // __shift_<type> builtin that matches the type of v, returning its result.

 // float overload; forwards to __shift_float.
 __declspec(safe) 
 static inline float shift(float v, uniform int i) {
    return __shift_float(v, i);
 }
 // int8 overload; forwards to __shift_i8.
 __declspec(safe) 
 static inline int8 shift(int8 v, uniform int i) {
    return __shift_i8(v, i);
 }
 // int16 overload; forwards to __shift_i16.
 __declspec(safe) 
 static inline int16 shift(int16 v, uniform int i) {
    return __shift_i16(v, i);
 }
 // int32 overload; forwards to __shift_i32.
 __declspec(safe) 
 static inline int32 shift(int32 v, uniform int i) {
    return __shift_i32(v, i);
 }
 // double overload; forwards to __shift_double.
 __declspec(safe) 
 static inline double shift(double v, uniform int i) {
    return __shift_double(v, i);
 }
 // int64 overload; forwards to __shift_i64.
 __declspec(safe) 
 static inline int64 shift(int64 v, uniform int i) {
    return __shift_i64(v, i);
 }
 __declspec(safe) 
 static inline float shuffle(float v, int i) {
    return __shuffle_float(v, i);