Incorporate per-lane offsets for varying data in the front-end.
Previously, it was only in the GatherScatterFlattenOpt optimization pass that we added the per-lane offsets when we were indexing into varying data. (Specifically, the case of float foo[]; int index; foo[index], where foo is an array of varying elements rather than uniform elements.) Now, this is done in the front-end as we're first emitting code. In addition to the basic ugliness of doing this in an optimization pass, it was also error-prone to do it there, since we no longer have access to all of the type information that's around in the front-end. No functionality or performance change.
This commit is contained in:
47
ctx.cpp
47
ctx.cpp
@@ -1302,7 +1302,7 @@ FunctionEmitContext::GetElementPtrInst(llvm::Value *basePtr, llvm::Value *index0
|
||||
}
|
||||
|
||||
// FIXME: do we need to handle the case of the first index being
|
||||
// varying? It's currently needed...
|
||||
// varying? It's not currently needed...
|
||||
assert(!llvm::isa<LLVM_TYPE_CONST llvm::VectorType>(index0->getType()));
|
||||
|
||||
LLVM_TYPE_CONST llvm::Type *basePtrType = basePtr->getType();
|
||||
@@ -1499,6 +1499,8 @@ FunctionEmitContext::gather(llvm::Value *lvalue, llvm::Value *mask,
|
||||
}
|
||||
assert(gather != NULL);
|
||||
|
||||
lvalue = addVaryingOffsetsIfNeeded(lvalue, type);
|
||||
|
||||
llvm::Value *voidlvalue = BitCastInst(lvalue, LLVMTypes::VoidPointerType);
|
||||
llvm::Instruction *call = CallInst(gather, voidlvalue, mask, name);
|
||||
// Add metadata about the source file location so that the
|
||||
@@ -1716,6 +1718,8 @@ FunctionEmitContext::scatter(llvm::Value *rvalue, llvm::Value *lvalue,
|
||||
|
||||
AddInstrumentationPoint("scatter");
|
||||
|
||||
lvalue = addVaryingOffsetsIfNeeded(lvalue, rvalueType);
|
||||
|
||||
llvm::Value *voidlvalue = BitCastInst(lvalue, LLVMTypes::VoidPointerType);
|
||||
std::vector<llvm::Value *> args;
|
||||
args.push_back(voidlvalue);
|
||||
@@ -2041,3 +2045,44 @@ FunctionEmitContext::SyncInst() {
|
||||
|
||||
SetCurrentBasicBlock(bPostSync);
|
||||
}
|
||||
|
||||
|
||||
/** When gathering from or scattering to a varying atomic type, we need
|
||||
to add an appropriate offset to the final address for each lane right
|
||||
before we use it. Given a varying pointer we're about to use and its
|
||||
type, this function determines whether these offsets are needed and
|
||||
returns an updated pointer that incorporates these offsets if needed.
|
||||
*/
|
||||
llvm::Value *
|
||||
FunctionEmitContext::addVaryingOffsetsIfNeeded(llvm::Value *ptr, const Type *type) {
|
||||
// We should only have varying pointers here, which are represented as
|
||||
// arrays of pointers in ispc.
|
||||
LLVM_TYPE_CONST llvm::ArrayType *at =
|
||||
llvm::dyn_cast<LLVM_TYPE_CONST llvm::ArrayType>(ptr->getType());
|
||||
assert(at != NULL);
|
||||
LLVM_TYPE_CONST llvm::PointerType *pt =
|
||||
llvm::dyn_cast<LLVM_TYPE_CONST llvm::PointerType>(at->getElementType());
|
||||
assert(pt != NULL);
|
||||
|
||||
// If we have pointers to vector types, e.g. [8 x <8 x float> *], then
|
||||
// the data we're gathering from/scattering to is varying in memory.
|
||||
// If we have pointers to scalar types, e.g. [8 x float *], then the
|
||||
// data is uniform in memory and doesn't need any additional offsets.
|
||||
if (llvm::isa<LLVM_TYPE_CONST llvm::VectorType>(pt->getElementType()) == false)
|
||||
return ptr;
|
||||
|
||||
llvm::Value *varyingOffsets = llvm::UndefValue::get(LLVMTypes::Int32VectorType);
|
||||
for (int i = 0; i < g->target.vectorWidth; ++i)
|
||||
varyingOffsets = InsertInst(varyingOffsets, LLVMInt32(i), i,
|
||||
"varying_delta");
|
||||
|
||||
// Cast the pointer type to the corresponding uniform type--e.g. cast
|
||||
// <8 x float> * to float *s.
|
||||
LLVM_TYPE_CONST llvm::Type *unifType = type->GetAsUniformType()->LLVMType(g->ctx);
|
||||
LLVM_TYPE_CONST llvm::PointerType *ptrCastType =
|
||||
llvm::PointerType::get(llvm::ArrayType::get(unifType, 0), 0);
|
||||
ptr = BitCastInst(ptr, ptrCastType, "ptr2unif");
|
||||
|
||||
// And now we can do the per-lane offsets...
|
||||
return GetElementPtrInst(ptr, LLVMInt32(0), varyingOffsets);
|
||||
}
|
||||
|
||||
1
ctx.h
1
ctx.h
@@ -525,6 +525,7 @@ private:
|
||||
const Type *type, const char *name);
|
||||
void maskedStore(llvm::Value *rvalue, llvm::Value *lvalue,
|
||||
const Type *rvalueType, llvm::Value *maskPtr);
|
||||
llvm::Value *addVaryingOffsetsIfNeeded(llvm::Value *value, const Type *type);
|
||||
};
|
||||
|
||||
#endif // ISPC_CTX_H
|
||||
|
||||
238
expr.cpp
238
expr.cpp
@@ -2522,6 +2522,78 @@ lCastUniformVectorBasePtr(llvm::Value *ptr, FunctionEmitContext *ctx) {
|
||||
}
|
||||
|
||||
|
||||
/** When computing pointer values, we need to apply a per-lane offset when
|
||||
we're indexing into varying data. Consider the following ispc code:
|
||||
|
||||
uniform float u[] = ...;
|
||||
float v[] = ...;
|
||||
int index = ...;
|
||||
float a = u[index];
|
||||
float b = v[index];
|
||||
|
||||
To compute the varying pointer that holds the addresses to load from
|
||||
for u[index], we basically just need to multiply index element-wise by
|
||||
sizeof(float) before doing the memory load. For v[index], we need to
|
||||
do the same scaling but also need to add per-lane offsets <0,
|
||||
sizeof(float), 2*sizeof(float), ...> so that the i'th lane loads the
|
||||
i'th of the varying values at its index value.
|
||||
|
||||
This function handles figuring out when this additional offset is
|
||||
needed and then incorporates it in the varying pointer value.
|
||||
*/
|
||||
static llvm::Value *
|
||||
lAddVaryingOffsetsIfNeeded(FunctionEmitContext *ctx, llvm::Value *ptr,
|
||||
const Type *returnType, const Type *indexedType) {
|
||||
// If the result of the indexing isn't a varying atomic type, then
|
||||
// nothing to do here.
|
||||
if (returnType->IsVaryingType() == false ||
|
||||
dynamic_cast<const AtomicType *>(returnType) == NULL)
|
||||
return ptr;
|
||||
|
||||
// We should now have an array of pointer values, representing a
|
||||
// varying pointer.
|
||||
LLVM_TYPE_CONST llvm::ArrayType *at =
|
||||
llvm::dyn_cast<LLVM_TYPE_CONST llvm::ArrayType>(ptr->getType());
|
||||
if (at == NULL)
|
||||
return ptr;
|
||||
LLVM_TYPE_CONST llvm::PointerType *pt =
|
||||
llvm::dyn_cast<LLVM_TYPE_CONST llvm::PointerType>(at->getElementType());
|
||||
assert(pt != NULL);
|
||||
|
||||
// If the pointers are to uniform types (e.g. ptr->getType() ==
|
||||
// [8 x float *]), then we have the u[index] situation from the comment
|
||||
// above, and no additional offset is needed. Otherwise we have
|
||||
// pointers to varying atomic types--e.g. ptr->getType() ==
|
||||
// [8 x <8 x float> *]
|
||||
if (llvm::isa<LLVM_TYPE_CONST llvm::VectorType>(pt->getElementType()) == false)
|
||||
return ptr;
|
||||
|
||||
// But not so fast: if the reason we have a vector of pointers is that
|
||||
// we're indexing into an array of uniform short-vector types, then we
|
||||
// don't need the offsets.
|
||||
if (dynamic_cast<const VectorType *>(indexedType) != NULL)
|
||||
return ptr;
|
||||
|
||||
// Onward: compute the per lane offsets.
|
||||
llvm::Value *varyingOffsets =
|
||||
llvm::UndefValue::get(LLVMTypes::Int32VectorType);
|
||||
for (int i = 0; i < g->target.vectorWidth; ++i)
|
||||
varyingOffsets = ctx->InsertInst(varyingOffsets, LLVMInt32(i), i,
|
||||
"varying_delta");
|
||||
|
||||
// Cast the pointer to the corresponding uniform pointer
|
||||
// type--e.g. from [8 x <8 x float> *] to [8 x float *].
|
||||
LLVM_TYPE_CONST llvm::Type *unifType =
|
||||
returnType->GetAsUniformType()->LLVMType(g->ctx);
|
||||
LLVM_TYPE_CONST llvm::PointerType *ptrCastType =
|
||||
llvm::PointerType::get(llvm::ArrayType::get(unifType, 0), 0);
|
||||
ptr = ctx->BitCastInst(ptr, ptrCastType, "ptr2unif");
|
||||
|
||||
// And finally add the per-lane offsets.
|
||||
return ctx->GetElementPtrInst(ptr, LLVMInt32(0), varyingOffsets);
|
||||
}
|
||||
|
||||
|
||||
llvm::Value *
|
||||
IndexExpr::GetValue(FunctionEmitContext *ctx) const {
|
||||
const Type *arrayOrVectorType;
|
||||
@@ -2547,6 +2619,8 @@ IndexExpr::GetValue(FunctionEmitContext *ctx) const {
|
||||
ctx->StoreInst(val, ptr);
|
||||
ptr = lCastUniformVectorBasePtr(ptr, ctx);
|
||||
lvalue = ctx->GetElementPtrInst(ptr, LLVMInt32(0), index->GetValue(ctx));
|
||||
lvalue = lAddVaryingOffsetsIfNeeded(ctx, lvalue, GetType(),
|
||||
arrayOrVectorType);
|
||||
mask = LLVMMaskAllOn;
|
||||
}
|
||||
else {
|
||||
@@ -2593,19 +2667,20 @@ IndexExpr::GetBaseSymbol() const {
|
||||
|
||||
llvm::Value *
|
||||
IndexExpr::GetLValue(FunctionEmitContext *ctx) const {
|
||||
const Type *type;
|
||||
if (!arrayOrVector || !index || ((type = arrayOrVector->GetType()) == NULL))
|
||||
const Type *arrayOrVectorType;
|
||||
if (arrayOrVector == NULL || index == NULL ||
|
||||
((arrayOrVectorType = arrayOrVector->GetType()) == NULL))
|
||||
return NULL;
|
||||
|
||||
ctx->SetDebugPos(pos);
|
||||
llvm::Value *basePtr = NULL;
|
||||
if (dynamic_cast<const ArrayType *>(type) ||
|
||||
dynamic_cast<const VectorType *>(type))
|
||||
if (dynamic_cast<const ArrayType *>(arrayOrVectorType) ||
|
||||
dynamic_cast<const VectorType *>(arrayOrVectorType))
|
||||
basePtr = arrayOrVector->GetLValue(ctx);
|
||||
else {
|
||||
type = type->GetReferenceTarget();
|
||||
assert(dynamic_cast<const ArrayType *>(type) ||
|
||||
dynamic_cast<const VectorType *>(type));
|
||||
arrayOrVectorType = arrayOrVectorType->GetReferenceTarget();
|
||||
assert(dynamic_cast<const ArrayType *>(arrayOrVectorType) ||
|
||||
dynamic_cast<const VectorType *>(arrayOrVectorType));
|
||||
basePtr = arrayOrVector->GetValue(ctx);
|
||||
}
|
||||
if (!basePtr)
|
||||
@@ -2614,7 +2689,8 @@ IndexExpr::GetLValue(FunctionEmitContext *ctx) const {
|
||||
// If the array index is a compile time constant, check to see if it
|
||||
// may lead to an out-of-bounds access.
|
||||
ConstExpr *ce = dynamic_cast<ConstExpr *>(index);
|
||||
const SequentialType *seqType = dynamic_cast<const SequentialType *>(type);
|
||||
const SequentialType *seqType =
|
||||
dynamic_cast<const SequentialType *>(arrayOrVectorType);
|
||||
assert(seqType != NULL);
|
||||
int nElements = seqType->GetElementCount();
|
||||
if (ce != NULL && nElements > 0) {
|
||||
@@ -2630,7 +2706,11 @@ IndexExpr::GetLValue(FunctionEmitContext *ctx) const {
|
||||
basePtr = lCastUniformVectorBasePtr(basePtr, ctx);
|
||||
|
||||
ctx->SetDebugPos(pos);
|
||||
return ctx->GetElementPtrInst(basePtr, LLVMInt32(0), index->GetValue(ctx));
|
||||
llvm::Value *ptr = ctx->GetElementPtrInst(basePtr, LLVMInt32(0),
|
||||
index->GetValue(ctx));
|
||||
ptr = lAddVaryingOffsetsIfNeeded(ctx, ptr, GetType(), arrayOrVectorType);
|
||||
|
||||
return ptr;
|
||||
}
|
||||
|
||||
|
||||
@@ -2731,27 +2811,32 @@ lIdentifierToVectorElement(char id) {
|
||||
}
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////
|
||||
// StructMemberExpr
|
||||
|
||||
class StructMemberExpr : public MemberExpr
|
||||
{
|
||||
public:
|
||||
StructMemberExpr(Expr *e, const char *id, SourcePos p,
|
||||
SourcePos idpos, const StructType* structType);
|
||||
|
||||
const Type* GetType() const;
|
||||
SourcePos idpos, const StructType *structType);
|
||||
|
||||
const Type *GetType() const;
|
||||
int getElementNumber() const;
|
||||
const Type *getElementType() const;
|
||||
|
||||
private:
|
||||
const StructType* exprStructType;
|
||||
const StructType *exprStructType;
|
||||
};
|
||||
|
||||
|
||||
StructMemberExpr::StructMemberExpr(Expr *e, const char *id, SourcePos p,
|
||||
SourcePos idpos,
|
||||
const StructType* structType)
|
||||
const StructType *structType)
|
||||
: MemberExpr(e, id, p, idpos), exprStructType(structType) {
|
||||
}
|
||||
|
||||
const Type*
|
||||
|
||||
const Type *
|
||||
StructMemberExpr::GetType() const {
|
||||
// It's a struct, and the result type is the element
|
||||
// type, possibly promoted to varying if the struct type / lvalue
|
||||
@@ -2780,26 +2865,35 @@ StructMemberExpr::getElementNumber() const {
|
||||
return elementNumber;
|
||||
}
|
||||
|
||||
|
||||
const Type *
|
||||
StructMemberExpr::getElementType() const {
|
||||
return exprStructType->GetAsUniformType()->GetElementType(identifier);
|
||||
}
|
||||
|
||||
|
||||
//////////////////////////////////////////////////
|
||||
// VectorMemberExpr
|
||||
|
||||
class VectorMemberExpr : public MemberExpr
|
||||
{
|
||||
public:
|
||||
VectorMemberExpr(Expr *e, const char *id, SourcePos p,
|
||||
SourcePos idpos, const VectorType* vectorType);
|
||||
|
||||
~VectorMemberExpr();
|
||||
|
||||
const Type* GetType() const;
|
||||
|
||||
llvm::Value* GetLValue(FunctionEmitContext* ctx) const;
|
||||
|
||||
llvm::Value* GetValue(FunctionEmitContext* ctx) const;
|
||||
const Type *GetType() const;
|
||||
llvm::Value *GetLValue(FunctionEmitContext* ctx) const;
|
||||
llvm::Value *GetValue(FunctionEmitContext* ctx) const;
|
||||
|
||||
int getElementNumber() const;
|
||||
const Type *getElementType() const;
|
||||
|
||||
private:
|
||||
const VectorType* exprVectorType;
|
||||
const VectorType* memberType;
|
||||
const VectorType *exprVectorType;
|
||||
const VectorType *memberType;
|
||||
};
|
||||
|
||||
|
||||
VectorMemberExpr::VectorMemberExpr(Expr *e, const char *id, SourcePos p,
|
||||
SourcePos idpos,
|
||||
const VectorType* vectorType)
|
||||
@@ -2808,11 +2902,8 @@ VectorMemberExpr::VectorMemberExpr(Expr *e, const char *id, SourcePos p,
|
||||
identifier.length());
|
||||
}
|
||||
|
||||
VectorMemberExpr::~VectorMemberExpr() {
|
||||
delete memberType;
|
||||
}
|
||||
|
||||
const Type*
|
||||
const Type *
|
||||
VectorMemberExpr::GetType() const {
|
||||
// For 1-element expressions, we have the base vector element
|
||||
// type. For n-element expressions, we have a shortvec type
|
||||
@@ -2826,7 +2917,7 @@ VectorMemberExpr::GetType() const {
|
||||
}
|
||||
|
||||
|
||||
llvm::Value*
|
||||
llvm::Value *
|
||||
VectorMemberExpr::GetLValue(FunctionEmitContext* ctx) const {
|
||||
if (identifier.length() == 1) {
|
||||
return MemberExpr::GetLValue(ctx);
|
||||
@@ -2836,11 +2927,12 @@ VectorMemberExpr::GetLValue(FunctionEmitContext* ctx) const {
|
||||
}
|
||||
|
||||
|
||||
llvm::Value*
|
||||
llvm::Value *
|
||||
VectorMemberExpr::GetValue(FunctionEmitContext* ctx) const {
|
||||
if (identifier.length() == 1) {
|
||||
return MemberExpr::GetValue(ctx);
|
||||
} else {
|
||||
}
|
||||
else {
|
||||
std::vector<int> indices;
|
||||
|
||||
for (size_t i = 0; i < identifier.size(); ++i) {
|
||||
@@ -2866,8 +2958,7 @@ VectorMemberExpr::GetValue(FunctionEmitContext* ctx) const {
|
||||
llvm::Value *ptmp =
|
||||
ctx->GetElementPtrInst(ltmp, 0, i, "new_offset");
|
||||
llvm::Value *initLValue =
|
||||
ctx->GetElementPtrInst(basePtr , 0,
|
||||
indices[i], "orig_offset");
|
||||
ctx->GetElementPtrInst(basePtr, 0, indices[i], "orig_offset");
|
||||
llvm::Value *initValue =
|
||||
ctx->LoadInst(initLValue, NULL, memberType->GetElementType(),
|
||||
"vec_element");
|
||||
@@ -2878,6 +2969,7 @@ VectorMemberExpr::GetValue(FunctionEmitContext* ctx) const {
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
int
|
||||
VectorMemberExpr::getElementNumber() const {
|
||||
int elementNumber = lIdentifierToVectorElement(identifier[0]);
|
||||
@@ -2887,43 +2979,51 @@ VectorMemberExpr::getElementNumber() const {
|
||||
return elementNumber;
|
||||
}
|
||||
|
||||
|
||||
const Type *
|
||||
VectorMemberExpr::getElementType() const {
|
||||
return memberType;
|
||||
}
|
||||
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
// ReferenceMemberExpr
|
||||
|
||||
class ReferenceMemberExpr : public MemberExpr
|
||||
{
|
||||
public:
|
||||
ReferenceMemberExpr(Expr *e, const char *id, SourcePos p,
|
||||
SourcePos idpos, const ReferenceType* referenceType);
|
||||
|
||||
const Type* GetType() const;
|
||||
const Type *GetType() const;
|
||||
llvm::Value *GetLValue(FunctionEmitContext* ctx) const;
|
||||
|
||||
int getElementNumber() const;
|
||||
|
||||
llvm::Value* GetLValue(FunctionEmitContext* ctx) const;
|
||||
const Type *getElementType() const;
|
||||
|
||||
private:
|
||||
const ReferenceType* exprReferenceType;
|
||||
MemberExpr* dereferencedExpr;
|
||||
const ReferenceType *exprReferenceType;
|
||||
MemberExpr *dereferencedExpr;
|
||||
};
|
||||
|
||||
ReferenceMemberExpr::ReferenceMemberExpr(Expr *e, const char *id, SourcePos p,
|
||||
SourcePos idpos,
|
||||
const ReferenceType* referenceType)
|
||||
const ReferenceType *referenceType)
|
||||
: MemberExpr(e, id, p, idpos), exprReferenceType(referenceType) {
|
||||
const Type* refTarget = exprReferenceType->GetReferenceTarget();
|
||||
const StructType* structType
|
||||
= dynamic_cast<const StructType *>(refTarget);
|
||||
const VectorType* vectorType
|
||||
= dynamic_cast<const VectorType *>(refTarget);
|
||||
const Type *refTarget = exprReferenceType->GetReferenceTarget();
|
||||
const StructType *structType = dynamic_cast<const StructType *>(refTarget);
|
||||
const VectorType *vectorType = dynamic_cast<const VectorType *>(refTarget);
|
||||
|
||||
if (structType != NULL) {
|
||||
if (structType != NULL)
|
||||
dereferencedExpr = new StructMemberExpr(e, id, p, idpos, structType);
|
||||
} else if (vectorType != NULL) {
|
||||
else if (vectorType != NULL)
|
||||
dereferencedExpr = new VectorMemberExpr(e, id, p, idpos, vectorType);
|
||||
} else {
|
||||
else
|
||||
dereferencedExpr = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
const Type*
|
||||
|
||||
const Type *
|
||||
ReferenceMemberExpr::GetType() const {
|
||||
if (dereferencedExpr == NULL) {
|
||||
Error(pos, "Can't access member of non-struct/vector type \"%s\".",
|
||||
@@ -2934,6 +3034,7 @@ ReferenceMemberExpr::GetType() const {
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
int
|
||||
ReferenceMemberExpr::getElementNumber() const {
|
||||
if (dereferencedExpr == NULL) {
|
||||
@@ -2945,7 +3046,15 @@ ReferenceMemberExpr::getElementNumber() const {
|
||||
}
|
||||
}
|
||||
|
||||
llvm::Value*
|
||||
|
||||
const Type *
|
||||
ReferenceMemberExpr::getElementType() const {
|
||||
assert(dereferencedExpr != NULL);
|
||||
return dereferencedExpr->getElementType();
|
||||
}
|
||||
|
||||
|
||||
llvm::Value *
|
||||
ReferenceMemberExpr::GetLValue(FunctionEmitContext* ctx) const {
|
||||
if (dereferencedExpr == NULL) {
|
||||
// FIXME: again I think typechecking should have caught this
|
||||
@@ -2965,29 +3074,35 @@ ReferenceMemberExpr::GetLValue(FunctionEmitContext* ctx) const {
|
||||
return NULL;
|
||||
|
||||
ctx->SetDebugPos(pos);
|
||||
return ctx->GetElementPtrInst(basePtr, 0, elementNumber);
|
||||
llvm::Value *ptr = ctx->GetElementPtrInst(basePtr, 0, elementNumber);
|
||||
|
||||
const Type *elementType = getElementType();
|
||||
ptr = lAddVaryingOffsetsIfNeeded(ctx, ptr, GetType(), elementType);
|
||||
|
||||
return ptr;
|
||||
}
|
||||
|
||||
|
||||
MemberExpr*
|
||||
MemberExpr *
|
||||
MemberExpr::create(Expr *e, const char *id, SourcePos p, SourcePos idpos) {
|
||||
const Type* exprType;
|
||||
const Type *exprType;
|
||||
if (e == NULL || (exprType = e->GetType()) == NULL)
|
||||
return new MemberExpr(e, id, p, idpos);
|
||||
return NULL;
|
||||
|
||||
const StructType* structType = dynamic_cast<const StructType*>(exprType);
|
||||
const StructType *structType = dynamic_cast<const StructType*>(exprType);
|
||||
if (structType != NULL)
|
||||
return new StructMemberExpr(e, id, p, idpos, structType);
|
||||
|
||||
const VectorType* vectorType = dynamic_cast<const VectorType*>(exprType);
|
||||
const VectorType *vectorType = dynamic_cast<const VectorType*>(exprType);
|
||||
if (vectorType != NULL)
|
||||
return new VectorMemberExpr(e, id, p, idpos, vectorType);
|
||||
|
||||
const ReferenceType* referenceType = dynamic_cast<const ReferenceType*>(exprType);
|
||||
const ReferenceType *referenceType = dynamic_cast<const ReferenceType*>(exprType);
|
||||
if (referenceType != NULL)
|
||||
return new ReferenceMemberExpr(e, id, p, idpos, referenceType);
|
||||
|
||||
return new MemberExpr(e, id, p, idpos);
|
||||
FATAL("Unexpected case in MemberExpr::create()");
|
||||
return NULL;
|
||||
}
|
||||
|
||||
|
||||
@@ -3024,6 +3139,8 @@ MemberExpr::GetValue(FunctionEmitContext *ctx) const {
|
||||
if (elementNumber == -1)
|
||||
return NULL;
|
||||
lvalue = ctx->GetElementPtrInst(ptr, 0, elementNumber);
|
||||
lvalue = lAddVaryingOffsetsIfNeeded(ctx, lvalue, GetType(), getElementType());
|
||||
|
||||
mask = LLVMMaskAllOn;
|
||||
}
|
||||
else {
|
||||
@@ -3074,7 +3191,10 @@ MemberExpr::GetLValue(FunctionEmitContext *ctx) const {
|
||||
return NULL;
|
||||
|
||||
ctx->SetDebugPos(pos);
|
||||
return ctx->GetElementPtrInst(basePtr, 0, elementNumber);
|
||||
llvm::Value *ptr = ctx->GetElementPtrInst(basePtr, 0, elementNumber);
|
||||
ptr = lAddVaryingOffsetsIfNeeded(ctx, ptr, GetType(), getElementType());
|
||||
|
||||
return ptr;
|
||||
}
|
||||
|
||||
|
||||
|
||||
4
expr.h
4
expr.h
@@ -303,8 +303,8 @@ public:
|
||||
Expr *TypeCheck();
|
||||
int EstimateCost() const;
|
||||
|
||||
virtual int getElementNumber() const;
|
||||
|
||||
virtual int getElementNumber() const = 0;
|
||||
virtual const Type *getElementType() const = 0;
|
||||
std::string getCandidateNearMatches() const;
|
||||
|
||||
Expr *expr;
|
||||
|
||||
70
opt.cpp
70
opt.cpp
@@ -899,7 +899,7 @@ lGetTypeSize(LLVM_TYPE_CONST llvm::Type *type, llvm::Instruction *insertBefore)
|
||||
static llvm::Value *
|
||||
lTraverseConstantExpr(llvm::Constant *value, llvm::Value **offsetPtr,
|
||||
LLVM_TYPE_CONST llvm::Type **scaleType,
|
||||
bool *leafIsVarying, llvm::Instruction *insertBefore) {
|
||||
llvm::Instruction *insertBefore) {
|
||||
llvm::GlobalVariable *gv = NULL;
|
||||
llvm::ConstantExpr *ce = llvm::dyn_cast<llvm::ConstantExpr>(value);
|
||||
if (ce != NULL) {
|
||||
@@ -907,7 +907,7 @@ lTraverseConstantExpr(llvm::Constant *value, llvm::Value **offsetPtr,
|
||||
case llvm::Instruction::BitCast:
|
||||
*offsetPtr = LLVMInt32(0);
|
||||
return lTraverseConstantExpr(ce->getOperand(0), offsetPtr,
|
||||
scaleType, leafIsVarying, insertBefore);
|
||||
scaleType, insertBefore);
|
||||
case llvm::Instruction::GetElementPtr: {
|
||||
gv = llvm::dyn_cast<llvm::GlobalVariable>(ce->getOperand(0));
|
||||
assert(gv != NULL);
|
||||
@@ -943,30 +943,13 @@ lTraverseConstantExpr(llvm::Constant *value, llvm::Value **offsetPtr,
|
||||
if (gv == NULL)
|
||||
gv = llvm::dyn_cast<llvm::GlobalVariable>(value);
|
||||
|
||||
if (gv != NULL) {
|
||||
// FIXME: is this broken for arrays of varying???!? (I think so).
|
||||
// IF so, then why does the other copy if it work. (Or is that
|
||||
// broken, too?!?!?)
|
||||
if (leafIsVarying != NULL) {
|
||||
LLVM_TYPE_CONST llvm::Type *pt = value->getType();
|
||||
LLVM_TYPE_CONST llvm::PointerType *ptrType =
|
||||
llvm::dyn_cast<LLVM_TYPE_CONST llvm::PointerType>(pt);
|
||||
assert(ptrType);
|
||||
LLVM_TYPE_CONST llvm::Type *eltType = ptrType->getElementType();
|
||||
*leafIsVarying = llvm::isa<LLVM_TYPE_CONST llvm::VectorType>(eltType);
|
||||
//printf("decided that leaf %s varying!\n", *leafIsVarying ? "is" : "is not");
|
||||
}
|
||||
|
||||
return gv;
|
||||
}
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
|
||||
static llvm::Value *
|
||||
lGetOffsetForLane(int lane, llvm::Value *value, llvm::Value **offset,
|
||||
LLVM_TYPE_CONST llvm::Type **scaleType, bool *leafIsVarying,
|
||||
LLVM_TYPE_CONST llvm::Type **scaleType,
|
||||
llvm::Instruction *insertBefore) {
|
||||
if (!llvm::isa<llvm::GetElementPtrInst>(value)) {
|
||||
assert(llvm::isa<llvm::BitCastInst>(value));
|
||||
@@ -986,15 +969,6 @@ lGetOffsetForLane(int lane, llvm::Value *value, llvm::Value **offset,
|
||||
value = iv->getOperand(1);
|
||||
}
|
||||
|
||||
if (leafIsVarying != NULL) {
|
||||
LLVM_TYPE_CONST llvm::Type *pt = value->getType();
|
||||
LLVM_TYPE_CONST llvm::PointerType *ptrType =
|
||||
llvm::dyn_cast<LLVM_TYPE_CONST llvm::PointerType>(pt);
|
||||
assert(ptrType);
|
||||
LLVM_TYPE_CONST llvm::Type *eltType = ptrType->getElementType();
|
||||
*leafIsVarying = llvm::isa<LLVM_TYPE_CONST llvm::VectorType>(eltType);
|
||||
}
|
||||
|
||||
llvm::GetElementPtrInst *gep = llvm::dyn_cast<llvm::GetElementPtrInst>(value);
|
||||
assert(gep);
|
||||
|
||||
@@ -1041,18 +1015,13 @@ lGetOffsetForLane(int lane, llvm::Value *value, llvm::Value **offset,
|
||||
deconstructs the LLVM array, storing the offset from the base pointer
|
||||
as an llvm::Value for the i'th element into the i'th element of the
|
||||
offsets[] array passed in to the function. It returns a scale factor
|
||||
for the offsets via *scaleType, and sets *leafIsVarying to true if the
|
||||
leaf data type being indexed into is a 'varying' ispc type. The
|
||||
return value is either the base pointer or an array of pointers for
|
||||
the next dimension of indexing (that we'll in turn deconstruct with
|
||||
this function).
|
||||
|
||||
@todo All of the additional indexing magic for varying stuff should
|
||||
happen in the front end.
|
||||
for the offsets via *scaleType. The return value is either the base
|
||||
pointer or an array of pointers for the next dimension of indexing
|
||||
(that we'll in turn deconstruct with this function).
|
||||
*/
|
||||
static llvm::Value *
|
||||
lTraverseInsertChain(llvm::Value *ptrs, llvm::Value *offsets[ISPC_MAX_NVEC],
|
||||
LLVM_TYPE_CONST llvm::Type **scaleType, bool *leafIsVarying,
|
||||
LLVM_TYPE_CONST llvm::Type **scaleType,
|
||||
llvm::Instruction *insertBefore) {
|
||||
// The pointer values may be an array of constant pointers (this
|
||||
// happens, for example, when indexing into global arrays.) In that
|
||||
@@ -1064,8 +1033,7 @@ lTraverseInsertChain(llvm::Value *ptrs, llvm::Value *offsets[ISPC_MAX_NVEC],
|
||||
llvm::Value *base = NULL;
|
||||
for (int i = 0; i < g->target.vectorWidth; ++i) {
|
||||
llvm::Value *b = lTraverseConstantExpr(ca->getOperand(i), &offsets[i],
|
||||
scaleType, leafIsVarying,
|
||||
insertBefore);
|
||||
scaleType, insertBefore);
|
||||
if (i == 0)
|
||||
base = b;
|
||||
else
|
||||
@@ -1102,7 +1070,7 @@ lTraverseInsertChain(llvm::Value *ptrs, llvm::Value *offsets[ISPC_MAX_NVEC],
|
||||
// array being indexed into.
|
||||
llvm::Value *myNext = lGetOffsetForLane(elementIndex, ivInst->getOperand(1),
|
||||
&offsets[elementIndex], scaleType,
|
||||
leafIsVarying, insertBefore);
|
||||
insertBefore);
|
||||
if (nextChain == NULL)
|
||||
nextChain = myNext;
|
||||
else
|
||||
@@ -1144,7 +1112,6 @@ static llvm::Value *
|
||||
lGetPtrAndOffsets(llvm::Value *ptrs, llvm::Value **basePtr,
|
||||
llvm::Instruction *insertBefore, int eltSize) {
|
||||
llvm::Value *offset = LLVMInt32Vector(0);
|
||||
bool firstLoop = true, leafIsVarying;
|
||||
|
||||
while (ptrs != NULL) {
|
||||
llvm::Value *offsets[ISPC_MAX_NVEC];
|
||||
@@ -1153,8 +1120,7 @@ lGetPtrAndOffsets(llvm::Value *ptrs, llvm::Value **basePtr,
|
||||
LLVM_TYPE_CONST llvm::Type *scaleType = NULL;
|
||||
|
||||
llvm::Value *nextChain =
|
||||
lTraverseInsertChain(ptrs, offsets, &scaleType,
|
||||
firstLoop ? &leafIsVarying : NULL, insertBefore);
|
||||
lTraverseInsertChain(ptrs, offsets, &scaleType, insertBefore);
|
||||
|
||||
for (int i = 0; i < g->target.vectorWidth; ++i)
|
||||
assert(offsets[i] != NULL);
|
||||
@@ -1185,22 +1151,6 @@ lGetPtrAndOffsets(llvm::Value *ptrs, llvm::Value **basePtr,
|
||||
*basePtr = nextChain;
|
||||
break;
|
||||
}
|
||||
firstLoop = false;
|
||||
}
|
||||
|
||||
// handle varying stuff...
|
||||
if (leafIsVarying) {
|
||||
llvm::Value *deltaVector = llvm::UndefValue::get(LLVMTypes::Int32VectorType);
|
||||
for (int i = 0; i < g->target.vectorWidth; ++i) {
|
||||
deltaVector =
|
||||
llvm::InsertElementInst::Create(deltaVector, LLVMInt32(eltSize*i),
|
||||
LLVMInt32(i), "delta", insertBefore);
|
||||
lCopyMetadata(deltaVector, insertBefore);
|
||||
}
|
||||
offset = llvm::BinaryOperator::Create(llvm::Instruction::Add, offset,
|
||||
deltaVector, "offset_varying_delta",
|
||||
insertBefore);
|
||||
lCopyMetadata(offset, insertBefore);
|
||||
}
|
||||
|
||||
return offset;
|
||||
|
||||
@@ -6,15 +6,17 @@ struct Foo {
|
||||
float x;
|
||||
float f;
|
||||
};
|
||||
|
||||
export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) {
|
||||
float a = aFOO[programIndex];
|
||||
uniform Foo myFoo[3] = { { a, a }, {a, a}, {a, a} };
|
||||
int i = 1;
|
||||
uniform Foo myFoo[3] = { { -1, -2 }, {a, -3}, {-4, -5} };
|
||||
int i = aFOO[0];
|
||||
varying Foo barFoo = myFoo[i];
|
||||
//CO print("% %\n", myFoo[i].x, barFoo.x);
|
||||
RET[programIndex] = barFoo.x;
|
||||
}
|
||||
|
||||
|
||||
export void result(uniform float RET[4]) {
|
||||
export void result(uniform float RET[]) {
|
||||
RET[programIndex] = 1+programIndex;
|
||||
}
|
||||
|
||||
@@ -12,12 +12,12 @@ export void f_fi(uniform float RET[], uniform float a[], uniform int bFOO[]) {
|
||||
uniform int i;
|
||||
for (i = 0; i < 17; ++i) {
|
||||
myFoo[i].x = i;
|
||||
myFoo[i].f = 2*i;
|
||||
myFoo[i].f = 17+2*i;
|
||||
}
|
||||
RET[programIndex] = myFoo[b/2].f;
|
||||
}
|
||||
|
||||
|
||||
export void result(uniform float RET[]) {
|
||||
RET[programIndex] = 2 + 2 * programIndex;
|
||||
RET[programIndex] = 19 + 2 * programIndex;
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user