From 3bf3ac792216217ff3c0fbe93e54b87927a992ff Mon Sep 17 00:00:00 2001
From: Matt Pharr
Date: Tue, 17 Jan 2012 18:28:55 -0800
Subject: [PATCH] Be more conservative about using blending in place of masked
 store.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

More specifically, we do a proper masked store (rather than a
load-blend-store) unless we can determine that we're accessing a
stack-allocated "varying" variable.

This fixes a number of nefarious bugs where, given code like:

    uniform float a[21];
    foreach (i = 0 ... 21)
        a[i] = 0;

we'd use a blend and in turn read past the end of a[] in the last
iteration.

Also made slight changes to inlining in aobench; these keep compile
time to ~5s, versus ~45s without them (with this change).

Fixes issue #160.
---
 examples/aobench/ao.ispc |  4 ++--
 opt.cpp                  | 34 ++++++++++++++++++++++++----------
 2 files changed, 26 insertions(+), 12 deletions(-)

diff --git a/examples/aobench/ao.ispc b/examples/aobench/ao.ispc
index 589a98b1..ffd85d29 100644
--- a/examples/aobench/ao.ispc
+++ b/examples/aobench/ao.ispc
@@ -82,7 +82,7 @@ static inline void vnormalize(vec &v) {
 }
 
 
-static inline void
+static void
 ray_plane_intersect(Isect &isect, Ray &ray, Plane &plane) {
     float d = -dot(plane.p, plane.n);
     float v = dot(ray.dir, plane.n);
@@ -147,7 +147,7 @@ orthoBasis(vec basis[3], vec n) {
 }
 
 
-static inline float
+static float
 ambient_occlusion(Isect &isect, Plane &plane, Sphere spheres[3],
                   RNGState &rngstate) {
     float eps = 0.0001f;
diff --git a/opt.cpp b/opt.cpp
index ebd6b8e0..b1f5c7a0 100644
--- a/opt.cpp
+++ b/opt.cpp
@@ -1674,18 +1674,32 @@ llvm::RegisterPass<LowerMaskedStorePass> lms("masked-store-lower",
     comes from an AllocaInst. */
 static bool
-lIsStackVariablePointer(llvm::Value *lvalue) {
+lIsSafeToBlend(llvm::Value *lvalue) {
     llvm::BitCastInst *bc = llvm::dyn_cast<llvm::BitCastInst>(lvalue);
-    if (bc)
-        return lIsStackVariablePointer(bc->getOperand(0));
+    if (bc != NULL)
+        return lIsSafeToBlend(bc->getOperand(0));
     else {
         llvm::AllocaInst *ai = llvm::dyn_cast<llvm::AllocaInst>(lvalue);
-        if (ai)
-            return true;
+        if (ai) {
+            LLVM_TYPE_CONST llvm::Type *type = ai->getType();
+            LLVM_TYPE_CONST llvm::PointerType *pt =
+                llvm::dyn_cast<LLVM_TYPE_CONST llvm::PointerType>(type);
+            assert(pt != NULL);
+            type = pt->getElementType();
+            LLVM_TYPE_CONST llvm::ArrayType *at;
+            while ((at = llvm::dyn_cast<LLVM_TYPE_CONST llvm::ArrayType>(type))) {
+                type = at->getElementType();
+            }
+            LLVM_TYPE_CONST llvm::VectorType *vt =
+                llvm::dyn_cast<LLVM_TYPE_CONST llvm::VectorType>(type);
+            return (vt != NULL &&
+                    (int)vt->getNumElements() == g->target.vectorWidth);
+        }
         else {
-            llvm::GetElementPtrInst *gep = llvm::dyn_cast<llvm::GetElementPtrInst>(lvalue);
-            if (gep)
-                return lIsStackVariablePointer(gep->getOperand(0));
+            llvm::GetElementPtrInst *gep =
+                llvm::dyn_cast<llvm::GetElementPtrInst>(lvalue);
+            if (gep != NULL)
+                return lIsSafeToBlend(gep->getOperand(0));
             else
                 return false;
         }
@@ -1747,8 +1761,8 @@ LowerMaskedStorePass::runOnBasicBlock(llvm::BasicBlock &bb) {
         // or serializing the masked store. Even on targets with a native
         // masked store instruction, this is preferable since it lets us
         // keep values in registers rather than going out to the stack.
-        bool doBlend = (!g->opt.disableBlendedMaskedStores ||
-                        lIsStackVariablePointer(lvalue));
+        bool doBlend = (!g->opt.disableBlendedMaskedStores &&
+                        lIsSafeToBlend(lvalue));
 
         // Generate the call to the appropriate masked store function and
         // replace the __pseudo_* one with it.
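
Note: to make the fix concrete, here is a minimal C++ sketch of the two
store strategies the pass chooses between. It is an illustrative model,
not ispc's actual codegen; WIDTH, blendStore, and maskedStore are
hypothetical names:

    #include <cstring>

    enum { WIDTH = 4 };  // stand-in for the gang/vector width

    // load-blend-store: always reads and writes WIDTH contiguous
    // elements. With ptr == &a[20] and uniform float a[21], the
    // full-vector load and store touch a[20..23], past the end of a[].
    void blendStore(float *ptr, const float val[WIDTH],
                    const bool mask[WIDTH]) {
        float old[WIDTH];
        std::memcpy(old, ptr, sizeof(old));      // full-vector load
        for (int i = 0; i < WIDTH; ++i)
            old[i] = mask[i] ? val[i] : old[i];  // per-lane blend
        std::memcpy(ptr, old, sizeof(old));      // full-vector store
    }

    // true masked store: touches only the lanes whose mask bit is set,
    // so a partially-active final iteration never reads or writes past
    // the end of the array.
    void maskedStore(float *ptr, const float val[WIDTH],
                     const bool mask[WIDTH]) {
        for (int i = 0; i < WIDTH; ++i)
            if (mask[i])
                ptr[i] = val[i];
    }

Blending remains safe when lIsSafeToBlend() succeeds: the pointer then
provably leads back to an alloca whose innermost element type is a full
gang-width vector, so even a partially-masked full-vector load/store
stays inside that variable's own stack storage.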