From 290032f4f52333f7fdd796c22430960c178cf12e Mon Sep 17 00:00:00 2001
From: Matt Pharr
Date: Mon, 17 Oct 2011 20:25:52 -0400
Subject: [PATCH] Be more careful about using the right mask when emitting
 gathers.

Specifically, we had been using the full mask for all gathers, rather
than using the internal mask when we were loading from locally-declared
arrays.  Thus, given code like:

uniform float x[programCount] = { ... };
float xx = x[programIndex];

Previously, when this code was in a function where it wasn't known that
the mask was all on, we weren't generating a plain vector load to
initialize xx, even though we should have.  Now we do.
---
 ctx.cpp  | 58 +++++++++++++++++++++++++++++++-------------------------
 ctx.h    | 18 +++++++++---------
 expr.cpp | 57 ++++++++++++++++++++++++++++++++++++++-----------------
 func.cpp |  4 ++--
 4 files changed, 83 insertions(+), 54 deletions(-)

diff --git a/ctx.cpp b/ctx.cpp
index d73b85ff..fe0c2953 100644
--- a/ctx.cpp
+++ b/ctx.cpp
@@ -241,7 +241,7 @@ FunctionEmitContext::GetInternalMask() {
     if (VaryingCFDepth() == 0)
         return LLVMMaskAllOn;
     else
-        return LoadInst(internalMaskPointer, NULL, "load_mask");
+        return LoadInst(internalMaskPointer, NULL, NULL, "load_mask");
 }
 
@@ -376,9 +376,9 @@ FunctionEmitContext::EndIf() {
 
     // newMask = (oldMask & ~(breakLanes | continueLanes))
     llvm::Value *oldMask = GetInternalMask();
-    llvm::Value *breakLanes = LoadInst(breakLanesPtr, NULL,
+    llvm::Value *breakLanes = LoadInst(breakLanesPtr, NULL, NULL,
                                        "break_lanes");
-    llvm::Value *continueLanes = LoadInst(continueLanesPtr, NULL,
+    llvm::Value *continueLanes = LoadInst(continueLanesPtr, NULL, NULL,
                                           "continue_lanes");
     llvm::Value *breakOrContinueLanes = 
         BinaryOperator(llvm::Instruction::Or, breakLanes, continueLanes,
@@ -455,7 +455,8 @@ FunctionEmitContext::restoreMaskGivenReturns(llvm::Value *oldMask) {
     // Restore the mask to the given old mask, but leave off any lanes that
     // executed a return statement.
     // newMask = (oldMask & ~returnedLanes)
-    llvm::Value *returnedLanes = LoadInst(returnedLanesPtr, NULL, "returned_lanes");
+    llvm::Value *returnedLanes = LoadInst(returnedLanesPtr, NULL, NULL,
+                                          "returned_lanes");
     llvm::Value *notReturned = NotOperator(returnedLanes, "~returned_lanes");
     llvm::Value *newMask = BinaryOperator(llvm::Instruction::And, oldMask,
                                           notReturned, "new_mask");
@@ -487,7 +488,8 @@ FunctionEmitContext::Break(bool doCoherenceCheck) {
     // breakLanes = breakLanes | mask
     assert(breakLanesPtr != NULL);
     llvm::Value *mask = GetInternalMask();
-    llvm::Value *breakMask = LoadInst(breakLanesPtr, NULL, "break_mask");
+    llvm::Value *breakMask = LoadInst(breakLanesPtr, NULL, NULL,
+                                      "break_mask");
     llvm::Value *newMask = BinaryOperator(llvm::Instruction::Or, mask,
                                           breakMask, "mask|break_mask");
     StoreInst(newMask, breakLanesPtr);
@@ -536,7 +538,7 @@ FunctionEmitContext::Continue(bool doCoherenceCheck) {
     assert(continueLanesPtr);
     llvm::Value *mask = GetInternalMask();
     llvm::Value *continueMask = 
-        LoadInst(continueLanesPtr, NULL, "continue_mask");
+        LoadInst(continueLanesPtr, NULL, NULL, "continue_mask");
     llvm::Value *newMask = BinaryOperator(llvm::Instruction::Or, mask,
                                           continueMask, "mask|continueMask");
     StoreInst(newMask, continueLanesPtr);
@@ -580,9 +582,12 @@ FunctionEmitContext::jumpIfAllLoopLanesAreDone(llvm::BasicBlock *target) {
     // Check to see if (returned lanes | continued lanes | break lanes) is
     // equal to the value of mask at the start of the loop iteration.  If
     // so, everyone is done and we can jump to the given target
-    llvm::Value *returned = LoadInst(returnedLanesPtr, NULL, "returned_lanes");
-    llvm::Value *continued = LoadInst(continueLanesPtr, NULL, "continue_lanes");
-    llvm::Value *breaked = LoadInst(breakLanesPtr, NULL, "break_lanes");
+    llvm::Value *returned = LoadInst(returnedLanesPtr, NULL, NULL,
+                                     "returned_lanes");
+    llvm::Value *continued = LoadInst(continueLanesPtr, NULL, NULL,
+                                      "continue_lanes");
+    llvm::Value *breaked = LoadInst(breakLanesPtr, NULL, NULL,
+                                    "break_lanes");
     llvm::Value *returnedOrContinued = BinaryOperator(llvm::Instruction::Or,
                                                       returned, continued,
                                                       "returned|continued");
@@ -616,7 +621,8 @@ FunctionEmitContext::RestoreContinuedLanes() {
     // mask = mask & continueFlags
     llvm::Value *mask = GetInternalMask();
-    llvm::Value *continueMask = LoadInst(continueLanesPtr, NULL, "continue_mask");
+    llvm::Value *continueMask = LoadInst(continueLanesPtr, NULL, NULL,
+                                         "continue_mask");
     llvm::Value *orMask = BinaryOperator(llvm::Instruction::Or, mask,
                                          continueMask, "mask|continue_mask");
     SetInternalMask(orMask);
@@ -672,7 +678,7 @@ FunctionEmitContext::CurrentLanesReturned(Expr *expr, bool doCoherenceCheck) {
     else {
         // Otherwise we update the returnedLanes value by ANDing it with
         // the current lane mask.
-        llvm::Value *oldReturnedLanes = LoadInst(returnedLanesPtr, NULL,
+        llvm::Value *oldReturnedLanes = LoadInst(returnedLanesPtr, NULL, NULL,
                                                  "old_returned_lanes");
         llvm::Value *newReturnedLanes = 
             BinaryOperator(llvm::Instruction::Or, oldReturnedLanes,
@@ -1373,8 +1379,8 @@ FunctionEmitContext::GetElementPtrInst(llvm::Value *basePtr, int v0, int v1,
 
 
 llvm::Value *
-FunctionEmitContext::LoadInst(llvm::Value *lvalue, const Type *type,
-                              const char *name) {
+FunctionEmitContext::LoadInst(llvm::Value *lvalue, llvm::Value *mask,
+                              const Type *type, const char *name) {
     if (lvalue == NULL) {
         assert(m->errorCount > 0);
         return NULL;
     }
@@ -1406,16 +1412,16 @@ FunctionEmitContext::LoadInst(llvm::Value *lvalue, const Type *type,
         // gather path here (we can't reliably figure out all of the type
         // information we need from the LLVM::Type, so have to carry the
         // ispc type in through this path..
-        assert(type != NULL);
+        assert(type != NULL && mask != NULL);
         assert(llvm::isa(lvalue->getType()));
-        return gather(lvalue, type, name);
+        return gather(lvalue, mask, type, name);
     }
 }
 
 
 llvm::Value *
-FunctionEmitContext::gather(llvm::Value *lvalue, const Type *type,
-                            const char *name) {
+FunctionEmitContext::gather(llvm::Value *lvalue, llvm::Value *mask,
+                            const Type *type, const char *name) {
     // We should have a varying lvalue if we get here...
     assert(llvm::dyn_cast(lvalue->getType()));
@@ -1429,8 +1435,8 @@ FunctionEmitContext::gather(llvm::Value *lvalue, const Type *type,
         for (int i = 0; i < st->GetElementCount(); ++i) {
             llvm::Value *eltPtrs = GetElementPtrInst(lvalue, 0, i);
             // This in turn will be another gather
-            llvm::Value *eltValues = LoadInst(eltPtrs, st->GetElementType(i),
-                                              name);
+            llvm::Value *eltValues = 
+                LoadInst(eltPtrs, mask, st->GetElementType(i), name);
             retValue = InsertInst(retValue, eltValues, i, "set_value");
         }
         return retValue;
@@ -1451,7 +1457,8 @@ FunctionEmitContext::gather(llvm::Value *lvalue, const Type *type,
 
         for (int i = 0; i < vt->GetElementCount(); ++i) {
             llvm::Value *eltPtrs = GetElementPtrInst(lvalue, 0, i);
-            llvm::Value *eltValues = LoadInst(eltPtrs, vt->GetBaseType(), name);
+            llvm::Value *eltValues = LoadInst(eltPtrs, mask, vt->GetBaseType(),
+                                              name);
             retValue = InsertInst(retValue, eltValues, i, "set_value");
         }
         return retValue;
@@ -1463,7 +1470,8 @@ FunctionEmitContext::gather(llvm::Value *lvalue, const Type *type,
         llvm::Value *retValue = llvm::UndefValue::get(retType);
         for (int i = 0; i < at->GetElementCount(); ++i) {
             llvm::Value *eltPtrs = GetElementPtrInst(lvalue, 0, i);
-            llvm::Value *eltValues = LoadInst(eltPtrs, at->GetElementType(), name);
+            llvm::Value *eltValues = LoadInst(eltPtrs, mask,
+                                              at->GetElementType(), name);
             retValue = InsertInst(retValue, eltValues, i, "set_value");
         }
         return retValue;
@@ -1473,7 +1481,6 @@ FunctionEmitContext::gather(llvm::Value *lvalue, const Type *type,
     // do the actual gather
     AddInstrumentationPoint("gather");
 
-    llvm::Value *mask = GetFullMask();
     llvm::Function *gather = NULL;
     // Figure out which gather function to call based on the size of
     // the elements.
@@ -1935,17 +1942,16 @@ FunctionEmitContext::ReturnInst() {
         // Add a sync call at the end of any function that launched tasks
         SyncInst();
 
-    const Type *returnType = function->GetReturnType();
     llvm::Instruction *rinst = NULL;
     if (returnValuePtr != NULL) {
         // We have value(s) to return; load them from their storage
         // location
-        llvm::Value *retVal = LoadInst(returnValuePtr, returnType,
+        llvm::Value *retVal = LoadInst(returnValuePtr, NULL, NULL,
                                        "return_value");
         rinst = llvm::ReturnInst::Create(*g->ctx, retVal, bblock);
     }
     else {
-        assert(returnType == AtomicType::Void);
+        assert(function->GetReturnType() == AtomicType::Void);
         rinst = llvm::ReturnInst::Create(*g->ctx, bblock);
     }
@@ -2016,7 +2022,7 @@ FunctionEmitContext::LaunchInst(llvm::Function *callee,
 
 void
 FunctionEmitContext::SyncInst() {
-    llvm::Value *launchGroupHandle = LoadInst(launchGroupHandlePtr, NULL);
+    llvm::Value *launchGroupHandle = LoadInst(launchGroupHandlePtr, NULL, NULL);
     llvm::Value *nullPtrValue = 
         llvm::Constant::getNullValue(LLVMTypes::VoidPointerType);
     llvm::Value *nonNull = CmpInst(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_NE,
diff --git a/ctx.h b/ctx.h
index da524cca..22662c59 100644
--- a/ctx.h
+++ b/ctx.h
@@ -342,13 +342,13 @@ public:
     llvm::Value *GetElementPtrInst(llvm::Value *basePtr, int v0, int v1,
                                    const char *name = NULL);
 
-    /** Load from the memory location(s) given by lvalue.  The lvalue may
-        be varying, in which case this corresponds to a gather from the
-        multiple memory locations given by the array of pointer values
-        given by the lvalue.  If the lvalue is not varying, then the type
-        parameter may be NULL. */
-    llvm::Value *LoadInst(llvm::Value *lvalue, const Type *type,
-                          const char *name = NULL);
+    /** Load from the memory location(s) given by lvalue, using the given
+        mask.  The lvalue may be varying, in which case this corresponds to
+        a gather from the multiple memory locations given by the array of
+        pointer values given by the lvalue.  If the lvalue is not varying,
+        then both the mask pointer and the type pointer may be NULL. */
+    llvm::Value *LoadInst(llvm::Value *lvalue, llvm::Value *mask,
+                          const Type *type, const char *name = NULL);
 
     /** Emits an alloca instruction to allocate stack storage for the given
         type.  If a non-zero alignment is specified, the object is also
@@ -521,8 +521,8 @@ private:
     void scatter(llvm::Value *rvalue, llvm::Value *lvalue, 
                  llvm::Value *maskPtr, const Type *rvalueType);
-    llvm::Value *gather(llvm::Value *lvalue, const Type *type,
-                        const char *name);
+    llvm::Value *gather(llvm::Value *lvalue, llvm::Value *mask,
+                        const Type *type, const char *name);
     void maskedStore(llvm::Value *rvalue, llvm::Value *lvalue,
                      const Type *rvalueType, llvm::Value *maskPtr);
 };
diff --git a/expr.cpp b/expr.cpp
index 1e05c47f..6e522a80 100644
--- a/expr.cpp
+++ b/expr.cpp
@@ -457,6 +457,15 @@ lLLVMConstantValue(const Type *type, llvm::LLVMContext *ctx, double value) {
 }
 
 
+static llvm::Value *
+lMaskForSymbol(Symbol *baseSym, FunctionEmitContext *ctx) {
+    llvm::Value *mask = (baseSym->parentFunction == ctx->GetFunction() &&
+                         baseSym->storageClass != SC_STATIC) ?
+        ctx->GetInternalMask() : ctx->GetFullMask();
+    return mask;
+}
+
+
 /** Store the result of an assignment to the given location. */
 static void
@@ -479,10 +488,7 @@ lStoreAssignResult(llvm::Value *rv, llvm::Value *lv, const Type *type,
         ctx->StoreInst(rv, lv, LLVMMaskAllOn, type);
     }
     else {
-        llvm::Value *mask = (baseSym->parentFunction == ctx->GetFunction() &&
-                             baseSym->storageClass != SC_STATIC) ?
-            ctx->GetInternalMask() : ctx->GetFullMask();
-        ctx->StoreInst(rv, lv, mask, type);
+        ctx->StoreInst(rv, lv, lMaskForSymbol(baseSym, ctx), type);
     }
 }
@@ -1540,7 +1546,8 @@ lEmitOpAssign(AssignExpr::Op op, Expr *arg0, Expr *arg1, const Type *type,
     // operator and load the current value on the left-hand side.
     llvm::Value *rvalue = arg1->GetValue(ctx);
     ctx->SetDebugPos(pos);
-    llvm::Value *oldLHS = ctx->LoadInst(lv, type, "opassign_load");
+    llvm::Value *mask = lMaskForSymbol(baseSym, ctx);
+    llvm::Value *oldLHS = ctx->LoadInst(lv, mask, type, "opassign_load");
 
     // Map the operator to the corresponding BinaryExpr::Op operator
     BinaryExpr::Op basicop;
@@ -1794,7 +1801,7 @@ lEmitVaryingSelect(FunctionEmitContext *ctx, llvm::Value *test,
     ctx->StoreInst(expr2, resultPtr);
     // Use masking to conditionally store the expr1 values
     ctx->StoreInst(expr1, resultPtr, test, type);
-    return ctx->LoadInst(resultPtr, type, "selectexpr_final");
+    return ctx->LoadInst(resultPtr, LLVMMaskAllOn, type, "selectexpr_final");
 }
@@ -2573,7 +2580,8 @@ FunctionCallExpr::GetValue(FunctionEmitContext *ctx) const {
             const Type *type = rt->GetReferenceTarget();
             llvm::Value *ptr = ctx->AllocaInst(type->LLVMType(g->ctx), "arg");
-            llvm::Value *val = ctx->LoadInst(argValue, type);
+            llvm::Value *mask = lMaskForSymbol(argExpr->GetBaseSymbol(), ctx);
+            llvm::Value *val = ctx->LoadInst(argValue, mask, type);
             ctx->StoreInst(val, ptr);
             storedArgValPtrs.push_back(ptr);
             argValLValues.push_back(argValue);
@@ -2615,9 +2623,8 @@ FunctionCallExpr::GetValue(FunctionEmitContext *ctx) const {
             const ReferenceType *rt = 
                 dynamic_cast<const ReferenceType *>(callargs[i]->GetType());
             assert(rt != NULL);
-            llvm::Value *load = ctx->LoadInst(ptr, rt->GetReferenceTarget(),
+            llvm::Value *load = ctx->LoadInst(ptr, NULL, rt->GetReferenceTarget(),
                                               "load_ref");
-            Symbol *baseSym = callargs[i]->GetBaseSymbol();
             lStoreAssignResult(load, argValLValues[i], rt->GetReferenceTarget(),
                                ctx, baseSym);
@@ -2885,7 +2892,8 @@ IndexExpr::GetValue(FunctionEmitContext *ctx) const {
 
     ctx->SetDebugPos(pos);
     llvm::Value *lvalue = GetLValue(ctx);
-    if (!lvalue) {
+    llvm::Value *mask = NULL;
+    if (lvalue == NULL) {
         // We may be indexing into a temporary that hasn't hit memory, so
         // get the full value and stuff it into temporary alloca'd space so
         // that we can index from there...
@@ -2900,10 +2908,16 @@ IndexExpr::GetValue(FunctionEmitContext *ctx) const {
         ctx->StoreInst(val, ptr);
         ptr = lCastUniformVectorBasePtr(ptr, ctx);
         lvalue = ctx->GetElementPtrInst(ptr, LLVMInt32(0), index->GetValue(ctx));
+        mask = LLVMMaskAllOn;
+    }
+    else {
+        Symbol *baseSym = GetBaseSymbol();
+        assert(baseSym != NULL);
+        mask = lMaskForSymbol(baseSym, ctx);
     }
 
     ctx->SetDebugPos(pos);
-    return ctx->LoadInst(lvalue, GetType(), "index");
+    return ctx->LoadInst(lvalue, mask, GetType(), "index");
 }
@@ -3172,6 +3186,7 @@ VectorMemberExpr::GetType() const {
     }
 }
 
+
 llvm::Value*
 VectorMemberExpr::GetLValue(FunctionEmitContext* ctx) const {
     if (identifier.length() == 1) {
@@ -3181,6 +3196,7 @@ VectorMemberExpr::GetLValue(FunctionEmitContext* ctx) const {
     }
 }
 
+
 llvm::Value*
 VectorMemberExpr::GetValue(FunctionEmitContext* ctx) const {
     if (identifier.length() == 1) {
@@ -3214,12 +3230,12 @@ VectorMemberExpr::GetValue(FunctionEmitContext* ctx) const {
                 ctx->GetElementPtrInst(basePtr, 0, indices[i], "orig_offset");
             llvm::Value *initValue = 
-                ctx->LoadInst(initLValue, memberType->GetElementType(),
+                ctx->LoadInst(initLValue, NULL, memberType->GetElementType(),
                               "vec_element");
             ctx->StoreInst(initValue, ptmp);
         }
 
-        return ctx->LoadInst(ltmp, memberType, "swizzle_vec");
+        return ctx->LoadInst(ltmp, NULL, memberType, "swizzle_vec");
     }
 }
@@ -3349,7 +3365,8 @@ MemberExpr::GetValue(FunctionEmitContext *ctx) const {
         return NULL;
 
     llvm::Value *lvalue = GetLValue(ctx);
-    if (!lvalue) {
+    llvm::Value *mask = NULL;
+    if (lvalue == NULL) {
         // As in the array case, this may be a temporary that hasn't hit
         // memory; get the full value and stuff it into a temporary array
        // so that we can index from there...
@@ -3368,10 +3385,16 @@ MemberExpr::GetValue(FunctionEmitContext *ctx) const {
         if (elementNumber == -1)
             return NULL;
         lvalue = ctx->GetElementPtrInst(ptr, 0, elementNumber);
+        mask = LLVMMaskAllOn;
+    }
+    else {
+        Symbol *baseSym = GetBaseSymbol();
+        assert(baseSym != NULL);
+        mask = lMaskForSymbol(baseSym, ctx);
     }
 
     ctx->SetDebugPos(pos);
-    return ctx->LoadInst(lvalue, GetType(), "structelement");
+    return ctx->LoadInst(lvalue, mask, GetType(), "structelement");
 }
@@ -5269,7 +5292,7 @@ DereferenceExpr::GetValue(FunctionEmitContext *ctx) const {
         return NULL;
 
     ctx->SetDebugPos(pos);
-    return ctx->LoadInst(ptr, type, "reference_load");
+    return ctx->LoadInst(ptr, NULL, type, "reference_load");
 }
@@ -5347,7 +5370,7 @@ SymbolExpr::GetValue(FunctionEmitContext *ctx) const {
     if (!symbol || !symbol->storagePtr)
         return NULL;
     ctx->SetDebugPos(pos);
-    return ctx->LoadInst(symbol->storagePtr, GetType(), symbol->name.c_str());
+    return ctx->LoadInst(symbol->storagePtr, NULL, NULL, symbol->name.c_str());
 }
diff --git a/func.cpp b/func.cpp
index 4a37994f..ed15af49 100644
--- a/func.cpp
+++ b/func.cpp
@@ -418,7 +418,7 @@ lCopyInTaskParameter(int i, llvm::Value *structArgPtr, const std::vector
-    llvm::Value *ptrval = ctx->LoadInst(ptr, NULL, sym->name.c_str());
+    llvm::Value *ptrval = ctx->LoadInst(ptr, NULL, NULL, sym->name.c_str());
     ctx->StoreInst(ptrval, sym->storagePtr);
     ctx->EmitFunctionParameterDebugInfo(sym);
 }
@@ -465,7 +465,7 @@ Function::emitCode(FunctionEmitContext *ctx, llvm::Function *function,
         // The mask is the last parameter in the argument structure
         llvm::Value *ptr = ctx->GetElementPtrInst(structParamPtr, 0, nArgs,
                                                   "task_struct_mask");
-        llvm::Value *ptrval = ctx->LoadInst(ptr, NULL, "mask");
+        llvm::Value *ptrval = ctx->LoadInst(ptr, NULL, NULL, "mask");
         ctx->SetFunctionMask(ptrval);
 
         // Copy threadIndex and threadCount into stack-allocated storage so
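For reference, here is a minimal ispc sketch of the pattern this change
improves; the function and parameter names below are illustrative and not
taken from the patch:

// Not an exported function, so the execution mask is not statically
// known to be all-on when it is called.
void lookup(uniform float result[], float scale) {
    // Locally-declared, non-static uniform array: per lMaskForSymbol(),
    // loads from it now use the internal mask rather than the full mask.
    uniform float table[programCount];
    for (uniform int i = 0; i < programCount; ++i)
        table[i] = i;
    // Per the commit message, this per-lane read can now become a plain
    // vector load rather than a masked gather, even when the full mask
    // isn't known to be all-on.
    float t = table[programIndex];
    result[programIndex] = t * scale;
}

The mask choice is made per base symbol (lMaskForSymbol in expr.cpp):
symbols declared in the current function with non-static storage get the
internal mask; everything else continues to use the full mask, as before.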