diff --git a/ctx.cpp b/ctx.cpp index d73b85ff..fe0c2953 100644 --- a/ctx.cpp +++ b/ctx.cpp @@ -241,7 +241,7 @@ FunctionEmitContext::GetInternalMask() { if (VaryingCFDepth() == 0) return LLVMMaskAllOn; else - return LoadInst(internalMaskPointer, NULL, "load_mask"); + return LoadInst(internalMaskPointer, NULL, NULL, "load_mask"); } @@ -376,9 +376,9 @@ FunctionEmitContext::EndIf() { // newMask = (oldMask & ~(breakLanes | continueLanes)) llvm::Value *oldMask = GetInternalMask(); - llvm::Value *breakLanes = LoadInst(breakLanesPtr, NULL, + llvm::Value *breakLanes = LoadInst(breakLanesPtr, NULL, NULL, "break_lanes"); - llvm::Value *continueLanes = LoadInst(continueLanesPtr, NULL, + llvm::Value *continueLanes = LoadInst(continueLanesPtr, NULL, NULL, "continue_lanes"); llvm::Value *breakOrContinueLanes = BinaryOperator(llvm::Instruction::Or, breakLanes, continueLanes, @@ -455,7 +455,8 @@ FunctionEmitContext::restoreMaskGivenReturns(llvm::Value *oldMask) { // Restore the mask to the given old mask, but leave off any lanes that // executed a return statement. // newMask = (oldMask & ~returnedLanes) - llvm::Value *returnedLanes = LoadInst(returnedLanesPtr, NULL, "returned_lanes"); + llvm::Value *returnedLanes = LoadInst(returnedLanesPtr, NULL, NULL, + "returned_lanes"); llvm::Value *notReturned = NotOperator(returnedLanes, "~returned_lanes"); llvm::Value *newMask = BinaryOperator(llvm::Instruction::And, oldMask, notReturned, "new_mask"); @@ -487,7 +488,8 @@ FunctionEmitContext::Break(bool doCoherenceCheck) { // breakLanes = breakLanes | mask assert(breakLanesPtr != NULL); llvm::Value *mask = GetInternalMask(); - llvm::Value *breakMask = LoadInst(breakLanesPtr, NULL, "break_mask"); + llvm::Value *breakMask = LoadInst(breakLanesPtr, NULL, NULL, + "break_mask"); llvm::Value *newMask = BinaryOperator(llvm::Instruction::Or, mask, breakMask, "mask|break_mask"); StoreInst(newMask, breakLanesPtr); @@ -536,7 +538,7 @@ FunctionEmitContext::Continue(bool doCoherenceCheck) { assert(continueLanesPtr); llvm::Value *mask = GetInternalMask(); llvm::Value *continueMask = - LoadInst(continueLanesPtr, NULL, "continue_mask"); + LoadInst(continueLanesPtr, NULL, NULL, "continue_mask"); llvm::Value *newMask = BinaryOperator(llvm::Instruction::Or, mask, continueMask, "mask|continueMask"); StoreInst(newMask, continueLanesPtr); @@ -580,9 +582,12 @@ FunctionEmitContext::jumpIfAllLoopLanesAreDone(llvm::BasicBlock *target) { // Check to see if (returned lanes | continued lanes | break lanes) is // equal to the value of mask at the start of the loop iteration. If // so, everyone is done and we can jump to the given target - llvm::Value *returned = LoadInst(returnedLanesPtr, NULL, "returned_lanes"); - llvm::Value *continued = LoadInst(continueLanesPtr, NULL, "continue_lanes"); - llvm::Value *breaked = LoadInst(breakLanesPtr, NULL, "break_lanes"); + llvm::Value *returned = LoadInst(returnedLanesPtr, NULL, NULL, + "returned_lanes"); + llvm::Value *continued = LoadInst(continueLanesPtr, NULL, NULL, + "continue_lanes"); + llvm::Value *breaked = LoadInst(breakLanesPtr, NULL, NULL, + "break_lanes"); llvm::Value *returnedOrContinued = BinaryOperator(llvm::Instruction::Or, returned, continued, "returned|continued"); @@ -616,7 +621,8 @@ FunctionEmitContext::RestoreContinuedLanes() { // mask = mask & continueFlags llvm::Value *mask = GetInternalMask(); - llvm::Value *continueMask = LoadInst(continueLanesPtr, NULL, "continue_mask"); + llvm::Value *continueMask = LoadInst(continueLanesPtr, NULL, NULL, + "continue_mask"); llvm::Value *orMask = BinaryOperator(llvm::Instruction::Or, mask, continueMask, "mask|continue_mask"); SetInternalMask(orMask); @@ -672,7 +678,7 @@ FunctionEmitContext::CurrentLanesReturned(Expr *expr, bool doCoherenceCheck) { else { // Otherwise we update the returnedLanes value by ANDing it with // the current lane mask. - llvm::Value *oldReturnedLanes = LoadInst(returnedLanesPtr, NULL, + llvm::Value *oldReturnedLanes = LoadInst(returnedLanesPtr, NULL, NULL, "old_returned_lanes"); llvm::Value *newReturnedLanes = BinaryOperator(llvm::Instruction::Or, oldReturnedLanes, @@ -1373,8 +1379,8 @@ FunctionEmitContext::GetElementPtrInst(llvm::Value *basePtr, int v0, int v1, llvm::Value * -FunctionEmitContext::LoadInst(llvm::Value *lvalue, const Type *type, - const char *name) { +FunctionEmitContext::LoadInst(llvm::Value *lvalue, llvm::Value *mask, + const Type *type, const char *name) { if (lvalue == NULL) { assert(m->errorCount > 0); return NULL; @@ -1406,16 +1412,16 @@ FunctionEmitContext::LoadInst(llvm::Value *lvalue, const Type *type, // gather path here (we can't reliably figure out all of the type // information we need from the LLVM::Type, so have to carry the // ispc type in through this path.. - assert(type != NULL); + assert(type != NULL && mask != NULL); assert(llvm::isa(lvalue->getType())); - return gather(lvalue, type, name); + return gather(lvalue, mask, type, name); } } llvm::Value * -FunctionEmitContext::gather(llvm::Value *lvalue, const Type *type, - const char *name) { +FunctionEmitContext::gather(llvm::Value *lvalue, llvm::Value *mask, + const Type *type, const char *name) { // We should have a varying lvalue if we get here... assert(llvm::dyn_cast(lvalue->getType())); @@ -1429,8 +1435,8 @@ FunctionEmitContext::gather(llvm::Value *lvalue, const Type *type, for (int i = 0; i < st->GetElementCount(); ++i) { llvm::Value *eltPtrs = GetElementPtrInst(lvalue, 0, i); // This in turn will be another gather - llvm::Value *eltValues = LoadInst(eltPtrs, st->GetElementType(i), - name); + llvm::Value *eltValues = + LoadInst(eltPtrs, mask, st->GetElementType(i), name); retValue = InsertInst(retValue, eltValues, i, "set_value"); } return retValue; @@ -1451,7 +1457,8 @@ FunctionEmitContext::gather(llvm::Value *lvalue, const Type *type, for (int i = 0; i < vt->GetElementCount(); ++i) { llvm::Value *eltPtrs = GetElementPtrInst(lvalue, 0, i); - llvm::Value *eltValues = LoadInst(eltPtrs, vt->GetBaseType(), name); + llvm::Value *eltValues = LoadInst(eltPtrs, mask, vt->GetBaseType(), + name); retValue = InsertInst(retValue, eltValues, i, "set_value"); } return retValue; @@ -1463,7 +1470,8 @@ FunctionEmitContext::gather(llvm::Value *lvalue, const Type *type, llvm::Value *retValue = llvm::UndefValue::get(retType); for (int i = 0; i < at->GetElementCount(); ++i) { llvm::Value *eltPtrs = GetElementPtrInst(lvalue, 0, i); - llvm::Value *eltValues = LoadInst(eltPtrs, at->GetElementType(), name); + llvm::Value *eltValues = LoadInst(eltPtrs, mask, + at->GetElementType(), name); retValue = InsertInst(retValue, eltValues, i, "set_value"); } return retValue; @@ -1473,7 +1481,6 @@ FunctionEmitContext::gather(llvm::Value *lvalue, const Type *type, // do the actual gather AddInstrumentationPoint("gather"); - llvm::Value *mask = GetFullMask(); llvm::Function *gather = NULL; // Figure out which gather function to call based on the size of // the elements. @@ -1935,17 +1942,16 @@ FunctionEmitContext::ReturnInst() { // Add a sync call at the end of any function that launched tasks SyncInst(); - const Type *returnType = function->GetReturnType(); llvm::Instruction *rinst = NULL; if (returnValuePtr != NULL) { // We have value(s) to return; load them from their storage // location - llvm::Value *retVal = LoadInst(returnValuePtr, returnType, + llvm::Value *retVal = LoadInst(returnValuePtr, NULL, NULL, "return_value"); rinst = llvm::ReturnInst::Create(*g->ctx, retVal, bblock); } else { - assert(returnType == AtomicType::Void); + assert(function->GetReturnType() == AtomicType::Void); rinst = llvm::ReturnInst::Create(*g->ctx, bblock); } @@ -2016,7 +2022,7 @@ FunctionEmitContext::LaunchInst(llvm::Function *callee, void FunctionEmitContext::SyncInst() { - llvm::Value *launchGroupHandle = LoadInst(launchGroupHandlePtr, NULL); + llvm::Value *launchGroupHandle = LoadInst(launchGroupHandlePtr, NULL, NULL); llvm::Value *nullPtrValue = llvm::Constant::getNullValue(LLVMTypes::VoidPointerType); llvm::Value *nonNull = CmpInst(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_NE, diff --git a/ctx.h b/ctx.h index da524cca..22662c59 100644 --- a/ctx.h +++ b/ctx.h @@ -342,13 +342,13 @@ public: llvm::Value *GetElementPtrInst(llvm::Value *basePtr, int v0, int v1, const char *name = NULL); - /** Load from the memory location(s) given by lvalue. The lvalue may - be varying, in which case this corresponds to a gather from the - multiple memory locations given by the array of pointer values - given by the lvalue. If the lvalue is not varying, then the type - parameter may be NULL. */ - llvm::Value *LoadInst(llvm::Value *lvalue, const Type *type, - const char *name = NULL); + /** Load from the memory location(s) given by lvalue, using the given + mask. The lvalue may be varying, in which case this corresponds to + a gather from the multiple memory locations given by the array of + pointer values given by the lvalue. If the lvalue is not varying, + then both the mask pointer and the type pointer may be NULL. */ + llvm::Value *LoadInst(llvm::Value *lvalue, llvm::Value *mask, + const Type *type, const char *name = NULL); /** Emits an alloca instruction to allocate stack storage for the given type. If a non-zero alignment is specified, the object is also @@ -521,8 +521,8 @@ private: void scatter(llvm::Value *rvalue, llvm::Value *lvalue, llvm::Value *maskPtr, const Type *rvalueType); - llvm::Value *gather(llvm::Value *lvalue, const Type *type, - const char *name); + llvm::Value *gather(llvm::Value *lvalue, llvm::Value *mask, + const Type *type, const char *name); void maskedStore(llvm::Value *rvalue, llvm::Value *lvalue, const Type *rvalueType, llvm::Value *maskPtr); }; diff --git a/expr.cpp b/expr.cpp index 1e05c47f..6e522a80 100644 --- a/expr.cpp +++ b/expr.cpp @@ -457,6 +457,15 @@ lLLVMConstantValue(const Type *type, llvm::LLVMContext *ctx, double value) { } +static llvm::Value * +lMaskForSymbol(Symbol *baseSym, FunctionEmitContext *ctx) { + llvm::Value *mask = (baseSym->parentFunction == ctx->GetFunction() && + baseSym->storageClass != SC_STATIC) ? + ctx->GetInternalMask() : ctx->GetFullMask(); + return mask; +} + + /** Store the result of an assignment to the given location. */ static void @@ -479,10 +488,7 @@ lStoreAssignResult(llvm::Value *rv, llvm::Value *lv, const Type *type, ctx->StoreInst(rv, lv, LLVMMaskAllOn, type); } else { - llvm::Value *mask = (baseSym->parentFunction == ctx->GetFunction() && - baseSym->storageClass != SC_STATIC) ? - ctx->GetInternalMask() : ctx->GetFullMask(); - ctx->StoreInst(rv, lv, mask, type); + ctx->StoreInst(rv, lv, lMaskForSymbol(baseSym, ctx), type); } } @@ -1540,7 +1546,8 @@ lEmitOpAssign(AssignExpr::Op op, Expr *arg0, Expr *arg1, const Type *type, // operator and load the current value on the left-hand side. llvm::Value *rvalue = arg1->GetValue(ctx); ctx->SetDebugPos(pos); - llvm::Value *oldLHS = ctx->LoadInst(lv, type, "opassign_load"); + llvm::Value *mask = lMaskForSymbol(baseSym, ctx); + llvm::Value *oldLHS = ctx->LoadInst(lv, mask, type, "opassign_load"); // Map the operator to the corresponding BinaryExpr::Op operator BinaryExpr::Op basicop; @@ -1794,7 +1801,7 @@ lEmitVaryingSelect(FunctionEmitContext *ctx, llvm::Value *test, ctx->StoreInst(expr2, resultPtr); // Use masking to conditionally store the expr1 values ctx->StoreInst(expr1, resultPtr, test, type); - return ctx->LoadInst(resultPtr, type, "selectexpr_final"); + return ctx->LoadInst(resultPtr, LLVMMaskAllOn, type, "selectexpr_final"); } @@ -2573,7 +2580,8 @@ FunctionCallExpr::GetValue(FunctionEmitContext *ctx) const { const Type *type = rt->GetReferenceTarget(); llvm::Value *ptr = ctx->AllocaInst(type->LLVMType(g->ctx), "arg"); - llvm::Value *val = ctx->LoadInst(argValue, type); + llvm::Value *mask = lMaskForSymbol(argExpr->GetBaseSymbol(), ctx); + llvm::Value *val = ctx->LoadInst(argValue, mask, type); ctx->StoreInst(val, ptr); storedArgValPtrs.push_back(ptr); argValLValues.push_back(argValue); @@ -2615,9 +2623,8 @@ FunctionCallExpr::GetValue(FunctionEmitContext *ctx) const { const ReferenceType *rt = dynamic_cast(callargs[i]->GetType()); assert(rt != NULL); - llvm::Value *load = ctx->LoadInst(ptr, rt->GetReferenceTarget(), + llvm::Value *load = ctx->LoadInst(ptr, NULL, rt->GetReferenceTarget(), "load_ref"); - Symbol *baseSym = callargs[i]->GetBaseSymbol(); lStoreAssignResult(load, argValLValues[i], rt->GetReferenceTarget(), ctx, baseSym); @@ -2885,7 +2892,8 @@ IndexExpr::GetValue(FunctionEmitContext *ctx) const { ctx->SetDebugPos(pos); llvm::Value *lvalue = GetLValue(ctx); - if (!lvalue) { + llvm::Value *mask = NULL; + if (lvalue == NULL) { // We may be indexing into a temporary that hasn't hit memory, so // get the full value and stuff it into temporary alloca'd space so // that we can index from there... @@ -2900,10 +2908,16 @@ IndexExpr::GetValue(FunctionEmitContext *ctx) const { ctx->StoreInst(val, ptr); ptr = lCastUniformVectorBasePtr(ptr, ctx); lvalue = ctx->GetElementPtrInst(ptr, LLVMInt32(0), index->GetValue(ctx)); + mask = LLVMMaskAllOn; + } + else { + Symbol *baseSym = GetBaseSymbol(); + assert(baseSym != NULL); + mask = lMaskForSymbol(baseSym, ctx); } ctx->SetDebugPos(pos); - return ctx->LoadInst(lvalue, GetType(), "index"); + return ctx->LoadInst(lvalue, mask, GetType(), "index"); } @@ -3172,6 +3186,7 @@ VectorMemberExpr::GetType() const { } } + llvm::Value* VectorMemberExpr::GetLValue(FunctionEmitContext* ctx) const { if (identifier.length() == 1) { @@ -3181,6 +3196,7 @@ VectorMemberExpr::GetLValue(FunctionEmitContext* ctx) const { } } + llvm::Value* VectorMemberExpr::GetValue(FunctionEmitContext* ctx) const { if (identifier.length() == 1) { @@ -3214,12 +3230,12 @@ VectorMemberExpr::GetValue(FunctionEmitContext* ctx) const { ctx->GetElementPtrInst(basePtr , 0, indices[i], "orig_offset"); llvm::Value *initValue = - ctx->LoadInst(initLValue, memberType->GetElementType(), + ctx->LoadInst(initLValue, NULL, memberType->GetElementType(), "vec_element"); ctx->StoreInst(initValue, ptmp); } - return ctx->LoadInst(ltmp, memberType, "swizzle_vec"); + return ctx->LoadInst(ltmp, NULL, memberType, "swizzle_vec"); } } @@ -3349,7 +3365,8 @@ MemberExpr::GetValue(FunctionEmitContext *ctx) const { return NULL; llvm::Value *lvalue = GetLValue(ctx); - if (!lvalue) { + llvm::Value *mask = NULL; + if (lvalue == NULL) { // As in the array case, this may be a temporary that hasn't hit // memory; get the full value and stuff it into a temporary array // so that we can index from there... @@ -3368,10 +3385,16 @@ MemberExpr::GetValue(FunctionEmitContext *ctx) const { if (elementNumber == -1) return NULL; lvalue = ctx->GetElementPtrInst(ptr, 0, elementNumber); + mask = LLVMMaskAllOn; + } + else { + Symbol *baseSym = GetBaseSymbol(); + assert(baseSym != NULL); + mask = lMaskForSymbol(baseSym, ctx); } ctx->SetDebugPos(pos); - return ctx->LoadInst(lvalue, GetType(), "structelement"); + return ctx->LoadInst(lvalue, mask, GetType(), "structelement"); } @@ -5269,7 +5292,7 @@ DereferenceExpr::GetValue(FunctionEmitContext *ctx) const { return NULL; ctx->SetDebugPos(pos); - return ctx->LoadInst(ptr, type, "reference_load"); + return ctx->LoadInst(ptr, NULL, type, "reference_load"); } @@ -5347,7 +5370,7 @@ SymbolExpr::GetValue(FunctionEmitContext *ctx) const { if (!symbol || !symbol->storagePtr) return NULL; ctx->SetDebugPos(pos); - return ctx->LoadInst(symbol->storagePtr, GetType(), symbol->name.c_str()); + return ctx->LoadInst(symbol->storagePtr, NULL, NULL, symbol->name.c_str()); } diff --git a/func.cpp b/func.cpp index 4a37994f..ed15af49 100644 --- a/func.cpp +++ b/func.cpp @@ -418,7 +418,7 @@ lCopyInTaskParameter(int i, llvm::Value *structArgPtr, const std::vectorLoadInst(ptr, NULL, sym->name.c_str()); + llvm::Value *ptrval = ctx->LoadInst(ptr, NULL, NULL, sym->name.c_str()); ctx->StoreInst(ptrval, sym->storagePtr); ctx->EmitFunctionParameterDebugInfo(sym); } @@ -465,7 +465,7 @@ Function::emitCode(FunctionEmitContext *ctx, llvm::Function *function, // The mask is the last parameter in the argument structure llvm::Value *ptr = ctx->GetElementPtrInst(structParamPtr, 0, nArgs, "task_struct_mask"); - llvm::Value *ptrval = ctx->LoadInst(ptr, NULL, "mask"); + llvm::Value *ptrval = ctx->LoadInst(ptr, NULL, NULL, "mask"); ctx->SetFunctionMask(ptrval); // Copy threadIndex and threadCount into stack-allocated storage so