From a89e26d725f210fe51fe364786c00f3ac93a5150 Mon Sep 17 00:00:00 2001 From: Matt Pharr Date: Mon, 10 Oct 2011 11:47:19 -0700 Subject: [PATCH] Improvements to mask management code; removes a number of unnecessary blends. We now maintain the distinction between the value of the mask passed into a function and the "internal" mask within the function that only accounts for varying control flow within the function. The full mask (the AND of the function mask and the internal mask) must be used for assignments to static and global variables, and to reference function parameters. Further, it is the appropriate mask to use for making decisions about varying control flow. However, we can use the internal mask for assignments to variables declared in the current function (including the return value and non-reference parameters to the function). Doing so allows us to catch a few more cases where the internal mask is all on, even if the mask coming into the function wasn't all on, and hence use moves rather than blends for those assignments, which in turn can allow additional optimizations to happen. Fixes issue #23. --- ctx.cpp | 125 +++++++++++++++++++++++++++++++++++-------------------- ctx.h | 64 ++++++++++++++++++---------- expr.cpp | 49 +++++++++++++++------- func.cpp | 11 +++-- stmt.cpp | 81 +++++++++++++++++------------------ sym.cpp | 1 + sym.h | 4 ++ 7 files changed, 208 insertions(+), 127 deletions(-) diff --git a/ctx.cpp b/ctx.cpp index ceb1d0d6..d73b85ff 100644 --- a/ctx.cpp +++ b/ctx.cpp @@ -124,10 +124,10 @@ CFInfo::GetLoop(bool isUniform, llvm::BasicBlock *breakTarget, /////////////////////////////////////////////////////////////////////////// -FunctionEmitContext::FunctionEmitContext(Function *function, Symbol *funSym, +FunctionEmitContext::FunctionEmitContext(Function *func, Symbol *funSym, llvm::Function *llvmFunction, SourcePos firstStmtPos) { - const Type *rt = function->GetReturnType(); + function = func; /* Create a new basic block to store all of the allocas */ allocaBlock = llvm::BasicBlock::Create(*g->ctx, "allocas", llvmFunction, 0); @@ -136,9 +136,12 @@ FunctionEmitContext::FunctionEmitContext(Function *function, Symbol *funSym, llvm::BranchInst::Create(bblock, allocaBlock); funcStartPos = funSym->pos; - returnType = rt; - maskPtr = NULL; - entryMask = NULL; + + internalMaskPointer = AllocaInst(LLVMTypes::MaskType, "internal_mask_memory"); + StoreInst(LLVMMaskAllOn, internalMaskPointer); + functionMaskValue = LLVMMaskAllOn; + fullMaskPointer = NULL; + loopMask = NULL; breakLanesPtr = continueLanesPtr = NULL; breakTarget = continueTarget = NULL; @@ -151,6 +154,7 @@ FunctionEmitContext::FunctionEmitContext(Function *function, Symbol *funSym, StoreInst(llvm::Constant::getNullValue(LLVMTypes::VoidPointerType), launchGroupHandlePtr); + const Type *returnType = function->GetReturnType(); if (!returnType || returnType == AtomicType::Void) returnValuePtr = NULL; else { @@ -164,7 +168,7 @@ FunctionEmitContext::FunctionEmitContext(Function *function, Symbol *funSym, /* If debugging is enabled, tell the debug information emission code about this new function */ diFile = funcStartPos.GetDIFile(); - llvm::DIType retType = rt->GetDIType(diFile); + llvm::DIType retType = function->GetReturnType()->GetDIType(diFile); int flags = llvm::DIDescriptor::FlagPrototyped; // ??
diFunction = m->diBuilder->createFunction(diFile, /* scope */ llvmFunction->getName(), // mangled @@ -208,6 +212,12 @@ FunctionEmitContext::~FunctionEmitContext() { } +const Function * +FunctionEmitContext::GetFunction() const { + return function; +} + + llvm::BasicBlock * FunctionEmitContext::GetCurrentBasicBlock() { return bblock; @@ -221,21 +231,41 @@ FunctionEmitContext::SetCurrentBasicBlock(llvm::BasicBlock *bb) { llvm::Value * -FunctionEmitContext::GetMask() { - return LoadInst(maskPtr, NULL, "load_mask"); +FunctionEmitContext::GetFunctionMask() { + return functionMaskValue; +} + + +llvm::Value * +FunctionEmitContext::GetInternalMask() { + if (VaryingCFDepth() == 0) + return LLVMMaskAllOn; + else + return LoadInst(internalMaskPointer, NULL, "load_mask"); +} + + +llvm::Value * +FunctionEmitContext::GetFullMask() { + llvm::Value *internalMask = GetInternalMask(); + if (internalMask == LLVMMaskAllOn && functionMaskValue == LLVMMaskAllOn) + return LLVMMaskAllOn; + else + return BinaryOperator(llvm::Instruction::And, GetInternalMask(), + functionMaskValue, "internal_mask&function_mask"); } void FunctionEmitContext::SetMaskPointer(llvm::Value *p) { - maskPtr = p; + fullMaskPointer = p; } void -FunctionEmitContext::SetEntryMask(llvm::Value *value) { - entryMask = value; - SetMask(value); +FunctionEmitContext::SetFunctionMask(llvm::Value *value) { + functionMaskValue = value; + StoreInst(GetFullMask(), fullMaskPointer); } @@ -246,33 +276,35 @@ FunctionEmitContext::SetLoopMask(llvm::Value *value) { void -FunctionEmitContext::SetMask(llvm::Value *value) { - StoreInst(value, maskPtr); +FunctionEmitContext::SetInternalMask(llvm::Value *value) { + StoreInst(value, internalMaskPointer); + // kludge so that __mask returns the right value in ispc code. + StoreInst(GetFullMask(), fullMaskPointer); } void -FunctionEmitContext::MaskAnd(llvm::Value *oldMask, llvm::Value *test) { +FunctionEmitContext::SetInternalMaskAnd(llvm::Value *oldMask, llvm::Value *test) { llvm::Value *mask = BinaryOperator(llvm::Instruction::And, oldMask, test, "oldMask&test"); - SetMask(mask); + SetInternalMask(mask); } void -FunctionEmitContext::MaskAndNot(llvm::Value *oldMask, llvm::Value *test) { +FunctionEmitContext::SetInternalMaskAndNot(llvm::Value *oldMask, llvm::Value *test) { llvm::Value *notTest = BinaryOperator(llvm::Instruction::Xor, test, LLVMMaskAllOn, "~test"); llvm::Value *mask = BinaryOperator(llvm::Instruction::And, oldMask, notTest, "oldMask&~test"); - SetMask(mask); + SetInternalMask(mask); } void FunctionEmitContext::BranchIfMaskAny(llvm::BasicBlock *btrue, llvm::BasicBlock *bfalse) { assert(bblock != NULL); - llvm::Value *any = Any(GetMask()); + llvm::Value *any = Any(GetFullMask()); BranchInst(btrue, bfalse, any); // It's illegal to add any additional instructions to the basic block // now that it's terminated, so set bblock to NULL to be safe @@ -283,7 +315,7 @@ FunctionEmitContext::BranchIfMaskAny(llvm::BasicBlock *btrue, llvm::BasicBlock * void FunctionEmitContext::BranchIfMaskAll(llvm::BasicBlock *btrue, llvm::BasicBlock *bfalse) { assert(bblock != NULL); - llvm::Value *all = All(GetMask()); + llvm::Value *all = All(GetFullMask()); BranchInst(btrue, bfalse, all); // It's illegal to add any additional instructions to the basic block // now that it's terminated, so set bblock to NULL to be safe @@ -303,8 +335,8 @@ FunctionEmitContext::BranchIfMaskNone(llvm::BasicBlock *btrue, llvm::BasicBlock void -FunctionEmitContext::StartUniformIf(llvm::Value *oldMask) { - controlFlowInfo.push_back(CFInfo::GetIf(true, 
oldMask)); +FunctionEmitContext::StartUniformIf() { + controlFlowInfo.push_back(CFInfo::GetIf(true, GetInternalMask())); } @@ -343,7 +375,7 @@ FunctionEmitContext::EndIf() { assert(continueLanesPtr != NULL); // newMask = (oldMask & ~(breakLanes | continueLanes)) - llvm::Value *oldMask = GetMask(); + llvm::Value *oldMask = GetInternalMask(); llvm::Value *breakLanes = LoadInst(breakLanesPtr, NULL, "break_lanes"); llvm::Value *continueLanes = LoadInst(continueLanesPtr, NULL, @@ -356,7 +388,7 @@ FunctionEmitContext::EndIf() { llvm::Value *newMask = BinaryOperator(llvm::Instruction::And, oldMask, notBreakOrContinue, "new_mask"); - SetMask(newMask); + SetInternalMask(newMask); } } } @@ -364,9 +396,10 @@ FunctionEmitContext::EndIf() { void FunctionEmitContext::StartLoop(llvm::BasicBlock *bt, llvm::BasicBlock *ct, - bool uniformCF, llvm::Value *oldMask) { + bool uniformCF) { // Store the current values of various loop-related state so that we // can restore it when we exit this loop. + llvm::Value *oldMask = GetInternalMask(); controlFlowInfo.push_back(CFInfo::GetLoop(uniformCF, breakTarget, continueTarget, breakLanesPtr, continueLanesPtr, oldMask, loopMask)); @@ -426,7 +459,7 @@ FunctionEmitContext::restoreMaskGivenReturns(llvm::Value *oldMask) { llvm::Value *notReturned = NotOperator(returnedLanes, "~returned_lanes"); llvm::Value *newMask = BinaryOperator(llvm::Instruction::And, oldMask, notReturned, "new_mask"); - SetMask(newMask); + SetInternalMask(newMask); } @@ -440,7 +473,7 @@ FunctionEmitContext::Break(bool doCoherenceCheck) { // If all of the enclosing 'if' tests in the loop have uniform control // flow or if we can tell that the mask is all on, then we can just // jump to the break location. - if (ifsInLoopAllUniform() || GetMask() == LLVMMaskAllOn) { + if (ifsInLoopAllUniform() || GetInternalMask() == LLVMMaskAllOn) { BranchInst(breakTarget); if (ifsInLoopAllUniform() && doCoherenceCheck) Warning(currentPos, "Coherent break statement not necessary in fully uniform " @@ -453,7 +486,7 @@ FunctionEmitContext::Break(bool doCoherenceCheck) { // executed a 'break' statement: // breakLanes = breakLanes | mask assert(breakLanesPtr != NULL); - llvm::Value *mask = GetMask(); + llvm::Value *mask = GetInternalMask(); llvm::Value *breakMask = LoadInst(breakLanesPtr, NULL, "break_mask"); llvm::Value *newMask = BinaryOperator(llvm::Instruction::Or, mask, breakMask, "mask|break_mask"); @@ -463,7 +496,7 @@ FunctionEmitContext::Break(bool doCoherenceCheck) { // statements in the same scope after the 'break'. Most of time // this will be optimized away since we'll likely end the scope of // an 'if' statement and restore the mask then. - SetMask(LLVMMaskAllOff); + SetInternalMask(LLVMMaskAllOff); if (doCoherenceCheck) // If the user has indicated that this is a 'coherent' break @@ -486,7 +519,7 @@ FunctionEmitContext::Continue(bool doCoherenceCheck) { return; } - if (ifsInLoopAllUniform() || GetMask() == LLVMMaskAllOn) { + if (ifsInLoopAllUniform() || GetInternalMask() == LLVMMaskAllOn) { // Similarly to 'break' statements, we can immediately jump to the // continue target if we're only in 'uniform' control flow within // loop or if we can tell that the mask is all on. @@ -501,7 +534,7 @@ FunctionEmitContext::Continue(bool doCoherenceCheck) { // Otherwise update the stored value of which lanes have 'continue'd. 
// continueLanes = continueLanes | mask assert(continueLanesPtr); - llvm::Value *mask = GetMask(); + llvm::Value *mask = GetInternalMask(); llvm::Value *continueMask = LoadInst(continueLanesPtr, NULL, "continue_mask"); llvm::Value *newMask = BinaryOperator(llvm::Instruction::Or, @@ -510,7 +543,7 @@ FunctionEmitContext::Continue(bool doCoherenceCheck) { // And set the current mask to be all off in case there are any // statements in the same scope after the 'continue' - SetMask(LLVMMaskAllOff); + SetInternalMask(LLVMMaskAllOff); if (doCoherenceCheck) // If this is a 'coherent continue' statement, then emit the @@ -582,11 +615,11 @@ FunctionEmitContext::RestoreContinuedLanes() { return; // mask = mask & continueFlags - llvm::Value *mask = GetMask(); + llvm::Value *mask = GetInternalMask(); llvm::Value *continueMask = LoadInst(continueLanesPtr, NULL, "continue_mask"); llvm::Value *orMask = BinaryOperator(llvm::Instruction::Or, mask, continueMask, "mask|continue_mask"); - SetMask(orMask); + SetInternalMask(orMask); // continueLanes = 0 StoreInst(LLVMMaskAllOff, continueLanesPtr); @@ -605,6 +638,7 @@ FunctionEmitContext::VaryingCFDepth() const { void FunctionEmitContext::CurrentLanesReturned(Expr *expr, bool doCoherenceCheck) { + const Type *returnType = function->GetReturnType(); if (returnType == AtomicType::Void) { if (expr != NULL) Error(expr->pos, "Can't return non-void type \"%s\" from void function.", @@ -623,7 +657,8 @@ FunctionEmitContext::CurrentLanesReturned(Expr *expr, bool doCoherenceCheck) { Expr *r = expr->TypeConv(returnType, "return statement"); if (r != NULL) { llvm::Value *retVal = r->GetValue(this); - StoreInst(retVal, returnValuePtr, GetMask(), returnType); + if (retVal != NULL) + StoreInst(retVal, returnValuePtr, GetInternalMask(), returnType); } } @@ -639,15 +674,16 @@ FunctionEmitContext::CurrentLanesReturned(Expr *expr, bool doCoherenceCheck) { // the current lane mask. llvm::Value *oldReturnedLanes = LoadInst(returnedLanesPtr, NULL, "old_returned_lanes"); - llvm::Value *newReturnedLanes = BinaryOperator(llvm::Instruction::Or, - oldReturnedLanes, - GetMask(), "old_mask|returned_lanes"); + llvm::Value *newReturnedLanes = + BinaryOperator(llvm::Instruction::Or, oldReturnedLanes, + GetInternalMask(), "old_mask|returned_lanes"); // For 'coherent' return statements, emit code to check if all // lanes have returned if (doCoherenceCheck) { - // if newReturnedLanes == entryMask, get out of here! - llvm::Value *cmp = MasksAllEqual(entryMask, newReturnedLanes); + // if newReturnedLanes == functionMaskValue, get out of here! 
+ llvm::Value *cmp = MasksAllEqual(functionMaskValue, + newReturnedLanes); llvm::BasicBlock *bDoReturn = CreateBasicBlock("do_return"); llvm::BasicBlock *bNoReturn = CreateBasicBlock("no_return"); BranchInst(bDoReturn, bNoReturn, cmp); @@ -663,7 +699,7 @@ FunctionEmitContext::CurrentLanesReturned(Expr *expr, bool doCoherenceCheck) { // same scope after the return have no effect StoreInst(newReturnedLanes, returnedLanesPtr); AddInstrumentationPoint("return: some but not all lanes have returned"); - SetMask(LLVMMaskAllOff); + SetInternalMask(LLVMMaskAllOff); } } @@ -812,7 +848,7 @@ FunctionEmitContext::AddInstrumentationPoint(const char *note) { // arg 3: line number args.push_back(LLVMInt32(currentPos.first_line)); // arg 4: current mask, movmsk'ed down to an int32 - args.push_back(LaneMask(GetMask())); + args.push_back(LaneMask(GetFullMask())); llvm::Function *finst = m->module->getFunction("ISPCInstrument"); CallInst(finst, args, ""); @@ -1437,7 +1473,7 @@ FunctionEmitContext::gather(llvm::Value *lvalue, const Type *type, // do the actual gather AddInstrumentationPoint("gather"); - llvm::Value *mask = GetMask(); + llvm::Value *mask = GetFullMask(); llvm::Function *gather = NULL; // Figure out which gather function to call based on the size of // the elements. @@ -1899,6 +1935,7 @@ FunctionEmitContext::ReturnInst() { // Add a sync call at the end of any function that launched tasks SyncInst(); + const Type *returnType = function->GetReturnType(); llvm::Instruction *rinst = NULL; if (returnValuePtr != NULL) { // We have value(s) to return; load them from their storage @@ -1957,7 +1994,7 @@ FunctionEmitContext::LaunchInst(llvm::Function *callee, } // copy in the mask - llvm::Value *mask = GetMask(); + llvm::Value *mask = GetFullMask(); llvm::Value *ptr = GetElementPtrInst(argmem, 0, argVals.size(), "funarg_mask"); StoreInst(mask, ptr); diff --git a/ctx.h b/ctx.h index 9e823b8e..da524cca 100644 --- a/ctx.h +++ b/ctx.h @@ -71,6 +71,10 @@ public: SourcePos firstStmtPos); ~FunctionEmitContext(); + /** Returns the Function * corresponding to the function that we're + currently generating code for. */ + const Function *GetFunction() const; + /** @name Current basic block management @{ */ @@ -84,22 +88,33 @@ public: /** @name Mask management @{ */ - /** Returns the current mask value */ - llvm::Value *GetMask(); + /** Returns the mask value at entry to the current function. */ + llvm::Value *GetFunctionMask(); + /** Returns the mask value corresponding to "varying" control flow + within the current function. (i.e. this doesn't include the effect + of the mask at function entry. */ + llvm::Value *GetInternalMask(); + + /** Returns the complete current mask value--i.e. the logical AND of + the function entry mask and the internal mask. */ + llvm::Value *GetFullMask(); + + /** Provides the alloca'd pointer to memory to store the full function + mask. This is only used to wire up the __mask builtin variable. 
*/ void SetMaskPointer(llvm::Value *p); /** Provides the value of the mask at function entry */ - void SetEntryMask(llvm::Value *val); + void SetFunctionMask(llvm::Value *val); - /** Sets the mask to a new value */ - void SetMask(llvm::Value *val); + /** Sets the internal mask to a new value */ + void SetInternalMask(llvm::Value *val); - /** Sets the mask to (oldMask & val) */ - void MaskAnd(llvm::Value *oldMask, llvm::Value *val); + /** Sets the internal mask to (oldMask & val) */ + void SetInternalMaskAnd(llvm::Value *oldMask, llvm::Value *val); - /** Sets the mask to (oldMask & ~val) */ - void MaskAndNot(llvm::Value *oldMask, llvm::Value *test); + /** Sets the internal mask to (oldMask & ~val) */ + void SetInternalMaskAndNot(llvm::Value *oldMask, llvm::Value *test); /** Emits a branch instruction to the basic block btrue if any of the lanes of current mask are on and bfalse if none are on. */ @@ -118,9 +133,8 @@ public: @{ */ /** Notifies the FunctionEmitContext that we're starting emission of an - 'if' statement with a uniform test. The value of the mask going - into the 'if' statement is provided in the oldMask parameter. */ - void StartUniformIf(llvm::Value *oldMask); + 'if' statement with a uniform test. */ + void StartUniformIf(); /** Notifies the FunctionEmitContext that we're starting emission of an 'if' statement with a varying test. The value of the mask going @@ -135,10 +149,9 @@ public: for a loop. Basic blocks are provides for where 'break' and 'continue' statements should jump to (if all running lanes want to break or continue), uniformControlFlow indicates whether the loop - condition is 'uniform', and oldMask provides the current mask going - into the loop. */ + condition is 'uniform'. */ void StartLoop(llvm::BasicBlock *breakTarget, llvm::BasicBlock *continueTarget, - bool uniformControlFlow, llvm::Value *oldMask); + bool uniformControlFlow); /** Informs FunctionEmitContext of the value of the mask at the start of a loop body. */ @@ -404,6 +417,9 @@ public: /** @} */ private: + /** Pointer to the Function for which we're currently generating code. */ + Function *function; + /** The basic block into which we add any alloca instructions that need to go at the very start of the function. */ llvm::BasicBlock *allocaBlock; @@ -413,8 +429,16 @@ private: llvm::BasicBlock *bblock; /** Pointer to stack-allocated memory that stores the current value of - the program mask. */ - llvm::Value *maskPtr; + the full program mask. */ + llvm::Value *fullMaskPointer; + + /** Pointer to stack-allocated memory that stores the current value of + the program mask representing varying control flow within the + function. */ + llvm::Value *internalMaskPointer; + + /** Value of the program mask when the function starts execution. */ + llvm::Value *functionMaskValue; /** Current source file position; if debugging information is being generated, this position is used to set file/line information for @@ -425,12 +449,6 @@ private: for error messages and debugging symbols. */ SourcePos funcStartPos; - /** Type of result that the current function returns. */ - const Type *returnType; - - /** Value of the program mask when the function starts execution. */ - llvm::Value *entryMask; - /** If currently in a loop body, the value of the mask at the start of the loop. 
*/ llvm::Value *loopMask; diff --git a/expr.cpp b/expr.cpp index 5526ecd0..d90bc18b 100644 --- a/expr.cpp +++ b/expr.cpp @@ -505,7 +505,12 @@ lEmitPrePostIncDec(UnaryExpr::Op op, Expr *expr, SourcePos pos, #endif // And store the result out to the lvalue - ctx->StoreInst(binop, lvalue, ctx->GetMask(), type); + Symbol *baseSym = expr->GetBaseSymbol(); + assert(baseSym != NULL); + llvm::Value *mask = (baseSym->parentFunction == ctx->GetFunction() && + baseSym->storageClass != SC_STATIC) ? + ctx->GetInternalMask() : ctx->GetFullMask(); + ctx->StoreInst(binop, lvalue, mask, type); // And then if it's a pre increment/decrement, return the final // computed result; otherwise return the previously-grabbed expression @@ -1509,8 +1514,12 @@ lStoreAssignResult(llvm::Value *rv, llvm::Value *lv, const Type *type, // goes out of scope. ctx->StoreInst(rv, lv, LLVMMaskAllOn, type); } - else - ctx->StoreInst(rv, lv, ctx->GetMask(), type); + else { + llvm::Value *mask = (baseSym->parentFunction == ctx->GetFunction() && + baseSym->storageClass != SC_STATIC) ? + ctx->GetInternalMask() : ctx->GetFullMask(); + ctx->StoreInst(rv, lv, mask, type); + } } @@ -1844,12 +1853,12 @@ SelectExpr::GetValue(FunctionEmitContext *ctx) const { // element-wise select to get the result llvm::Value *testVal = test->GetValue(ctx); assert(testVal->getType() == LLVMTypes::MaskType); - llvm::Value *oldMask = ctx->GetMask(); - ctx->MaskAnd(oldMask, testVal); + llvm::Value *oldMask = ctx->GetInternalMask(); + ctx->SetInternalMaskAnd(oldMask, testVal); llvm::Value *expr1Val = expr1->GetValue(ctx); - ctx->MaskAndNot(oldMask, testVal); + ctx->SetInternalMaskAndNot(oldMask, testVal); llvm::Value *expr2Val = expr2->GetValue(ctx); - ctx->SetMask(oldMask); + ctx->SetInternalMask(oldMask); return lEmitVaryingSelect(ctx, testVal, expr1Val, expr2Val, type); } @@ -2415,7 +2424,7 @@ FunctionCallExpr::GetValue(FunctionEmitContext *ctx) const { callargs.size() == callee->arg_size()); if (callargs.size() + 1 == callee->arg_size()) - argVals.push_back(ctx->GetMask()); + argVals.push_back(ctx->GetFullMask()); retVal = ctx->CallInst(callee, argVals, isVoidFunc ? "" : "calltmp"); } @@ -2430,10 +2439,19 @@ FunctionCallExpr::GetValue(FunctionEmitContext *ctx) const { assert(rt != NULL); llvm::Value *load = ctx->LoadInst(ptr, rt->GetReferenceTarget(), "load_ref"); - // FIXME: apply the "don't do blending" optimization here if - // appropriate? - ctx->StoreInst(load, argValLValues[i], ctx->GetMask(), - rt->GetReferenceTarget()); + + Symbol *baseSym = callargs[i]->GetBaseSymbol(); + assert(baseSym != NULL); +//CO if (baseSym->varyingCFDepth == ctx->VaryingCFDepth() && +//CO baseSym->storageClass != SC_STATIC) +//CO ctx->StoreInst(load, argValLValues[i], LLVMMaskAllOn, +//CO rt->GetReferenceTarget()); +//CO else { + llvm::Value *mask = (baseSym->parentFunction == ctx->GetFunction() && + baseSym->storageClass != SC_STATIC) ? + ctx->GetInternalMask() : ctx->GetFullMask(); + ctx->StoreInst(load, argValLValues[i], mask, rt->GetReferenceTarget()); +//CO } } } @@ -5206,10 +5224,9 @@ SymbolExpr::Optimize() { int SymbolExpr::EstimateCost() const { - if (symbol->constValue != NULL) - return 0; - else - return COST_LOAD; + // Be optimistic and assume it's in a register or can be used as a + // memory operand.. 
+ return 0; } diff --git a/func.cpp b/func.cpp index fc33b8f3..12fb68a1 100644 --- a/func.cpp +++ b/func.cpp @@ -104,7 +104,10 @@ Function::Function(DeclSpecs *ds, Declarator *decl, Stmt *c) { for (unsigned int i = 0; i < decl->functionArgs->size(); ++i) { Declaration *pdecl = (*decl->functionArgs)[i]; assert(pdecl->declarators.size() == 1); - args.push_back(pdecl->declarators[0]->sym); + Symbol *sym = pdecl->declarators[0]->sym; + if (dynamic_cast(sym->type) == NULL) + sym->parentFunction = this; + args.push_back(sym); } } @@ -462,7 +465,7 @@ Function::emitCode(FunctionEmitContext *ctx, llvm::Function *function, llvm::Value *ptr = ctx->GetElementPtrInst(structParamPtr, 0, nArgs, "task_struct_mask"); llvm::Value *ptrval = ctx->LoadInst(ptr, NULL, "mask"); - ctx->SetEntryMask(ptrval); + ctx->SetFunctionMask(ptrval); // Copy threadIndex and threadCount into stack-allocated storage so // that their symbols point to something reasonable. @@ -500,12 +503,12 @@ Function::emitCode(FunctionEmitContext *ctx, llvm::Function *function, // happens for exmaple with 'export'ed functions that the app // calls. if (argIter == function->arg_end()) - ctx->SetEntryMask(LLVMMaskAllOn); + ctx->SetFunctionMask(LLVMMaskAllOn); else { // Otherwise use the mask to set the entry mask value argIter->setName("__mask"); assert(argIter->getType() == LLVMTypes::MaskType); - ctx->SetEntryMask(argIter); + ctx->SetFunctionMask(argIter); assert(++argIter == function->arg_end()); } } diff --git a/stmt.cpp b/stmt.cpp index 6be10752..bfd50950 100644 --- a/stmt.cpp +++ b/stmt.cpp @@ -322,6 +322,7 @@ DeclStmt::EmitCode(FunctionEmitContext *ctx) const { // this before the initializer stuff. ctx->EmitVariableDebugInfo(sym); // And then get it initialized... + sym->parentFunction = ctx->GetFunction(); lInitSymbol(sym->storagePtr, sym->name.c_str(), type, decl->initExpr, ctx, sym->pos); } @@ -461,7 +462,7 @@ IfStmt::EmitCode(FunctionEmitContext *ctx) const { return; if (isUniform) { - ctx->StartUniformIf(ctx->GetMask()); + ctx->StartUniformIf(); if (doAllCheck) Warning(test->pos, "Uniform condition supplied to \"cif\" statement."); @@ -571,14 +572,14 @@ void IfStmt::emitMaskedTrueAndFalse(FunctionEmitContext *ctx, llvm::Value *oldMask, llvm::Value *test) const { if (trueStmts) { - ctx->MaskAnd(oldMask, test); + ctx->SetInternalMaskAnd(oldMask, test); lEmitIfStatements(ctx, trueStmts, "if: expr mixed, true statements"); // under varying control flow,, returns can't stop instruction // emission, so this better be non-NULL... 
assert(ctx->GetCurrentBasicBlock()); } if (falseStmts) { - ctx->MaskAndNot(oldMask, test); + ctx->SetInternalMaskAndNot(oldMask, test); lEmitIfStatements(ctx, falseStmts, "if: expr mixed, false statements"); assert(ctx->GetCurrentBasicBlock()); } @@ -764,8 +765,8 @@ lSafeToRunWithAllLanesOff(Stmt *stmt) { */ void IfStmt::emitVaryingIf(FunctionEmitContext *ctx, llvm::Value *ltest) const { - llvm::Value *oldMask = ctx->GetMask(); - if (oldMask == LLVMMaskAllOn) { + llvm::Value *oldMask = ctx->GetInternalMask(); + if (ctx->GetFullMask() == LLVMMaskAllOn) { // We can tell that the mask is on statically at compile time; just // emit code for the 'if test with the mask all on' path llvm::BasicBlock *bDone = ctx->CreateBasicBlock("cif_done"); @@ -782,18 +783,11 @@ IfStmt::emitVaryingIf(FunctionEmitContext *ctx, llvm::Value *ltest) const { llvm::BasicBlock *bDone = ctx->CreateBasicBlock("cif_done"); // Jump to either bAllOn or bMixedOn, depending on the mask's value - llvm::Value *maskAllQ = ctx->All(oldMask); + llvm::Value *maskAllQ = ctx->All(ctx->GetFullMask()); ctx->BranchInst(bAllOn, bMixedOn, maskAllQ); // Emit code for the 'mask all on' case ctx->SetCurrentBasicBlock(bAllOn); - // We start by explicitly storing "all on" into the mask mask. - // Note that this doesn't change its actual value, but doing so - // lets the compiler see what's going on so that subsequent - // optimizations for code emitted here can operate with the - // knowledge that the mask is definitely all on (until it modifies - // the mask itself). - ctx->SetMask(LLVMMaskAllOn); emitMaskAllOn(ctx, ltest, bDone); // And emit code for the mixed mask case @@ -845,11 +839,20 @@ IfStmt::emitVaryingIf(FunctionEmitContext *ctx, llvm::Value *ltest) const { /** Emits code for 'if' tests under the case where we know that the program - mask is all on. + mask is all on going into the 'if'. */ void IfStmt::emitMaskAllOn(FunctionEmitContext *ctx, llvm::Value *ltest, llvm::BasicBlock *bDone) const { + // We start by explicitly storing "all on" into the mask mask. Note + // that this doesn't change its actual value, but doing so lets the + // compiler see what's going on so that subsequent optimizations for + // code emitted here can operate with the knowledge that the mask is + // definitely all on (until it modifies the mask itself). + ctx->SetInternalMask(LLVMMaskAllOn); + llvm::Value *oldFunctionMask = ctx->GetFunctionMask(); + ctx->SetFunctionMask(LLVMMaskAllOn); + // First, check the value of the test. If it's all on, then we jump to // a basic block that will only have code for the true case. llvm::BasicBlock *bTestAll = ctx->CreateBasicBlock("cif_test_all"); @@ -896,6 +899,9 @@ IfStmt::emitMaskAllOn(FunctionEmitContext *ctx, llvm::Value *ltest, assert(ctx->GetCurrentBasicBlock()); ctx->EndIf(); ctx->BranchInst(bDone); + + ctx->SetCurrentBasicBlock(bDone); + ctx->SetFunctionMask(oldFunctionMask); } @@ -909,11 +915,11 @@ IfStmt::emitMaskMixed(FunctionEmitContext *ctx, llvm::Value *oldMask, llvm::BasicBlock *bNext = ctx->CreateBasicBlock("safe_if_after_true"); if (trueStmts != NULL) { llvm::BasicBlock *bRunTrue = ctx->CreateBasicBlock("safe_if_run_true"); - ctx->MaskAnd(oldMask, ltest); + ctx->SetInternalMaskAnd(oldMask, ltest); // Do any of the program instances want to run the 'true' // block? If not, jump ahead to bNext. 
- llvm::Value *maskAnyQ = ctx->Any(ctx->GetMask()); + llvm::Value *maskAnyQ = ctx->Any(ctx->GetFullMask()); ctx->BranchInst(bRunTrue, bNext, maskAnyQ); // Emit statements for true @@ -926,11 +932,11 @@ IfStmt::emitMaskMixed(FunctionEmitContext *ctx, llvm::Value *oldMask, if (falseStmts != NULL) { llvm::BasicBlock *bRunFalse = ctx->CreateBasicBlock("safe_if_run_false"); bNext = ctx->CreateBasicBlock("safe_if_after_false"); - ctx->MaskAndNot(oldMask, ltest); + ctx->SetInternalMaskAndNot(oldMask, ltest); // Similarly, check to see if any of the instances want to // run the 'false' block... - llvm::Value *maskAnyQ = ctx->Any(ctx->GetMask()); + llvm::Value *maskAnyQ = ctx->Any(ctx->GetFullMask()); ctx->BranchInst(bRunFalse, bNext, maskAnyQ); // Emit code for false @@ -1019,14 +1025,14 @@ void DoStmt::EmitCode(FunctionEmitContext *ctx) const { llvm::BasicBlock *bexit = ctx->CreateBasicBlock("do_exit"); llvm::BasicBlock *btest = ctx->CreateBasicBlock("do_test"); - ctx->StartLoop(bexit, btest, uniformTest, ctx->GetMask()); + ctx->StartLoop(bexit, btest, uniformTest); // Start by jumping into the loop body ctx->BranchInst(bloop); // And now emit code for the loop body ctx->SetCurrentBasicBlock(bloop); - ctx->SetLoopMask(ctx->GetMask()); + ctx->SetLoopMask(ctx->GetInternalMask()); ctx->SetDebugPos(pos); // FIXME: in the StmtList::EmitCode() method takes starts/stops a new // scope around the statements in the list. So if the body is just a @@ -1047,10 +1053,13 @@ void DoStmt::EmitCode(FunctionEmitContext *ctx) const { // IfStmt::emitCoherentTests()), and then emit the code for the // loop body. ctx->SetCurrentBasicBlock(bAllOn); - ctx->SetMask(LLVMMaskAllOn); + ctx->SetInternalMask(LLVMMaskAllOn); + llvm::Value *oldFunctionMask = ctx->GetFunctionMask(); + ctx->SetFunctionMask(LLVMMaskAllOn); if (bodyStmts) bodyStmts->EmitCode(ctx); assert(ctx->GetCurrentBasicBlock()); + ctx->SetFunctionMask(oldFunctionMask); ctx->BranchInst(btest); // The mask is mixed. Just emit the code for the loop body. @@ -1093,8 +1102,8 @@ void DoStmt::EmitCode(FunctionEmitContext *ctx) const { // For the varying case, update the mask based on the value of the // test. If any program instances still want to be running, jump // to the top of the loop. Otherwise, jump out. - llvm::Value *mask = ctx->GetMask(); - ctx->MaskAnd(mask, testValue); + llvm::Value *mask = ctx->GetInternalMask(); + ctx->SetInternalMaskAnd(mask, testValue); ctx->BranchIfMaskAny(bloop, bexit); } @@ -1203,7 +1212,7 @@ ForStmt::EmitCode(FunctionEmitContext *ctx) const { (!g->opt.disableUniformControlFlow && !lHasVaryingBreakOrContinue(stmts)); - ctx->StartLoop(bexit, bstep, uniformTest, ctx->GetMask()); + ctx->StartLoop(bexit, bstep, uniformTest); ctx->SetDebugPos(pos); // If we have an initiailizer statement, start by emitting the code for @@ -1216,17 +1225,6 @@ ForStmt::EmitCode(FunctionEmitContext *ctx) const { } ctx->BranchInst(btest); - assert(ctx->GetCurrentBasicBlock()); -#if 0 - if (!ctx->GetCurrentBasicBlock()) { - // when does this happen?? - if (init) - ctx->EndScope(); - ctx->EndLoop(); - return; - } -#endif - // Emit code to get the value of the loop test. If no test expression // was provided, just go with a true value. 
ctx->SetCurrentBasicBlock(btest); @@ -1253,14 +1251,14 @@ ForStmt::EmitCode(FunctionEmitContext *ctx) const { ctx->BranchInst(bloop, bexit, ltest); } else { - llvm::Value *mask = ctx->GetMask(); - ctx->MaskAnd(mask, ltest); + llvm::Value *mask = ctx->GetInternalMask(); + ctx->SetInternalMaskAnd(mask, ltest); ctx->BranchIfMaskAny(bloop, bexit); } // On to emitting the code for the loop body. ctx->SetCurrentBasicBlock(bloop); - ctx->SetLoopMask(ctx->GetMask()); + ctx->SetLoopMask(ctx->GetInternalMask()); ctx->AddInstrumentationPoint("for loop body"); if (!dynamic_cast(stmts)) ctx->StartScope(); @@ -1278,10 +1276,13 @@ ForStmt::EmitCode(FunctionEmitContext *ctx) const { // the runtime test has passed, make this fact clear for code // generation at compile time here.) ctx->SetCurrentBasicBlock(bAllOn); - ctx->SetMask(LLVMMaskAllOn); + ctx->SetInternalMask(LLVMMaskAllOn); + llvm::Value *oldFunctionMask = ctx->GetFunctionMask(); + ctx->SetFunctionMask(LLVMMaskAllOn); if (stmts) stmts->EmitCode(ctx); assert(ctx->GetCurrentBasicBlock()); + ctx->SetFunctionMask(oldFunctionMask); ctx->BranchInst(bstep); // Emit code for the mask being mixed. We should never run the @@ -1773,7 +1774,7 @@ PrintStmt::EmitCode(FunctionEmitContext *ctx) const { args[0] = ctx->GetStringPtr(format); args[1] = ctx->GetStringPtr(argTypes); args[2] = LLVMInt32(g->target.vectorWidth); - args[3] = ctx->LaneMask(ctx->GetMask()); + args[3] = ctx->LaneMask(ctx->GetFullMask()); std::vector argVec(&args[0], &args[5]); ctx->CallInst(printFunc, argVec, ""); } diff --git a/sym.cpp b/sym.cpp index a13f8885..2b557573 100644 --- a/sym.cpp +++ b/sym.cpp @@ -52,6 +52,7 @@ Symbol::Symbol(const std::string &n, SourcePos p, const Type *t, constValue = NULL; storageClass = sc; varyingCFDepth = 0; + parentFunction = NULL; } diff --git a/sym.h b/sym.h index 33a3ff5a..0cf60cd4 100644 --- a/sym.h +++ b/sym.h @@ -108,6 +108,10 @@ public: masked stores when modifying the symbol's value when the store is done at the same 'varying' control flow depth as the one where the symbol was originally declared. */ + const Function *parentFunction; + /*!< For symbols that are parameters to functions or are + variables declared inside functions, this gives the + function they're in. */ };
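
The following standalone C++ sketch is not part of the patch; it only illustrates the mask-selection rule that the commit message describes and that the diff applies (GetFullMask() in ctx.cpp, and the mask choice in expr.cpp's lStoreAssignResult() and lEmitPrePostIncDec()). The names LaneMask, MaskState, SymbolInfo, and chooseStoreMask are hypothetical, invented for this example; the real code operates on llvm::Value masks inside FunctionEmitContext.

#include <cstdint>
#include <cstdio>

typedef uint32_t LaneMask;                 // one bit per program instance

struct MaskState {
    LaneMask functionMask;                 // mask at function entry (GetFunctionMask)
    LaneMask internalMask;                 // mask from varying control flow inside
                                           // the function (GetInternalMask)
    LaneMask fullMask() const {            // GetFullMask(): AND of the two
        return functionMask & internalMask;
    }
};

struct SymbolInfo {
    bool declaredInCurrentFunction;        // parentFunction == ctx->GetFunction()
    bool isStatic;                         // storageClass == SC_STATIC
};

// Stores to variables declared in the current function (and not 'static') only
// need the internal mask; stores to globals, statics, and reference parameters
// must use the full mask, since lanes masked off by the caller are visible there.
LaneMask chooseStoreMask(const MaskState &ms, const SymbolInfo &sym) {
    if (sym.declaredInCurrentFunction && !sym.isStatic)
        return ms.internalMask;
    return ms.fullMask();
}

int main() {
    // Caller disabled the upper lanes, but no varying control flow has run yet.
    MaskState ms = { 0x0Fu, 0xFFu };
    SymbolInfo local  = { true,  false };
    SymbolInfo global = { false, false };
    // The local store sees an all-on internal mask, so it can be a plain move;
    // the global store still needs the narrower full mask, i.e. a blend.
    printf("local store mask:  0x%02X\n", (unsigned)chooseStoreMask(ms, local));
    printf("global store mask: 0x%02X\n", (unsigned)chooseStoreMask(ms, global));
    return 0;
}

Spotting an all-on internal mask in cases like the local store above is what lets later passes replace masked blends with unconditional moves, which is the additional optimization opportunity the commit message refers to.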